From 8621caa0d45e731f2e9f5889ff5bb384fcd6e059 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Dec 2016 14:28:58 +0000 Subject: [PATCH 001/142] crypto: arm64/chacha20 - implement NEON version based on SSE3 code This is a straight port to arm64/NEON of the x86 SSE3 implementation of the ChaCha20 stream cipher. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/chacha20-neon-core.S | 480 +++++++++++++++++++++++++ arch/arm64/crypto/chacha20-neon-glue.c | 131 +++++++ 4 files changed, 620 insertions(+) create mode 100644 arch/arm64/crypto/chacha20-neon-core.S create mode 100644 arch/arm64/crypto/chacha20-neon-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 450a85df041a..0bf0f531f539 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -72,4 +72,10 @@ config CRYPTO_CRC32_ARM64 depends on ARM64 select CRYPTO_HASH +config CRYPTO_CHACHA20_NEON + tristate "NEON accelerated ChaCha20 symmetric cipher" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_CHACHA20 + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index aa8888d7b744..9d2826c5fccf 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -41,6 +41,9 @@ sha256-arm64-y := sha256-glue.o sha256-core.o obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o + AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S new file mode 100644 index 000000000000..e2cd65580807 --- /dev/null +++ b/arch/arm64/crypto/chacha20-neon-core.S @@ -0,0 +1,480 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 
Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/linkage.h> + + .text + .align 6 + +ENTRY(chacha20_block_xor_neon) + // x0: Input state matrix, s + // x1: 1 data block output, o + // x2: 1 data block input, i + + // + // This function encrypts one ChaCha20 block by loading the state matrix + // in four NEON registers. It performs matrix operation on four words in + // parallel, but requires shuffling to rearrange the words after each + // round. + // + + // x0..3 = s0..3 + ld1 {v0.4s-v3.4s}, [x0] + ld1 {v8.4s-v11.4s}, [x0] + + mov x3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v4.16b, v3.16b, v0.16b + shl v3.4s, v4.4s, #8 + sri v3.4s, v4.4s, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + ext v1.16b, v1.16b, v1.16b, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + ext v3.16b, v3.16b, v3.16b, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, 
v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v4.16b, v3.16b, v0.16b + shl v3.4s, v4.4s, #8 + sri v3.4s, v4.4s, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + ext v1.16b, v1.16b, v1.16b, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + ext v3.16b, v3.16b, v3.16b, #4 + + subs x3, x3, #1 + b.ne .Ldoubleround + + ld1 {v4.16b-v7.16b}, [x2] + + // o0 = i0 ^ (x0 + s0) + add v0.4s, v0.4s, v8.4s + eor v0.16b, v0.16b, v4.16b + + // o1 = i1 ^ (x1 + s1) + add v1.4s, v1.4s, v9.4s + eor v1.16b, v1.16b, v5.16b + + // o2 = i2 ^ (x2 + s2) + add v2.4s, v2.4s, v10.4s + eor v2.16b, v2.16b, v6.16b + + // o3 = i3 ^ (x3 + s3) + add v3.4s, v3.4s, v11.4s + eor v3.16b, v3.16b, v7.16b + + st1 {v0.16b-v3.16b}, [x1] + + ret +ENDPROC(chacha20_block_xor_neon) + + .align 6 +ENTRY(chacha20_4block_xor_neon) + // x0: Input state matrix, s + // x1: 4 data blocks output, o + // x2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. 
+ // + adr x3, CTRINC + ld1 {v16.4s}, [x3] + + // x0..15[0-3] = s0..3[0..3] + mov x4, x0 + ld4r { v0.4s- v3.4s}, [x4], #16 + ld4r { v4.4s- v7.4s}, [x4], #16 + ld4r { v8.4s-v11.4s}, [x4], #16 + ld4r {v12.4s-v15.4s}, [x4] + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v16.4s + + mov x3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v12.16b, v12.16b, v0.16b + eor v13.16b, v13.16b, v1.16b + eor v14.16b, v14.16b, v2.16b + eor v15.16b, v15.16b, v3.16b + + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + rev32 v15.8h, v15.8h + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v17.16b, v4.16b, v8.16b + eor v18.16b, v5.16b, v9.16b + eor v19.16b, v6.16b, v10.16b + eor v20.16b, v7.16b, v11.16b + + shl v4.4s, v17.4s, #12 + shl v5.4s, v18.4s, #12 + shl v6.4s, v19.4s, #12 + shl v7.4s, v20.4s, #12 + + sri v4.4s, v17.4s, #20 + sri v5.4s, v18.4s, #20 + sri v6.4s, v19.4s, #20 + sri v7.4s, v20.4s, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v17.16b, v12.16b, v0.16b + eor v18.16b, v13.16b, v1.16b + eor v19.16b, v14.16b, v2.16b + eor v20.16b, v15.16b, v3.16b + + shl v12.4s, v17.4s, #8 + shl v13.4s, v18.4s, #8 + shl v14.4s, v19.4s, #8 + shl v15.4s, v20.4s, #8 + + sri v12.4s, v17.4s, #24 + sri v13.4s, v18.4s, #24 + sri v14.4s, v19.4s, #24 + 
sri v15.4s, v20.4s, #24 + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v17.16b, v4.16b, v8.16b + eor v18.16b, v5.16b, v9.16b + eor v19.16b, v6.16b, v10.16b + eor v20.16b, v7.16b, v11.16b + + shl v4.4s, v17.4s, #7 + shl v5.4s, v18.4s, #7 + shl v6.4s, v19.4s, #7 + shl v7.4s, v20.4s, #7 + + sri v4.4s, v17.4s, #25 + sri v5.4s, v18.4s, #25 + sri v6.4s, v19.4s, #25 + sri v7.4s, v20.4s, #25 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s + + eor v15.16b, v15.16b, v0.16b + eor v12.16b, v12.16b, v1.16b + eor v13.16b, v13.16b, v2.16b + eor v14.16b, v14.16b, v3.16b + + rev32 v15.8h, v15.8h + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v17.16b, v5.16b, v10.16b + eor v18.16b, v6.16b, v11.16b + eor v19.16b, v7.16b, v8.16b + eor v20.16b, v4.16b, v9.16b + + shl v5.4s, v17.4s, #12 + shl v6.4s, v18.4s, #12 + shl v7.4s, v19.4s, #12 + shl v4.4s, v20.4s, #12 + + sri v5.4s, v17.4s, #20 + sri v6.4s, v18.4s, #20 + sri v7.4s, v19.4s, #20 + sri v4.4s, v20.4s, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s 
+ + eor v17.16b, v15.16b, v0.16b + eor v18.16b, v12.16b, v1.16b + eor v19.16b, v13.16b, v2.16b + eor v20.16b, v14.16b, v3.16b + + shl v15.4s, v17.4s, #8 + shl v12.4s, v18.4s, #8 + shl v13.4s, v19.4s, #8 + shl v14.4s, v20.4s, #8 + + sri v15.4s, v17.4s, #24 + sri v12.4s, v18.4s, #24 + sri v13.4s, v19.4s, #24 + sri v14.4s, v20.4s, #24 + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v17.16b, v5.16b, v10.16b + eor v18.16b, v6.16b, v11.16b + eor v19.16b, v7.16b, v8.16b + eor v20.16b, v4.16b, v9.16b + + shl v5.4s, v17.4s, #7 + shl v6.4s, v18.4s, #7 + shl v7.4s, v19.4s, #7 + shl v4.4s, v20.4s, #7 + + sri v5.4s, v17.4s, #25 + sri v6.4s, v18.4s, #25 + sri v7.4s, v19.4s, #25 + sri v4.4s, v20.4s, #25 + + subs x3, x3, #1 + b.ne .Ldoubleround4 + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] + ld4r {v17.4s-v20.4s}, [x0], #16 + add v0.4s, v0.4s, v17.4s + add v1.4s, v1.4s, v18.4s + add v2.4s, v2.4s, v19.4s + add v3.4s, v3.4s, v20.4s + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // x6[0-3] += s1[2] + // x7[0-3] += s1[3] + ld4r {v21.4s-v24.4s}, [x0], #16 + add v4.4s, v4.4s, v21.4s + add v5.4s, v5.4s, v22.4s + add v6.4s, v6.4s, v23.4s + add v7.4s, v7.4s, v24.4s + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + ld4r {v17.4s-v20.4s}, [x0], #16 + add v8.4s, v8.4s, v17.4s + add v9.4s, v9.4s, v18.4s + add v10.4s, v10.4s, v19.4s + add v11.4s, v11.4s, v20.4s + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + ld4r {v21.4s-v24.4s}, [x0] + add v12.4s, v12.4s, v21.4s + add v13.4s, v13.4s, v22.4s + add v14.4s, v14.4s, v23.4s + add v15.4s, v15.4s, v24.4s + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v16.4s + + ld1 
{v16.16b-v19.16b}, [x2], #64 + ld1 {v20.16b-v23.16b}, [x2], #64 + + // interleave 32-bit words in state n, n+1 + zip1 v24.4s, v0.4s, v1.4s + zip1 v25.4s, v2.4s, v3.4s + zip1 v26.4s, v4.4s, v5.4s + zip1 v27.4s, v6.4s, v7.4s + zip1 v28.4s, v8.4s, v9.4s + zip1 v29.4s, v10.4s, v11.4s + zip1 v30.4s, v12.4s, v13.4s + zip1 v31.4s, v14.4s, v15.4s + + zip2 v1.4s, v0.4s, v1.4s + zip2 v3.4s, v2.4s, v3.4s + zip2 v5.4s, v4.4s, v5.4s + zip2 v7.4s, v6.4s, v7.4s + zip2 v9.4s, v8.4s, v9.4s + zip2 v11.4s, v10.4s, v11.4s + zip2 v13.4s, v12.4s, v13.4s + zip2 v15.4s, v14.4s, v15.4s + + mov v0.16b, v24.16b + mov v2.16b, v25.16b + mov v4.16b, v26.16b + mov v6.16b, v27.16b + mov v8.16b, v28.16b + mov v10.16b, v29.16b + mov v12.16b, v30.16b + mov v14.16b, v31.16b + + // interleave 64-bit words in state n, n+2 + zip1 v24.2d, v0.2d, v2.2d + zip1 v25.2d, v1.2d, v3.2d + zip1 v26.2d, v4.2d, v6.2d + zip1 v27.2d, v5.2d, v7.2d + zip1 v28.2d, v8.2d, v10.2d + zip1 v29.2d, v9.2d, v11.2d + zip1 v30.2d, v12.2d, v14.2d + zip1 v31.2d, v13.2d, v15.2d + + zip2 v2.2d, v0.2d, v2.2d + zip2 v3.2d, v1.2d, v3.2d + zip2 v6.2d, v4.2d, v6.2d + zip2 v7.2d, v5.2d, v7.2d + zip2 v10.2d, v8.2d, v10.2d + zip2 v11.2d, v9.2d, v11.2d + zip2 v14.2d, v12.2d, v14.2d + zip2 v15.2d, v13.2d, v15.2d + + mov v0.16b, v24.16b + mov v1.16b, v25.16b + mov v4.16b, v26.16b + mov v5.16b, v27.16b + + mov v8.16b, v28.16b + mov v9.16b, v29.16b + mov v12.16b, v30.16b + mov v13.16b, v31.16b + + ld1 {v24.16b-v27.16b}, [x2], #64 + ld1 {v28.16b-v31.16b}, [x2] + + // xor with corresponding input, write to output + eor v16.16b, v16.16b, v0.16b + eor v17.16b, v17.16b, v4.16b + eor v18.16b, v18.16b, v8.16b + eor v19.16b, v19.16b, v12.16b + st1 {v16.16b-v19.16b}, [x1], #64 + + eor v20.16b, v20.16b, v2.16b + eor v21.16b, v21.16b, v6.16b + eor v22.16b, v22.16b, v10.16b + eor v23.16b, v23.16b, v14.16b + st1 {v20.16b-v23.16b}, [x1], #64 + + eor v24.16b, v24.16b, v1.16b + eor v25.16b, v25.16b, v5.16b + eor v26.16b, v26.16b, v9.16b + eor v27.16b, v27.16b, 
v13.16b + st1 {v24.16b-v27.16b}, [x1], #64 + + eor v28.16b, v28.16b, v3.16b + eor v29.16b, v29.16b, v7.16b + eor v30.16b, v30.16b, v11.16b + eor v31.16b, v31.16b, v15.16b + st1 {v28.16b-v31.16b}, [x1] + + ret +ENDPROC(chacha20_4block_xor_neon) + +CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..705b42b06d00 --- /dev/null +++ b/arch/arm64/crypto/chacha20-neon-glue.c @@ -0,0 +1,131 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <crypto/algapi.h> +#include <crypto/chacha20.h> +#include <linux/crypto.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/neon.h> + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + u32 state[16]; + int err; + + if (nbytes <= CHACHA20_BLOCK_SIZE) + return crypto_chacha20_crypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); + + crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + + kernel_neon_begin(); + + while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { + chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, + rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); + } + + if (walk.nbytes) { + chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + + kernel_neon_end(); + + return err; +} + +static struct crypto_alg alg = { + .cra_name = "chacha20", + .cra_driver_name = "chacha20-neon", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + 
.cra_blocksize = 1, + .cra_type = &crypto_blkcipher_type, + .cra_ctxsize = sizeof(struct chacha20_ctx), + .cra_alignmask = sizeof(u32) - 1, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .geniv = "seqiv", + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_simd, + .decrypt = chacha20_simd, + }, + }, +}; + +static int __init chacha20_simd_mod_init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); From 8096667273477e735b0072b11a6d617ccee45e5f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Dec 2016 14:28:59 +0000 Subject: [PATCH 002/142] crypto: arm/chacha20 - implement NEON version based on SSE3 code This is a straight port to ARM/NEON of the x86 SSE3 implementation of the ChaCha20 stream cipher. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 6 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/chacha20-neon-core.S | 523 +++++++++++++++++++++++++++ arch/arm/crypto/chacha20-neon-glue.c | 136 +++++++ 4 files changed, 667 insertions(+) create mode 100644 arch/arm/crypto/chacha20-neon-core.S create mode 100644 arch/arm/crypto/chacha20-neon-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 13f1b4c289d4..2f3339f015d3 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -130,4 +130,10 @@ config CRYPTO_CRC32_ARM_CE depends on KERNEL_MODE_NEON && CRC32 select CRYPTO_HASH +config CRYPTO_CHACHA20_NEON + tristate "NEON accelerated ChaCha20 symmetric cipher" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_CHACHA20 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b578a1820ab1..8d74e55eacd4 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -40,6 +41,7 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o +chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S new file mode 100644 index 000000000000..b0a35935be7e --- /dev/null +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -0,0 +1,523 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + 
* + * Copyright (C) 2016 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/linkage.h> + + .text + .fpu neon + .align 5 + +ENTRY(chacha20_block_xor_neon) + // r0: Input state matrix, s + // r1: 1 data block output, o + // r2: 1 data block input, i + + // + // This function encrypts one ChaCha20 block by loading the state matrix + // in four NEON registers. It performs matrix operation on four words in + // parallel, but requires shuffling to rearrange the words after each + // round. 
+ // + + // x0..3 = s0..3 + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vmov q11, q3 + + mov r3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vext.8 q1, q1, q1, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vext.8 q3, q3, q3, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vext.8 q1, q1, q1, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vext.8 q3, q3, q3, #4 + + subs r3, r3, #1 + bne .Ldoubleround + + add ip, r2, #0x20 + vld1.8 {q4-q5}, [r2] + vld1.8 {q6-q7}, [ip] + + // o0 = i0 ^ (x0 + s0) + vadd.i32 q0, q0, q8 + veor q0, q0, q4 + + // o1 = i1 ^ (x1 + s1) + vadd.i32 q1, q1, q9 + veor q1, q1, q5 + + // o2 = i2 ^ (x2 + s2) + vadd.i32 q2, q2, q10 + veor q2, q2, q6 + + // o3 = i3 ^ (x3 + s3) + vadd.i32 q3, q3, q11 + veor q3, q3, q7 + + add ip, r1, 
#0x20 + vst1.8 {q0-q1}, [r1] + vst1.8 {q2-q3}, [ip] + + bx lr +ENDPROC(chacha20_block_xor_neon) + + .align 5 +ENTRY(chacha20_4block_xor_neon) + push {r4-r6, lr} + mov ip, sp // preserve the stack pointer + sub r3, sp, #0x20 // allocate a 32 byte buffer + bic r3, r3, #0x1f // aligned to 32 bytes + mov sp, r3 + + // r0: Input state matrix, s + // r1: 4 data blocks output, o + // r2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. + // + + // x0..15[0-3] = s0..3[0..3] + add r3, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [r3] + + adr r3, CTRINC + vdup.32 q15, d7[1] + vdup.32 q14, d7[0] + vld1.32 {q11}, [r3, :128] + vdup.32 q13, d6[1] + vdup.32 q12, d6[0] + vadd.i32 q12, q12, q11 // x12 += counter values 0-3 + vdup.32 q11, d5[1] + vdup.32 q10, d5[0] + vdup.32 q9, d4[1] + vdup.32 q8, d4[0] + vdup.32 q7, d3[1] + vdup.32 q6, d3[0] + vdup.32 q5, d2[1] + vdup.32 q4, d2[0] + vdup.32 q3, d1[1] + vdup.32 q2, d1[0] + vdup.32 q1, d0[1] + vdup.32 q0, d0[0] + + mov r3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 + + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + vrev32.16 q15, q15 + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vadd.i32 q8, q8, q12 + 
vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #12 + vshl.u32 q5, q9, #12 + vsri.u32 q4, q8, #20 + vsri.u32 q5, q9, #20 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #12 + vshl.u32 q7, q9, #12 + vsri.u32 q6, q8, #20 + vsri.u32 q7, q9, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q8, q12, q0 + veor q9, q13, q1 + vshl.u32 q12, q8, #8 + vshl.u32 q13, q9, #8 + vsri.u32 q12, q8, #24 + vsri.u32 q13, q9, #24 + + veor q8, q14, q2 + veor q9, q15, q3 + vshl.u32 q14, q8, #8 + vshl.u32 q15, q9, #8 + vsri.u32 q14, q8, #24 + vsri.u32 q15, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #7 + vshl.u32 q5, q9, #7 + vsri.u32 q4, q8, #25 + vsri.u32 q5, q9, #25 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #7 + vshl.u32 q7, q9, #7 + vsri.u32 q6, q8, #25 + vsri.u32 q7, q9, #25 + + vld1.32 {q8-q9}, [sp, :256] + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 + + vrev32.16 q15, q15 + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = 
rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #12 + vshl.u32 q4, q9, #12 + vsri.u32 q7, q8, #20 + vsri.u32 q4, q9, #20 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #12 + vshl.u32 q6, q9, #12 + vsri.u32 q5, q8, #20 + vsri.u32 q6, q9, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q8, q15, q0 + veor q9, q12, q1 + vshl.u32 q15, q8, #8 + vshl.u32 q12, q9, #8 + vsri.u32 q15, q8, #24 + vsri.u32 q12, q9, #24 + + veor q8, q13, q2 + veor q9, q14, q3 + vshl.u32 q13, q8, #8 + vshl.u32 q14, q9, #8 + vsri.u32 q13, q8, #24 + vsri.u32 q14, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #7 + vshl.u32 q4, q9, #7 + vsri.u32 q7, q8, #25 + vsri.u32 q4, q9, #25 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #7 + vshl.u32 q6, q9, #7 + vsri.u32 q5, q8, #25 + vsri.u32 q6, q9, #25 + + subs r3, r3, #1 + beq 0f + + vld1.32 {q8-q9}, [sp, :256] + b .Ldoubleround4 + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] +0: ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q0, q0, q8 + vadd.i32 q1, q1, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q2, q2, q8 + vadd.i32 q3, q3, q9 + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // 
x6[0-3] += s1[2] + // x7[0-3] += s1[3] + ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q4, q4, q8 + vadd.i32 q5, q5, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q6, q6, q8 + vadd.i32 q7, q7, q9 + + // interleave 32-bit words in state n, n+1 + vzip.32 q0, q1 + vzip.32 q2, q3 + vzip.32 q4, q5 + vzip.32 q6, q7 + + // interleave 64-bit words in state n, n+2 + vswp d1, d4 + vswp d3, d6 + vswp d9, d12 + vswp d11, d14 + + // xor with corresponding input, write to output + vld1.8 {q8-q9}, [r2]! + veor q8, q8, q0 + veor q9, q9, q4 + vst1.8 {q8-q9}, [r1]! + + vld1.32 {q8-q9}, [sp, :256] + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + vadd.i32 q8, q8, q0 + vadd.i32 q9, q9, q4 + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q10, q10, q0 + vadd.i32 q11, q11, q4 + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + adr r3, CTRINC + vadd.i32 q12, q12, q0 + vld1.32 {q0}, [r3, :128] + vadd.i32 q13, q13, q4 + vadd.i32 q12, q12, q0 // x12 += counter values 0-3 + + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q14, q14, q0 + vadd.i32 q15, q15, q4 + + // interleave 32-bit words in state n, n+1 + vzip.32 q8, q9 + vzip.32 q10, q11 + vzip.32 q12, q13 + vzip.32 q14, q15 + + // interleave 64-bit words in state n, n+2 + vswp d17, d20 + vswp d19, d22 + vswp d25, d28 + vswp d27, d30 + + vmov q4, q1 + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q8 + veor q1, q1, q12 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q2 + veor q1, q1, q6 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q10 + veor q1, q1, q14 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q4 + veor q1, q1, q5 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q9 + veor q1, q1, q13 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! 
+ veor q0, q0, q3 + veor q1, q1, q7 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2] + veor q0, q0, q11 + veor q1, q1, q15 + vst1.8 {q0-q1}, [r1] + + mov sp, ip + pop {r4-r6, pc} +ENDPROC(chacha20_4block_xor_neon) + + .align 4 +CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..554f7f6069da --- /dev/null +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -0,0 +1,136 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + u32 state[16]; + int err; + + if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); + + crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + + kernel_neon_begin(); + + while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { + chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, + rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); + } + + if (walk.nbytes) { + chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + + kernel_neon_end(); + + return err; +} + +static struct crypto_alg alg = { + .cra_name = "chacha20", + .cra_driver_name = "chacha20-neon", + .cra_priority = 300, + 
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_type = &crypto_blkcipher_type, + .cra_ctxsize = sizeof(struct chacha20_ctx), + .cra_alignmask = sizeof(u32) - 1, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .geniv = "seqiv", + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_simd, + .decrypt = chacha20_simd, + }, + }, +}; + +static int __init chacha20_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_alg(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); From 9ae433bc79f97bae221d53bb1a8e21415ea58625 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 9 Dec 2016 14:33:51 +0000 Subject: [PATCH 003/142] crypto: chacha20 - convert generic and x86 versions to skcipher This converts the ChaCha20 code from a blkcipher to a skcipher, which is now the preferred way to implement symmetric block and stream ciphers. This ports the generic and x86 versions at the same time because the latter reuses routines of the former. Note that the skcipher_walk() API guarantees that all presented blocks except the final one are a multiple of the chunk size, so we can simplify the encrypt() routine somewhat. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/chacha20_glue.c | 69 ++++++++++++++----------------- crypto/chacha20_generic.c | 73 ++++++++++++++------------------- include/crypto/chacha20.h | 6 +-- 3 files changed, 64 insertions(+), 84 deletions(-) diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index f910d1d449f0..78f75b07dc25 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include @@ -63,36 +63,34 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, } } -static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int chacha20_simd(struct skcipher_request *req) { - u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 state[16] __aligned(CHACHA20_STATE_ALIGN); + struct skcipher_walk walk; int err; - if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(desc, dst, src, nbytes); + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(req); - state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); + err = skcipher_walk_virt(&walk, req, true); - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); - - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + crypto_chacha20_init(state, ctx, walk.iv); kernel_fpu_begin(); while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + err = skcipher_walk_done(&walk, + 
walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_fpu_end(); @@ -100,27 +98,22 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-simd", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_simd, - .decrypt = chacha20_simd, - }, - }, +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = sizeof(u32) - 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_simd, + .decrypt = chacha20_simd, }; static int __init chacha20_simd_mod_init(void) @@ -133,12 +126,12 @@ static int __init chacha20_simd_mod_init(void) boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit chacha20_simd_mod_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(chacha20_simd_mod_init); diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index 
1cab83146e33..8b3c04d625c3 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -10,10 +10,9 @@ */ #include -#include -#include -#include #include +#include +#include static inline u32 le32_to_cpuvp(const void *p) { @@ -63,10 +62,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) } EXPORT_SYMBOL_GPL(crypto_chacha20_init); -int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize) { - struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); int i; if (keysize != CHACHA20_KEY_SIZE) @@ -79,66 +78,54 @@ int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); -int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int crypto_chacha20_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; u32 state[16]; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + crypto_chacha20_init(state, ctx, walk.iv); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); - } - - if (walk.nbytes) { + while (walk.nbytes > 0) { chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } return err; } EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); 
-static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, - }, - }, +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = sizeof(u32) - 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_chacha20_crypt, + .decrypt = crypto_chacha20_crypt, }; static int __init chacha20_generic_mod_init(void) { - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit chacha20_generic_mod_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(chacha20_generic_mod_init); diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h index 20d20f681a72..445fc45f4b5b 100644 --- a/include/crypto/chacha20.h +++ b/include/crypto/chacha20.h @@ -5,6 +5,7 @@ #ifndef _CRYPTO_CHACHA20_H #define _CRYPTO_CHACHA20_H +#include #include #include @@ -18,9 +19,8 @@ struct chacha20_ctx { void chacha20_block(u32 *state, void *stream); void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); -int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, 
const u8 *key, unsigned int keysize); -int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); +int crypto_chacha20_crypt(struct skcipher_request *req); #endif From f262c770644fb7685f8e091b7070dfab286bfc5b Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:30:59 +0100 Subject: [PATCH 004/142] crypto: drivers - bring back alphabetical order of Makefile The major content of drivers/crypto/Makefile is sorted; only recent additions break this order. This patch brings back the alphabetical sorting. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index bc53cb833a06..b08608d7080a 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o @@ -11,6 +12,7 @@ obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o +obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o n2_crypto-y := n2_core.o n2_asm.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ @@ -21,15 +23,13 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ -obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o -obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
-obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o -obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o -obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ -obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ -obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ -obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ +obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o +obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o +obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ +obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o +obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ From 50fb57042402c819d247ac4231b80b0da86e2fd7 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Tue, 13 Dec 2016 16:32:06 +0200 Subject: [PATCH 005/142] crypto: aesni-intel - RFC4106 can zero copy when !PageHighMem In the common case of !PageHighMem we can do zero copy crypto even if sg crosses a pages boundary. 
Signed-off-by: Ilya Lesokhin Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 31c34ee131f3..36ca1502630c 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -740,9 +740,11 @@ static int helper_rfc4106_encrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); @@ -822,9 +824,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); From 6bc17d90e62d16828d1a2113b54cfa4e04582fb6 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:51:09 +0100 Subject: [PATCH 006/142] hwrng: core - do not use multiple blank lines This patch fix the checkpatch warning "Please don't use multiple blank lines" Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 
6ce5ce8be2f2..4d03b9798e25 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -30,7 +30,6 @@ */ - #include #include #include @@ -45,12 +44,10 @@ #include #include - #define RNG_MODULE_NAME "hw_random" #define PFX RNG_MODULE_NAME ": " #define RNG_MISCDEV_MINOR 183 /* official */ - static struct hwrng *current_rng; static struct task_struct *hwrng_fill; static LIST_HEAD(rng_list); @@ -298,7 +295,6 @@ out_put: goto out; } - static const struct file_operations rng_chrdev_ops = { .owner = THIS_MODULE, .open = rng_dev_open, @@ -316,7 +312,6 @@ static struct miscdevice rng_miscdev = { .groups = rng_dev_groups, }; - static ssize_t hwrng_attr_current_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) From 2a971e3b248775f808950bdc0ac75f12a2853eff Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:51:10 +0100 Subject: [PATCH 007/142] hwrng: core - rewrite better comparison to NULL This patch fix the checkpatch warning "Comparison to NULL could be written "!ptr" Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 4d03b9798e25..3e565e12f397 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -442,8 +442,7 @@ int hwrng_register(struct hwrng *rng) int err = -EINVAL; struct hwrng *old_rng, *tmp; - if (rng->name == NULL || - (rng->data_read == NULL && rng->read == NULL)) + if (!rng->name || (!rng->data_read && !rng->read)) goto out; mutex_lock(&rng_mutex); From dd8014830d2b1fdf5328978ada706df3ec180c21 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:51:11 +0100 Subject: [PATCH 008/142] hwrng: core - Rewrite the header checkpatch have lot of complaint about header. Furthermore, the header have some offtopic/useless information. This patch rewrite a proper header. 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 38 +++++++++-------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 3e565e12f397..6266d1589bc6 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -1,33 +1,13 @@ /* - Added support for the AMD Geode LX RNG - (c) Copyright 2004-2005 Advanced Micro Devices, Inc. - - derived from - - Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG) - (c) Copyright 2003 Red Hat Inc - - derived from - - Hardware driver for the AMD 768 Random Number Generator (RNG) - (c) Copyright 2001 Red Hat Inc - - derived from - - Hardware driver for Intel i810 Random Number Generator (RNG) - Copyright 2000,2001 Jeff Garzik - Copyright 2000,2001 Philipp Rumpf - - Added generic RNG API - Copyright 2006 Michael Buesch - Copyright 2005 (c) MontaVista Software, Inc. - - Please read Documentation/hw_random.txt for details on use. - - ---------------------------------------------------------- - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - + * hw_random/core.c: HWRNG core API + * + * Copyright 2006 Michael Buesch + * Copyright 2005 (c) MontaVista Software, Inc. + * + * Please read Documentation/hw_random.txt for details on use. + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. */ #include From affdec58dafcf1cd7ff1dfaf2e4539b5532d0e84 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:51:12 +0100 Subject: [PATCH 009/142] hwrng: core - Replace asm/uaccess.h by linux/uaccess.h This patch fix the checkpatch warning about asm/uaccess.h. In the same time, we sort the headers in alphabetical order. 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 6266d1589bc6..f81fd72b6692 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -10,18 +10,18 @@ * of the GNU General Public License, incorporated herein by reference. */ -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #define RNG_MODULE_NAME "hw_random" From fd50d71f94fb1c8614098949db068cd4c8dbb91d Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:51:13 +0100 Subject: [PATCH 010/142] hwrng: core - Move hwrng miscdev minor number to include/linux/miscdevice.h This patch moves the define for hwrng's miscdev minor number to include/linux/miscdevice.h. It's better that all minor numbers are in the same place. Rename it to HWRNG_MINOR (from RNG_MISCDEV_MINOR) in the process, since no other miscdev define has MISCDEV in its name.
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 3 +-- include/linux/miscdevice.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index f81fd72b6692..c5f131d90473 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -26,7 +26,6 @@ #define RNG_MODULE_NAME "hw_random" #define PFX RNG_MODULE_NAME ": " -#define RNG_MISCDEV_MINOR 183 /* official */ static struct hwrng *current_rng; static struct task_struct *hwrng_fill; @@ -285,7 +284,7 @@ static const struct file_operations rng_chrdev_ops = { static const struct attribute_group *rng_dev_groups[]; static struct miscdevice rng_miscdev = { - .minor = RNG_MISCDEV_MINOR, + .minor = HWRNG_MINOR, .name = RNG_MODULE_NAME, .nodename = "hwrng", .fops = &rng_chrdev_ops, diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index ed30d5d713e3..5d81f739aa0a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -31,6 +31,7 @@ #define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 /* unused */ #define I2O_MINOR 166 +#define HWRNG_MINOR 183 #define MICROCODE_MINOR 184 #define IRNET_MINOR 187 #define VFIO_MINOR 196 From 079840bd13f793b915f6c8e44452eeb4a0aba8ba Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:51:14 +0100 Subject: [PATCH 011/142] hwrng: core - remove unused PFX macro This patch remove the unused PFX macro. 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index c5f131d90473..68fdc60ddf50 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -25,7 +25,6 @@ #include #define RNG_MODULE_NAME "hw_random" -#define PFX RNG_MODULE_NAME ": " static struct hwrng *current_rng; static struct task_struct *hwrng_fill; From 862d64c9d303b14e2abe6bedaa17a44e27148f46 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Thu, 15 Dec 2016 10:59:42 +0100 Subject: [PATCH 012/142] crypto: bfin_crc - Fix format printing warning bfin_crc.c prints some u32 values as unsigned long, so gcc complains about it. This patch removes the long print qualifier. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/bfin_crc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 10db7df366c8..a118b9bed669 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -203,7 +203,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -233,7 +233,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = dma_count; crc->sg_cpu[i].x_modify = dma_mod; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -257,7 +257,7 @@ static void bfin_crypto_crc_config_dma(struct
bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); From 422ef504590ac71625129b0f93604ef73f1fc9fc Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Thu, 15 Dec 2016 10:59:43 +0100 Subject: [PATCH 013/142] crypto: bfin_crc - Remove unneeded linux/miscdevice.h include bfin_crc.h driver does not use any miscdevice, so this patch remove this unnecessary inclusion. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/bfin_crc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/bfin_crc.h b/drivers/crypto/bfin_crc.h index 75cef4dc85a1..786ef746d109 100644 --- a/drivers/crypto/bfin_crc.h +++ b/drivers/crypto/bfin_crc.h @@ -55,7 +55,6 @@ struct crc_info { #include #include -#include struct crc_register { u32 control; From 6207119444595d287b1e9e83a2066c17209698f3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 15 Dec 2016 14:31:01 +0100 Subject: [PATCH 014/142] crypto: algif_hash - avoid zero-sized array With this reproducer: struct sockaddr_alg alg = { .salg_family = 0x26, .salg_type = "hash", .salg_feat = 0xf, .salg_mask = 0x5, .salg_name = "digest_null", }; int sock, sock2; sock = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(sock, (struct sockaddr *)&alg, sizeof(alg)); sock2 = accept(sock, NULL, NULL); setsockopt(sock, SOL_ALG, ALG_SET_KEY, "\x9b\xca", 2); accept(sock2, NULL, NULL); ==== 8< ======== 8< ======== 8< ======== 8< ==== one can immediatelly see an UBSAN warning: UBSAN: Undefined behaviour in crypto/algif_hash.c:187:7 variable length array bound value 0 <= 0 CPU: 0 PID: 15949 Comm: syz-executor Tainted: G E 4.4.30-0-default #1 ... Call Trace: ... [] ? __ubsan_handle_vla_bound_not_positive+0x13d/0x188 [] ? 
__ubsan_handle_out_of_bounds+0x1bc/0x1bc [] ? hash_accept+0x5bd/0x7d0 [algif_hash] [] ? hash_accept_nokey+0x3f/0x51 [algif_hash] [] ? hash_accept_parent_nokey+0x4a0/0x4a0 [algif_hash] [] ? SyS_accept+0x2b/0x40 It is a correct warning, as hash state is propagated to accept as zero, but creating a zero-length variable array is not allowed in C. Fix this as proposed by Herbert -- do "?: 1" on that site. No sizeof or similar happens in the code there, so we just allocate one byte even though we do not use the array. Signed-off-by: Jiri Slaby Cc: Herbert Xu Cc: "David S. Miller" (maintainer:CRYPTO API) Reported-by: Sasha Levin Signed-off-by: Herbert Xu --- crypto/algif_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index d19b09cdf284..54fc90e8339c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -245,7 +245,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; struct ahash_request *req = &ctx->req; - char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))]; + char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1]; struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; From 785e5c616c849ec3615b3e86427f736315008b75 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Mon, 19 Dec 2016 10:20:44 +0800 Subject: [PATCH 015/142] crypto: mediatek - Add crypto driver support for some MediaTek chips This adds support for the MediaTek hardware accelerator on mt7623/mt2701/mt8521p SoC. This driver currently implements: - SHA1 and SHA2 family(HMAC) hash algorithms. - AES block cipher in CBC/ECB mode with 128/192/256-bit keys.
Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 17 + drivers/crypto/Makefile | 1 + drivers/crypto/mediatek/Makefile | 2 + drivers/crypto/mediatek/mtk-aes.c | 765 +++++++++++++ drivers/crypto/mediatek/mtk-platform.c | 604 ++++++++++ drivers/crypto/mediatek/mtk-platform.h | 238 ++++ drivers/crypto/mediatek/mtk-regs.h | 194 ++++ drivers/crypto/mediatek/mtk-sha.c | 1437 ++++++++++++++++++++++++ 8 files changed, 3258 insertions(+) create mode 100644 drivers/crypto/mediatek/Makefile create mode 100644 drivers/crypto/mediatek/mtk-aes.c create mode 100644 drivers/crypto/mediatek/mtk-platform.c create mode 100644 drivers/crypto/mediatek/mtk-platform.h create mode 100644 drivers/crypto/mediatek/mtk-regs.h create mode 100644 drivers/crypto/mediatek/mtk-sha.c diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 79564785ae30..8ded3af88b16 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -553,6 +553,23 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_MEDIATEK + tristate "MediaTek's EIP97 Cryptographic Engine driver" + depends on ARM && (ARCH_MEDIATEK || COMPILE_TEST) + select NEON + select KERNEL_MODE_NEON + select ARM_CRYPTO + select CRYPTO_AES + select CRYPTO_BLKCIPHER + select CRYPTO_SHA1_ARM_NEON + select CRYPTO_SHA256_ARM + select CRYPTO_SHA512_ARM + select CRYPTO_HMAC + help + This driver allows you to utilize the hardware crypto accelerator + EIP97 which can be found on the MT7623 MT2701, MT8521p, etc .... + Select this if you want to use it for AES/SHA1/SHA2 algorithms. 
+ source "drivers/crypto/chelsio/Kconfig" source "drivers/crypto/virtio/Kconfig" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index b08608d7080a..8891ccc5844c 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o diff --git a/drivers/crypto/mediatek/Makefile b/drivers/crypto/mediatek/Makefile new file mode 100644 index 000000000000..187be79c7f3e --- /dev/null +++ b/drivers/crypto/mediatek/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mtk-crypto.o +mtk-crypto-objs:= mtk-platform.o mtk-aes.o mtk-sha.o diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c new file mode 100644 index 000000000000..3271471060d9 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -0,0 +1,765 @@ +/* + * Cryptographic API. + * + * Driver for EIP97 AES acceleration. + * + * Copyright (c) 2016 Ryder Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some ideas are from atmel-aes.c drivers. 
+ */ + +#include +#include "mtk-platform.h" + +#define AES_QUEUE_SIZE 512 +#define AES_BUF_ORDER 2 +#define AES_BUF_SIZE ((PAGE_SIZE << AES_BUF_ORDER) \ + & ~(AES_BLOCK_SIZE - 1)) + +/* AES command token */ +#define AES_CT_SIZE_ECB 2 +#define AES_CT_SIZE_CBC 3 +#define AES_CT_CTRL_HDR cpu_to_le32(0x00220000) +#define AES_COMMAND0 cpu_to_le32(0x05000000) +#define AES_COMMAND1 cpu_to_le32(0x2d060000) +#define AES_COMMAND2 cpu_to_le32(0xe4a63806) + +/* AES transform information */ +#define AES_TFM_ECB cpu_to_le32(0x0 << 0) +#define AES_TFM_CBC cpu_to_le32(0x1 << 0) +#define AES_TFM_DECRYPT cpu_to_le32(0x5 << 0) +#define AES_TFM_ENCRYPT cpu_to_le32(0x4 << 0) +#define AES_TFM_SIZE(x) cpu_to_le32((x) << 8) +#define AES_TFM_128BITS cpu_to_le32(0xb << 16) +#define AES_TFM_192BITS cpu_to_le32(0xd << 16) +#define AES_TFM_256BITS cpu_to_le32(0xf << 16) +#define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) + +/* AES flags */ +#define AES_FLAGS_MODE_MSK 0x7 +#define AES_FLAGS_ECB BIT(0) +#define AES_FLAGS_CBC BIT(1) +#define AES_FLAGS_ENCRYPT BIT(2) +#define AES_FLAGS_BUSY BIT(3) + +/** + * mtk_aes_ct is a set of hardware instructions(command token) + * that are used to control engine's processing flow of AES. + */ +struct mtk_aes_ct { + __le32 ct_ctrl0; + __le32 ct_ctrl1; + __le32 ct_ctrl2; +}; + +/** + * mtk_aes_tfm is used to define AES transform state + * and contains all keys and initial vectors. + */ +struct mtk_aes_tfm { + __le32 tfm_ctrl0; + __le32 tfm_ctrl1; + __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE)]; +}; + +/** + * mtk_aes_info consists of command token and transform state of AES, + * which should be encapsulated in command and result descriptors. + * + * The engine requires this information to do: + * - Commands decoding and control of the engine's data path. + * - Coordinating hardware data fetch and store operations. + * - Result token construction and output. 
+ */ +struct mtk_aes_info { + struct mtk_aes_ct ct; + struct mtk_aes_tfm tfm; +}; + +struct mtk_aes_reqctx { + u64 mode; +}; + +struct mtk_aes_ctx { + struct mtk_cryp *cryp; + struct mtk_aes_info info; + u32 keylen; +}; + +struct mtk_aes_drv { + struct list_head dev_list; + /* Device list lock */ + spinlock_t lock; +}; + +static struct mtk_aes_drv mtk_aes = { + .dev_list = LIST_HEAD_INIT(mtk_aes.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(mtk_aes.lock), +}; + +static inline u32 mtk_aes_read(struct mtk_cryp *cryp, u32 offset) +{ + return readl_relaxed(cryp->base + offset); +} + +static inline void mtk_aes_write(struct mtk_cryp *cryp, + u32 offset, u32 value) +{ + writel_relaxed(value, cryp->base + offset); +} + +static struct mtk_cryp *mtk_aes_find_dev(struct mtk_aes_ctx *ctx) +{ + struct mtk_cryp *cryp = NULL; + struct mtk_cryp *tmp; + + spin_lock_bh(&mtk_aes.lock); + if (!ctx->cryp) { + list_for_each_entry(tmp, &mtk_aes.dev_list, aes_list) { + cryp = tmp; + break; + } + ctx->cryp = cryp; + } else { + cryp = ctx->cryp; + } + spin_unlock_bh(&mtk_aes.lock); + + return cryp; +} + +static inline size_t mtk_aes_padlen(size_t len) +{ + len &= AES_BLOCK_SIZE - 1; + return len ? 
AES_BLOCK_SIZE - len : 0; +} + +static bool mtk_aes_check_aligned(struct scatterlist *sg, size_t len, + struct mtk_aes_dma *dma) +{ + int nents; + + if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) + return false; + + for (nents = 0; sg; sg = sg_next(sg), ++nents) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return false; + + if (len <= sg->length) { + if (!IS_ALIGNED(len, AES_BLOCK_SIZE)) + return false; + + dma->nents = nents + 1; + dma->remainder = sg->length - len; + sg->length = len; + return true; + } + + if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) + return false; + + len -= sg->length; + } + + return false; +} + +/* Initialize and map transform information of AES */ +static int mtk_aes_info_map(struct mtk_cryp *cryp, + struct mtk_aes_rec *aes, + size_t len) +{ + struct mtk_aes_ctx *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(aes->req)); + struct mtk_aes_info *info = aes->info; + struct mtk_aes_ct *ct = &info->ct; + struct mtk_aes_tfm *tfm = &info->tfm; + + aes->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); + + if (aes->flags & AES_FLAGS_ENCRYPT) + tfm->tfm_ctrl0 = AES_TFM_ENCRYPT; + else + tfm->tfm_ctrl0 = AES_TFM_DECRYPT; + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + tfm->tfm_ctrl0 |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + tfm->tfm_ctrl0 |= AES_TFM_256BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_192)) + tfm->tfm_ctrl0 |= AES_TFM_192BITS; + + ct->ct_ctrl0 = AES_COMMAND0 | cpu_to_le32(len); + ct->ct_ctrl1 = AES_COMMAND1; + + if (aes->flags & AES_FLAGS_CBC) { + const u32 *iv = (const u32 *)aes->req->info; + u32 *iv_state = tfm->state + ctx->keylen; + int i; + + aes->ct_size = AES_CT_SIZE_CBC; + ct->ct_ctrl2 = AES_COMMAND2; + + tfm->tfm_ctrl0 |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + tfm->tfm_ctrl1 = AES_TFM_CBC | AES_TFM_FULL_IV; + + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(iv[i]); + + } else if (aes->flags & AES_FLAGS_ECB) { + 
aes->ct_size = AES_CT_SIZE_ECB; + tfm->tfm_ctrl0 |= AES_TFM_SIZE(ctx->keylen); + tfm->tfm_ctrl1 = AES_TFM_ECB; + } + + aes->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, aes->ct_dma))) { + dev_err(cryp->dev, "dma %d bytes error\n", sizeof(*info)); + return -EINVAL; + } + aes->tfm_dma = aes->ct_dma + sizeof(*ct); + + return 0; +} + +static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_ring *ring = cryp->ring[aes->id]; + struct mtk_desc *cmd = NULL, *res = NULL; + struct scatterlist *ssg, *dsg; + u32 len = aes->src.sg_len; + int nents; + + /* Fill in the command/result descriptors */ + for (nents = 0; nents < len; ++nents) { + ssg = &aes->src.sg[nents]; + dsg = &aes->dst.sg[nents]; + + cmd = ring->cmd_base + ring->pos; + cmd->hdr = MTK_DESC_BUF_LEN(ssg->length); + cmd->buf = cpu_to_le32(sg_dma_address(ssg)); + + res = ring->res_base + ring->pos; + res->hdr = MTK_DESC_BUF_LEN(dsg->length); + res->buf = cpu_to_le32(sg_dma_address(dsg)); + + if (nents == 0) { + res->hdr |= MTK_DESC_FIRST; + cmd->hdr |= MTK_DESC_FIRST | + MTK_DESC_CT_LEN(aes->ct_size); + cmd->ct = cpu_to_le32(aes->ct_dma); + cmd->ct_hdr = aes->ct_hdr; + cmd->tfm = cpu_to_le32(aes->tfm_dma); + } + + if (++ring->pos == MTK_DESC_NUM) + ring->pos = 0; + } + + cmd->hdr |= MTK_DESC_LAST; + res->hdr |= MTK_DESC_LAST; + + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. 
+ */ + wmb(); + /* Start DMA transfer */ + mtk_aes_write(cryp, RDR_PREP_COUNT(aes->id), MTK_DESC_CNT(len)); + mtk_aes_write(cryp, CDR_PREP_COUNT(aes->id), MTK_DESC_CNT(len)); + + return -EINPROGRESS; +} + +static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma) +{ + struct scatterlist *sg = dma->sg; + int nents = dma->nents; + + if (!dma->remainder) + return; + + while (--nents > 0 && sg) + sg = sg_next(sg); + + if (!sg) + return; + + sg->length += dma->remainder; +} + +static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct scatterlist *src = aes->req->src; + struct scatterlist *dst = aes->req->dst; + size_t len = aes->req->nbytes; + size_t padlen = 0; + bool src_aligned, dst_aligned; + + aes->total = len; + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; + else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + padlen = mtk_aes_padlen(len); + + if (len + padlen > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, len + padlen); + } + + if (aes->src.sg == aes->dst.sg) { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_BIDIRECTIONAL); + aes->dst.sg_len = aes->src.sg_len; + if (unlikely(!aes->src.sg_len)) + return -EFAULT; + } else { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + if (unlikely(!aes->src.sg_len)) + return -EFAULT; + + aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, + aes->dst.nents, DMA_FROM_DEVICE); + if (unlikely(!aes->dst.sg_len)) 
{ + dma_unmap_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + return -EFAULT; + } + } + + return mtk_aes_info_map(cryp, aes, len + padlen); +} + +static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ablkcipher_request *req) +{ + struct mtk_aes_rec *aes = cryp->aes[id]; + struct crypto_async_request *areq, *backlog; + struct mtk_aes_reqctx *rctx; + struct mtk_aes_ctx *ctx; + unsigned long flags; + int err, ret = 0; + + spin_lock_irqsave(&aes->lock, flags); + if (req) + ret = ablkcipher_enqueue_request(&aes->queue, req); + if (aes->flags & AES_FLAGS_BUSY) { + spin_unlock_irqrestore(&aes->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&aes->queue); + areq = crypto_dequeue_request(&aes->queue); + if (areq) + aes->flags |= AES_FLAGS_BUSY; + spin_unlock_irqrestore(&aes->lock, flags); + + if (!areq) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ablkcipher_request_cast(areq); + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx = ablkcipher_request_ctx(req); + rctx->mode &= AES_FLAGS_MODE_MSK; + /* Assign new request to device */ + aes->req = req; + aes->info = &ctx->info; + aes->flags = (aes->flags & ~AES_FLAGS_MODE_MSK) | rctx->mode; + + err = mtk_aes_map(cryp, aes); + if (err) + return err; + + return mtk_aes_xmit(cryp, aes); +} + +static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + dma_unmap_single(cryp->dev, aes->ct_dma, + sizeof(struct mtk_aes_info), DMA_TO_DEVICE); + + if (aes->src.sg == aes->dst.sg) { + dma_unmap_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_BIDIRECTIONAL); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } else { + dma_unmap_sg(cryp->dev, aes->dst.sg, + aes->dst.nents, DMA_FROM_DEVICE); + + if (aes->dst.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->dst); + + dma_unmap_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + + if (aes->src.sg != &aes->aligned_sg) + 
mtk_aes_restore_sg(&aes->src); + } + + if (aes->dst.sg == &aes->aligned_sg) + sg_copy_from_buffer(aes->real_dst, + sg_nents(aes->real_dst), + aes->buf, aes->total); +} + +static inline void mtk_aes_complete(struct mtk_cryp *cryp, + struct mtk_aes_rec *aes) +{ + aes->flags &= ~AES_FLAGS_BUSY; + + aes->req->base.complete(&aes->req->base, 0); + + /* Handle new request */ + mtk_aes_handle_queue(cryp, aes->id, NULL); +} + +/* Check and set the AES key to transform state buffer */ +static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, + const u8 *key, u32 keylen) +{ + struct mtk_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + const u32 *key_tmp = (const u32 *)key; + u32 *key_state = ctx->info.tfm.state; + int i; + + if (keylen != AES_KEYSIZE_128 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_256) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + key_state[i] = cpu_to_le32(key_tmp[i]); + + return 0; +} + +static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode) +{ + struct mtk_aes_ctx *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + rctx->mode = mode; + + return mtk_aes_handle_queue(ctx->cryp, + !(mode & AES_FLAGS_ENCRYPT), req); +} + +static int mtk_ecb_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_ECB); +} + +static int mtk_ecb_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ECB); +} + +static int mtk_cbc_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CBC); +} + +static int mtk_cbc_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CBC); +} + +static int mtk_aes_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + 
tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + + cryp = mtk_aes_find_dev(ctx); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + return 0; +} + +static struct crypto_alg aes_algs[] = { +{ + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 15, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_cbc_encrypt, + .decrypt = mtk_cbc_decrypt, + .ivsize = AES_BLOCK_SIZE, + } +}, +{ + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 15, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_ecb_encrypt, + .decrypt = mtk_ecb_decrypt, + } +}, +}; + +static void mtk_aes_enc_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[0]; + + mtk_aes_unmap(cryp, aes); + mtk_aes_complete(cryp, aes); +} + +static void mtk_aes_dec_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[1]; + + mtk_aes_unmap(cryp, aes); + mtk_aes_complete(cryp, aes); +} + +static irqreturn_t mtk_aes_enc_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[0]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING0)); + + 
mtk_aes_write(cryp, RDR_STAT(RING0), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING0), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING0), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_aes_dec_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[1]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING1)); + + mtk_aes_write(cryp, RDR_STAT(RING1), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING1), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING1), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +/* + * The purpose of creating encryption and decryption records is + * to process outbound/inbound data in parallel, it can improve + * performance in most use cases, such as IPSec VPN, especially + * under heavy network traffic. 
+ */ +static int mtk_aes_record_init(struct mtk_cryp *cryp) +{ + struct mtk_aes_rec **aes = cryp->aes; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + aes[i] = kzalloc(sizeof(**aes), GFP_KERNEL); + if (!aes[i]) + goto err_cleanup; + + aes[i]->buf = (void *)__get_free_pages(GFP_KERNEL, + AES_BUF_ORDER); + if (!aes[i]->buf) + goto err_cleanup; + + aes[i]->id = i; + + spin_lock_init(&aes[i]->lock); + crypto_init_queue(&aes[i]->queue, AES_QUEUE_SIZE); + } + + tasklet_init(&aes[0]->task, mtk_aes_enc_task, (unsigned long)cryp); + tasklet_init(&aes[1]->task, mtk_aes_dec_task, (unsigned long)cryp); + + return 0; + +err_cleanup: + for (; i--; ) { + free_page((unsigned long)aes[i]->buf); + kfree(aes[i]); + } + + return err; +} + +static void mtk_aes_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->aes[i]->task); + free_page((unsigned long)cryp->aes[i]->buf); + kfree(cryp->aes[i]); + } +} + +static void mtk_aes_unregister_algs(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) + crypto_unregister_alg(&aes_algs[i]); +} + +static int mtk_aes_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + err = crypto_register_alg(&aes_algs[i]); + if (err) + goto err_aes_algs; + } + + return 0; + +err_aes_algs: + for (; i--; ) + crypto_unregister_alg(&aes_algs[i]); + + return err; +} + +int mtk_cipher_alg_register(struct mtk_cryp *cryp) +{ + int ret; + + INIT_LIST_HEAD(&cryp->aes_list); + + /* Initialize two cipher records */ + ret = mtk_aes_record_init(cryp); + if (ret) + goto err_record; + + /* Ring0 is use by encryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING0], mtk_aes_enc_irq, + IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES encryption irq.\n"); + goto err_res; + } + + /* Ring1 is use by decryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING1], mtk_aes_dec_irq, + 
IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES decryption irq.\n"); + goto err_res; + } + + /* Enable ring0 and ring1 interrupt */ + mtk_aes_write(cryp, AIC_ENABLE_SET(RING0), MTK_IRQ_RDR0); + mtk_aes_write(cryp, AIC_ENABLE_SET(RING1), MTK_IRQ_RDR1); + + spin_lock(&mtk_aes.lock); + list_add_tail(&cryp->aes_list, &mtk_aes.dev_list); + spin_unlock(&mtk_aes.lock); + + ret = mtk_aes_register_algs(); + if (ret) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); +err_res: + mtk_aes_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-aes initialization failed.\n"); + return ret; +} + +void mtk_cipher_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); + + mtk_aes_unregister_algs(); + mtk_aes_record_free(cryp); +} diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c new file mode 100644 index 000000000000..286296fbf15d --- /dev/null +++ b/drivers/crypto/mediatek/mtk-platform.c @@ -0,0 +1,604 @@ +/* + * Driver for EIP97 cryptographic accelerator. + * + * Copyright (c) 2016 Ryder Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include "mtk-platform.h" + +#define MTK_BURST_SIZE_MSK GENMASK(7, 4) +#define MTK_BURST_SIZE(x) ((x) << 4) +#define MTK_DESC_SIZE(x) ((x) << 0) +#define MTK_DESC_OFFSET(x) ((x) << 16) +#define MTK_DESC_FETCH_SIZE(x) ((x) << 0) +#define MTK_DESC_FETCH_THRESH(x) ((x) << 16) +#define MTK_DESC_OVL_IRQ_EN BIT(25) +#define MTK_DESC_ATP_PRESENT BIT(30) + +#define MTK_DFSE_IDLE GENMASK(3, 0) +#define MTK_DFSE_THR_CTRL_EN BIT(30) +#define MTK_DFSE_THR_CTRL_RESET BIT(31) +#define MTK_DFSE_RING_ID(x) (((x) >> 12) & GENMASK(3, 0)) +#define MTK_DFSE_MIN_DATA(x) ((x) << 0) +#define MTK_DFSE_MAX_DATA(x) ((x) << 8) +#define MTK_DFE_MIN_CTRL(x) ((x) << 16) +#define MTK_DFE_MAX_CTRL(x) ((x) << 24) + +#define MTK_IN_BUF_MIN_THRESH(x) ((x) << 8) +#define MTK_IN_BUF_MAX_THRESH(x) ((x) << 12) +#define MTK_OUT_BUF_MIN_THRESH(x) ((x) << 0) +#define MTK_OUT_BUF_MAX_THRESH(x) ((x) << 4) +#define MTK_IN_TBUF_SIZE(x) (((x) >> 4) & GENMASK(3, 0)) +#define MTK_IN_DBUF_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_OUT_DBUF_SIZE(x) (((x) >> 16) & GENMASK(3, 0)) +#define MTK_CMD_FIFO_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_RES_FIFO_SIZE(x) (((x) >> 12) & GENMASK(3, 0)) + +#define MTK_PE_TK_LOC_AVL BIT(2) +#define MTK_PE_PROC_HELD BIT(14) +#define MTK_PE_TK_TIMEOUT_EN BIT(22) +#define MTK_PE_INPUT_DMA_ERR BIT(0) +#define MTK_PE_OUTPUT_DMA_ERR BIT(1) +#define MTK_PE_PKT_PORC_ERR BIT(2) +#define MTK_PE_PKT_TIMEOUT BIT(3) +#define MTK_PE_FATAL_ERR BIT(14) +#define MTK_PE_INPUT_DMA_ERR_EN BIT(16) +#define MTK_PE_OUTPUT_DMA_ERR_EN BIT(17) +#define MTK_PE_PKT_PORC_ERR_EN BIT(18) +#define MTK_PE_PKT_TIMEOUT_EN BIT(19) +#define MTK_PE_FATAL_ERR_EN BIT(30) +#define MTK_PE_INT_OUT_EN BIT(31) + +#define MTK_HIA_SIGNATURE ((u16)0x35ca) +#define MTK_HIA_DATA_WIDTH(x) (((x) >> 25) & GENMASK(1, 0)) +#define MTK_HIA_DMA_LENGTH(x) (((x) >> 20) & GENMASK(4, 0)) +#define MTK_CDR_STAT_CLR GENMASK(4, 0) +#define MTK_RDR_STAT_CLR 
GENMASK(7, 0) + +#define MTK_AIC_INT_MSK GENMASK(5, 0) +#define MTK_AIC_VER_MSK (GENMASK(15, 0) | GENMASK(27, 20)) +#define MTK_AIC_VER11 0x011036c9 +#define MTK_AIC_VER12 0x012036c9 +#define MTK_AIC_G_CLR GENMASK(30, 20) + +/** + * EIP97 is an integrated security subsystem to accelerate cryptographic + * functions and protocols to offload the host processor. + * Some important hardware modules are briefly introduced below: + * + * Host Interface Adapter(HIA) - the main interface between the host + * system and the hardware subsystem. It is responsible for attaching + * processing engine to the specific host bus interface and provides a + * standardized software view for off loading tasks to the engine. + * + * Command Descriptor Ring Manager(CDR Manager) - keeps track of how many + * CD the host has prepared in the CDR. It monitors the fill level of its + * CD-FIFO and if there's sufficient space for the next block of descriptors, + * then it fires off a DMA request to fetch a block of CDs. + * + * Data fetch engine(DFE) - It is responsible for parsing the CD and + * setting up the required control and packet data DMA transfers from + * system memory to the processing engine. + * + * Result Descriptor Ring Manager(RDR Manager) - same as CDR Manager, + * but target is result descriptors, Moreover, it also handles the RD + * updates under control of the DSE. For each packet data segment + * processed, the DSE triggers the RDR Manager to write the updated RD. + * If triggered to update, the RDR Manager sets up a DMA operation to + * copy the RD from the DSE to the correct location in the RDR. + * + * Data Store Engine(DSE) - It is responsible for parsing the prepared RD + * and setting up the required control and packet data DMA transfers from + * the processing engine to system memory. + * + * Advanced Interrupt Controllers(AICs) - receive interrupt request signals + * from various sources and combine them into one interrupt output. 
+ * The AICs are used by: + * - One for the HIA global and processing engine interrupts. + * - The others for the descriptor ring interrupts. + */ + +/* Cryptographic engine capabilities */ +struct mtk_sys_cap { + /* host interface adapter */ + u32 hia_ver; + u32 hia_opt; + /* packet engine */ + u32 pkt_eng_opt; + /* global hardware */ + u32 hw_opt; +}; + +static void mtk_desc_ring_link(struct mtk_cryp *cryp, u32 mask) +{ + /* Assign rings to DFE/DSE thread and enable it */ + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DFE_THR_CTRL); + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DSE_THR_CTRL); +} + +static void mtk_dfe_dse_buf_setup(struct mtk_cryp *cryp, + struct mtk_sys_cap *cap) +{ + u32 width = MTK_HIA_DATA_WIDTH(cap->hia_opt) + 2; + u32 len = MTK_HIA_DMA_LENGTH(cap->hia_opt) - 1; + u32 ipbuf = min((u32)MTK_IN_DBUF_SIZE(cap->hw_opt) + width, len); + u32 opbuf = min((u32)MTK_OUT_DBUF_SIZE(cap->hw_opt) + width, len); + u32 itbuf = min((u32)MTK_IN_TBUF_SIZE(cap->hw_opt) + width, len); + + writel(MTK_DFSE_MIN_DATA(ipbuf - 1) | + MTK_DFSE_MAX_DATA(ipbuf) | + MTK_DFE_MIN_CTRL(itbuf - 1) | + MTK_DFE_MAX_CTRL(itbuf), + cryp->base + DFE_CFG); + + writel(MTK_DFSE_MIN_DATA(opbuf - 1) | + MTK_DFSE_MAX_DATA(opbuf), + cryp->base + DSE_CFG); + + writel(MTK_IN_BUF_MIN_THRESH(ipbuf - 1) | + MTK_IN_BUF_MAX_THRESH(ipbuf), + cryp->base + PE_IN_DBUF_THRESH); + + writel(MTK_IN_BUF_MIN_THRESH(itbuf - 1) | + MTK_IN_BUF_MAX_THRESH(itbuf), + cryp->base + PE_IN_TBUF_THRESH); + + writel(MTK_OUT_BUF_MIN_THRESH(opbuf - 1) | + MTK_OUT_BUF_MAX_THRESH(opbuf), + cryp->base + PE_OUT_DBUF_THRESH); + + writel(0, cryp->base + PE_OUT_TBUF_THRESH); + writel(0, cryp->base + PE_OUT_BUF_CTRL); +} + +static int mtk_dfe_dse_state_check(struct mtk_cryp *cryp) +{ + int ret = -EINVAL; + u32 val; + + /* Check for completion of all DMA transfers */ + val = readl(cryp->base + DFE_THR_STAT); + if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) { + val = readl(cryp->base + DSE_THR_STAT); + if 
(MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) + ret = 0; + } + + if (!ret) { + /* Take DFE/DSE thread out of reset */ + writel(0, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DSE_THR_CTRL); + } else { + return -EBUSY; + } + + return 0; +} + +static int mtk_dfe_dse_reset(struct mtk_cryp *cryp) +{ + int err; + + /* Reset DSE/DFE and correct system priorities for all rings. */ + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DFE_PRIO_0); + writel(0, cryp->base + DFE_PRIO_1); + writel(0, cryp->base + DFE_PRIO_2); + writel(0, cryp->base + DFE_PRIO_3); + + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DSE_THR_CTRL); + writel(0, cryp->base + DSE_PRIO_0); + writel(0, cryp->base + DSE_PRIO_1); + writel(0, cryp->base + DSE_PRIO_2); + writel(0, cryp->base + DSE_PRIO_3); + + err = mtk_dfe_dse_state_check(cryp); + if (err) + return err; + + return 0; +} + +static void mtk_cmd_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + /* Full descriptor that fits FIFO minus one */ + u32 count = + ((1 << MTK_CMD_FIFO_SIZE(cap->hia_opt)) / MTK_DESC_SZ) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + CDR_CFG(i)); + + /* Clear CDR count */ + writel(MTK_CNT_RST, cryp->base + CDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + CDR_PROC_COUNT(i)); + + writel(0, cryp->base + CDR_PREP_PNTR(i)); + writel(0, cryp->base + CDR_PROC_PNTR(i)); + writel(0, cryp->base + CDR_DMA_CFG(i)); + + /* Configure CDR host address space */ + writel(0, cryp->base + CDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->cmd_dma, cryp->base + CDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + CDR_RING_SIZE(i)); + + /* Clear and disable all CDR interrupts */ + writel(MTK_CDR_STAT_CLR, cryp->base + CDR_STAT(i)); + + /* + * Set command descriptor offset and enable additional + * token present in descriptor. 
+ */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | + MTK_DESC_OFFSET(MTK_DESC_OFF) | + MTK_DESC_ATP_PRESENT, + cryp->base + CDR_DESC_SIZE(i)); + + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * MTK_DESC_SZ), + cryp->base + CDR_CFG(i)); +} + +static void mtk_res_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + u32 rndup = 2; + u32 count = ((1 << MTK_RES_FIFO_SIZE(cap->hia_opt)) / rndup) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + RDR_CFG(i)); + + /* Clear RDR count */ + writel(MTK_CNT_RST, cryp->base + RDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + RDR_PROC_COUNT(i)); + + writel(0, cryp->base + RDR_PREP_PNTR(i)); + writel(0, cryp->base + RDR_PROC_PNTR(i)); + writel(0, cryp->base + RDR_DMA_CFG(i)); + + /* Configure RDR host address space */ + writel(0, cryp->base + RDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->res_dma, cryp->base + RDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + RDR_RING_SIZE(i)); + writel(MTK_RDR_STAT_CLR, cryp->base + RDR_STAT(i)); + + /* + * RDR manager generates update interrupts on a per-completed-packet, + * and the rd_proc_thresh_irq interrupt is fired when proc_pkt_count + * for the RDR exceeds the number of packets. + */ + writel(MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE, + cryp->base + RDR_THRESH(i)); + + /* + * Configure a threshold and time-out value for the processed + * result descriptors (or complete packets) that are written to + * the RDR. + */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | MTK_DESC_OFFSET(MTK_DESC_OFF), + cryp->base + RDR_DESC_SIZE(i)); + + /* + * Configure HIA fetch size and fetch threshold that are used to + * fetch blocks of multiple descriptors. 
+ */ + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * rndup) | + MTK_DESC_OVL_IRQ_EN, + cryp->base + RDR_CFG(i)); +} + +static int mtk_packet_engine_setup(struct mtk_cryp *cryp) +{ + struct mtk_sys_cap cap; + int i, err; + u32 val; + + cap.hia_ver = readl(cryp->base + HIA_VERSION); + cap.hia_opt = readl(cryp->base + HIA_OPTIONS); + cap.hw_opt = readl(cryp->base + EIP97_OPTIONS); + + if (!(((u16)cap.hia_ver) == MTK_HIA_SIGNATURE)) + return -EINVAL; + + /* Configure endianness conversion method for master (DMA) interface */ + writel(0, cryp->base + EIP97_MST_CTRL); + + /* Set HIA burst size */ + val = readl(cryp->base + HIA_MST_CTRL); + val &= ~MTK_BURST_SIZE_MSK; + val |= MTK_BURST_SIZE(5); + writel(val, cryp->base + HIA_MST_CTRL); + + err = mtk_dfe_dse_reset(cryp); + if (err) { + dev_err(cryp->dev, "Failed to reset DFE and DSE.\n"); + return err; + } + + mtk_dfe_dse_buf_setup(cryp, &cap); + + /* Enable the 4 rings for the packet engines. */ + mtk_desc_ring_link(cryp, 0xf); + + for (i = 0; i < RING_MAX; i++) { + mtk_cmd_desc_ring_setup(cryp, i, &cap); + mtk_res_desc_ring_setup(cryp, i, &cap); + } + + writel(MTK_PE_TK_LOC_AVL | MTK_PE_PROC_HELD | MTK_PE_TK_TIMEOUT_EN, + cryp->base + PE_TOKEN_CTRL_STAT); + + /* Clear all pending interrupts */ + writel(MTK_AIC_G_CLR, cryp->base + AIC_G_ACK); + writel(MTK_PE_INPUT_DMA_ERR | MTK_PE_OUTPUT_DMA_ERR | + MTK_PE_PKT_PORC_ERR | MTK_PE_PKT_TIMEOUT | + MTK_PE_FATAL_ERR | MTK_PE_INPUT_DMA_ERR_EN | + MTK_PE_OUTPUT_DMA_ERR_EN | MTK_PE_PKT_PORC_ERR_EN | + MTK_PE_PKT_TIMEOUT_EN | MTK_PE_FATAL_ERR_EN | + MTK_PE_INT_OUT_EN, + cryp->base + PE_INTERRUPT_CTRL_STAT); + + return 0; +} + +static int mtk_aic_cap_check(struct mtk_cryp *cryp, int hw) +{ + u32 val; + + if (hw == RING_MAX) + val = readl(cryp->base + AIC_G_VERSION); + else + val = readl(cryp->base + AIC_VERSION(hw)); + + val &= MTK_AIC_VER_MSK; + if (val != MTK_AIC_VER11 && val != MTK_AIC_VER12) + return -ENXIO; + + if (hw == RING_MAX) + val = 
readl(cryp->base + AIC_G_OPTIONS); + else + val = readl(cryp->base + AIC_OPTIONS(hw)); + + val &= MTK_AIC_INT_MSK; + if (!val || val > 32) + return -ENXIO; + + return 0; +} + +static int mtk_aic_init(struct mtk_cryp *cryp, int hw) +{ + int err; + + err = mtk_aic_cap_check(cryp, hw); + if (err) + return err; + + /* Disable all interrupts and set initial configuration */ + if (hw == RING_MAX) { + writel(0, cryp->base + AIC_G_ENABLE_CTRL); + writel(0, cryp->base + AIC_G_POL_CTRL); + writel(0, cryp->base + AIC_G_TYPE_CTRL); + writel(0, cryp->base + AIC_G_ENABLE_SET); + } else { + writel(0, cryp->base + AIC_ENABLE_CTRL(hw)); + writel(0, cryp->base + AIC_POL_CTRL(hw)); + writel(0, cryp->base + AIC_TYPE_CTRL(hw)); + writel(0, cryp->base + AIC_ENABLE_SET(hw)); + } + + return 0; +} + +static int mtk_accelerator_init(struct mtk_cryp *cryp) +{ + int i, err; + + /* Initialize advanced interrupt controller(AIC) */ + for (i = 0; i < MTK_IRQ_NUM; i++) { + err = mtk_aic_init(cryp, i); + if (err) { + dev_err(cryp->dev, "Failed to initialize AIC.\n"); + return err; + } + } + + /* Initialize packet engine */ + err = mtk_packet_engine_setup(cryp); + if (err) { + dev_err(cryp->dev, "Failed to configure packet engine.\n"); + return err; + } + + return 0; +} + +static void mtk_desc_dma_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < RING_MAX; i++) { + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->res_base, + cryp->ring[i]->res_dma); + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->cmd_base, + cryp->ring[i]->cmd_dma); + kfree(cryp->ring[i]); + } +} + +static int mtk_desc_ring_alloc(struct mtk_cryp *cryp) +{ + struct mtk_ring **ring = cryp->ring; + int i, err = ENOMEM; + + for (i = 0; i < RING_MAX; i++) { + ring[i] = kzalloc(sizeof(**ring), GFP_KERNEL); + if (!ring[i]) + goto err_cleanup; + + ring[i]->cmd_base = dma_zalloc_coherent(cryp->dev, + MTK_DESC_RING_SZ, + &ring[i]->cmd_dma, + GFP_KERNEL); + if (!ring[i]->cmd_base) + goto err_cleanup; 
+ + ring[i]->res_base = dma_zalloc_coherent(cryp->dev, + MTK_DESC_RING_SZ, + &ring[i]->res_dma, + GFP_KERNEL); + if (!ring[i]->res_base) + goto err_cleanup; + } + return 0; + +err_cleanup: + for (; i--; ) { + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + ring[i]->res_base, ring[i]->res_dma); + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + ring[i]->cmd_base, ring[i]->cmd_dma); + kfree(ring[i]); + } + return err; +} + +static int mtk_crypto_probe(struct platform_device *pdev) +{ + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct mtk_cryp *cryp; + int i, err; + + cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL); + if (!cryp) + return -ENOMEM; + + cryp->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cryp->base)) + return PTR_ERR(cryp->base); + + for (i = 0; i < MTK_IRQ_NUM; i++) { + cryp->irq[i] = platform_get_irq(pdev, i); + if (cryp->irq[i] < 0) { + dev_err(cryp->dev, "no IRQ:%d resource info\n", i); + return -ENXIO; + } + } + + cryp->clk_ethif = devm_clk_get(&pdev->dev, "ethif"); + cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp"); + if (IS_ERR(cryp->clk_ethif) || IS_ERR(cryp->clk_cryp)) + return -EPROBE_DEFER; + + cryp->dev = &pdev->dev; + pm_runtime_enable(cryp->dev); + pm_runtime_get_sync(cryp->dev); + + err = clk_prepare_enable(cryp->clk_ethif); + if (err) + goto err_clk_ethif; + + err = clk_prepare_enable(cryp->clk_cryp); + if (err) + goto err_clk_cryp; + + /* Allocate four command/result descriptor rings */ + err = mtk_desc_ring_alloc(cryp); + if (err) { + dev_err(cryp->dev, "Unable to allocate descriptor rings.\n"); + goto err_resource; + } + + /* Initialize hardware modules */ + err = mtk_accelerator_init(cryp); + if (err) { + dev_err(cryp->dev, "Failed to initialize cryptographic engine.\n"); + goto err_engine; + } + + err = mtk_cipher_alg_register(cryp); + if (err) { + dev_err(cryp->dev, "Unable to register cipher algorithm.\n"); + goto err_cipher; + } + + err = mtk_hash_alg_register(cryp); + if 
(err) {
+		dev_err(cryp->dev, "Unable to register hash algorithm.\n");
+		goto err_hash;
+	}
+
+	platform_set_drvdata(pdev, cryp);
+	return 0;
+
+	/* Error unwinding: each label undoes the steps that succeeded before it */
+err_hash:
+	mtk_cipher_alg_release(cryp);
+err_cipher:
+	mtk_dfe_dse_reset(cryp);
+err_engine:
+	mtk_desc_dma_free(cryp);
+err_resource:
+	clk_disable_unprepare(cryp->clk_cryp);
+err_clk_cryp:
+	clk_disable_unprepare(cryp->clk_ethif);
+err_clk_ethif:
+	pm_runtime_put_sync(cryp->dev);
+	pm_runtime_disable(cryp->dev);
+
+	return err;
+}
+
+/*
+ * Tear the device down: unregister the hash and cipher algorithms, free
+ * the descriptor rings, disable the clocks and drop the runtime PM
+ * reference. Always returns 0.
+ */
+static int mtk_crypto_remove(struct platform_device *pdev)
+{
+	struct mtk_cryp *cryp = platform_get_drvdata(pdev);
+
+	mtk_hash_alg_release(cryp);
+	mtk_cipher_alg_release(cryp);
+	mtk_desc_dma_free(cryp);
+
+	clk_disable_unprepare(cryp->clk_cryp);
+	clk_disable_unprepare(cryp->clk_ethif);
+
+	pm_runtime_put_sync(cryp->dev);
+	pm_runtime_disable(cryp->dev);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+/* Device tree match table; only referenced from this file, hence static */
+static const struct of_device_id of_crypto_id[] = {
+	{ .compatible = "mediatek,eip97-crypto" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_crypto_id);
+
+static struct platform_driver mtk_crypto_driver = {
+	.probe = mtk_crypto_probe,
+	.remove = mtk_crypto_remove,
+	.driver = {
+		.name = "mtk-crypto",
+		.owner = THIS_MODULE,
+		.of_match_table = of_crypto_id,
+	},
+};
+module_platform_driver(mtk_crypto_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ryder Lee ");
+MODULE_DESCRIPTION("Cryptographic accelerator driver for EIP97");
diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h
new file mode 100644
index 000000000000..4d4309a007da
--- /dev/null
+++ b/drivers/crypto/mediatek/mtk-platform.h
@@ -0,0 +1,238 @@
+/*
+ * Driver for EIP97 cryptographic accelerator.
+ *
+ * Copyright (c) 2016 Ryder Lee
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ * + */ + +#ifndef __MTK_PLATFORM_H_ +#define __MTK_PLATFORM_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "mtk-regs.h" + +#define MTK_RDR_PROC_THRESH BIT(0) +#define MTK_RDR_PROC_MODE BIT(23) +#define MTK_CNT_RST BIT(31) +#define MTK_IRQ_RDR0 BIT(1) +#define MTK_IRQ_RDR1 BIT(3) +#define MTK_IRQ_RDR2 BIT(5) +#define MTK_IRQ_RDR3 BIT(7) + +#define SIZE_IN_WORDS(x) ((x) >> 2) + +/** + * Ring 0/1 are used by AES encrypt and decrypt. + * Ring 2/3 are used by SHA. + */ +enum { + RING0 = 0, + RING1, + RING2, + RING3, + RING_MAX, +}; + +#define MTK_REC_NUM (RING_MAX / 2) +#define MTK_IRQ_NUM 5 + +/** + * struct mtk_desc - DMA descriptor + * @hdr: the descriptor control header + * @buf: DMA address of input buffer segment + * @ct: DMA address of command token that control operation flow + * @ct_hdr: the command token control header + * @tag: the user-defined field + * @tfm: DMA address of transform state + * @bound: align descriptors offset boundary + * + * Structure passed to the crypto engine to describe where source + * data needs to be fetched and how it needs to be processed. 
+ */ +struct mtk_desc { + __le32 hdr; + __le32 buf; + __le32 ct; + __le32 ct_hdr; + __le32 tag; + __le32 tfm; + __le32 bound[2]; +}; + +#define MTK_DESC_NUM 512 +#define MTK_DESC_OFF SIZE_IN_WORDS(sizeof(struct mtk_desc)) +#define MTK_DESC_SZ (MTK_DESC_OFF - 2) +#define MTK_DESC_RING_SZ ((sizeof(struct mtk_desc) * MTK_DESC_NUM)) +#define MTK_DESC_CNT(x) ((MTK_DESC_OFF * (x)) << 2) +#define MTK_DESC_LAST cpu_to_le32(BIT(22)) +#define MTK_DESC_FIRST cpu_to_le32(BIT(23)) +#define MTK_DESC_BUF_LEN(x) cpu_to_le32(x) +#define MTK_DESC_CT_LEN(x) cpu_to_le32((x) << 24) + +/** + * struct mtk_ring - Descriptor ring + * @cmd_base: pointer to command descriptor ring base + * @cmd_dma: DMA address of command descriptor ring + * @res_base: pointer to result descriptor ring base + * @res_dma: DMA address of result descriptor ring + * @pos: current position in the ring + * + * A descriptor ring is a circular buffer that is used to manage + * one or more descriptors. There are two type of descriptor rings; + * the command descriptor ring and result descriptor ring. 
+ */ +struct mtk_ring { + struct mtk_desc *cmd_base; + dma_addr_t cmd_dma; + struct mtk_desc *res_base; + dma_addr_t res_dma; + u32 pos; +}; + +/** + * struct mtk_aes_dma - Structure that holds sg list info + * @sg: pointer to scatter-gather list + * @nents: number of entries in the sg list + * @remainder: remainder of sg list + * @sg_len: number of entries in the sg mapped list + */ +struct mtk_aes_dma { + struct scatterlist *sg; + int nents; + u32 remainder; + u32 sg_len; +}; + +/** + * struct mtk_aes_rec - AES operation record + * @queue: crypto request queue + * @req: pointer to ablkcipher request + * @task: the tasklet is use in AES interrupt + * @src: the structure that holds source sg list info + * @dst: the structure that holds destination sg list info + * @aligned_sg: the scatter list is use to alignment + * @real_dst: pointer to the destination sg list + * @total: request buffer length + * @buf: pointer to page buffer + * @info: pointer to AES transform state and command token + * @ct_hdr: AES command token control field + * @ct_size: size of AES command token + * @ct_dma: DMA address of AES command token + * @tfm_dma: DMA address of AES transform state + * @id: record identification + * @flags: it's describing AES operation state + * @lock: the ablkcipher queue lock + * + * Structure used to record AES execution state. 
+ */
+struct mtk_aes_rec {
+	struct crypto_queue queue;
+	struct ablkcipher_request *req;
+	struct tasklet_struct task;
+	struct mtk_aes_dma src;
+	struct mtk_aes_dma dst;
+
+	struct scatterlist aligned_sg;
+	struct scatterlist *real_dst;
+
+	size_t total;
+	void *buf;
+
+	void *info;
+	__le32 ct_hdr;
+	u32 ct_size;
+	dma_addr_t ct_dma;
+	dma_addr_t tfm_dma;
+
+	u8 id;
+	unsigned long flags;
+	/* queue lock */
+	spinlock_t lock;
+};
+
+/**
+ * struct mtk_sha_rec - SHA operation record
+ * @queue:	crypto request queue
+ * @req:	pointer to ahash request
+ * @task:	the tasklet used in the SHA interrupt
+ * @info:	pointer to SHA transform state and command token
+ * @ct_hdr:	SHA command token control field
+ * @ct_size:	size of SHA command token
+ * @ct_dma:	DMA address of SHA command token
+ * @tfm_dma:	DMA address of SHA transform state
+ * @id:		record identification
+ * @flags:	describes the SHA operation state
+ * @lock:	the ahash request queue lock
+ *
+ * Structure used to record SHA execution state.
+ */
+struct mtk_sha_rec {
+	struct crypto_queue queue;
+	struct ahash_request *req;
+	struct tasklet_struct task;
+
+	void *info;
+	__le32 ct_hdr;
+	u32 ct_size;
+	dma_addr_t ct_dma;
+	dma_addr_t tfm_dma;
+
+	u8 id;
+	unsigned long flags;
+	/* queue lock */
+	spinlock_t lock;
+};
+
+/**
+ * struct mtk_cryp - Cryptographic device
+ * @base:	pointer to mapped register I/O base
+ * @dev:	pointer to device
+ * @clk_ethif:	pointer to ethif clock
+ * @clk_cryp:	pointer to crypto clock
+ * @irq:	global system and rings IRQ
+ * @ring:	pointers to the descriptor rings
+ * @aes:	pointers to the AES operation records
+ * @sha:	pointers to the SHA operation records
+ * @aes_list:	device list of AES
+ * @sha_list:	device list of SHA
+ * @tmp:	pointer to temporary buffer for internal use
+ * @tmp_dma:	DMA address of temporary buffer
+ * @rec:	it's used to select SHA record for tfm
+ *
+ * Structure storing cryptographic device information. Each operation
+ * record maps to a descriptor ring.
+ */ +struct mtk_cryp { + void __iomem *base; + struct device *dev; + struct clk *clk_ethif; + struct clk *clk_cryp; + int irq[MTK_IRQ_NUM]; + + struct mtk_ring *ring[RING_MAX]; + struct mtk_aes_rec *aes[MTK_REC_NUM]; + struct mtk_sha_rec *sha[MTK_REC_NUM]; + + struct list_head aes_list; + struct list_head sha_list; + + void *tmp; + dma_addr_t tmp_dma; + bool rec; +}; + +int mtk_cipher_alg_register(struct mtk_cryp *cryp); +void mtk_cipher_alg_release(struct mtk_cryp *cryp); +int mtk_hash_alg_register(struct mtk_cryp *cryp); +void mtk_hash_alg_release(struct mtk_cryp *cryp); + +#endif /* __MTK_PLATFORM_H_ */ diff --git a/drivers/crypto/mediatek/mtk-regs.h b/drivers/crypto/mediatek/mtk-regs.h new file mode 100644 index 000000000000..94f4eb85be3f --- /dev/null +++ b/drivers/crypto/mediatek/mtk-regs.h @@ -0,0 +1,194 @@ +/* + * Support for MediaTek cryptographic accelerator. + * + * Copyright (c) 2016 MediaTek Inc. + * Author: Ryder Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. 
+ * + */ + +#ifndef __MTK_REGS_H__ +#define __MTK_REGS_H__ + +/* HIA, Command Descriptor Ring Manager */ +#define CDR_BASE_ADDR_LO(x) (0x0 + ((x) << 12)) +#define CDR_BASE_ADDR_HI(x) (0x4 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_LO(x) (0x8 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_HI(x) (0xC + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_LO(x) (0x10 + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_HI(x) (0x14 + ((x) << 12)) +#define CDR_RING_SIZE(x) (0x18 + ((x) << 12)) +#define CDR_DESC_SIZE(x) (0x1C + ((x) << 12)) +#define CDR_CFG(x) (0x20 + ((x) << 12)) +#define CDR_DMA_CFG(x) (0x24 + ((x) << 12)) +#define CDR_THRESH(x) (0x28 + ((x) << 12)) +#define CDR_PREP_COUNT(x) (0x2C + ((x) << 12)) +#define CDR_PROC_COUNT(x) (0x30 + ((x) << 12)) +#define CDR_PREP_PNTR(x) (0x34 + ((x) << 12)) +#define CDR_PROC_PNTR(x) (0x38 + ((x) << 12)) +#define CDR_STAT(x) (0x3C + ((x) << 12)) + +/* HIA, Result Descriptor Ring Manager */ +#define RDR_BASE_ADDR_LO(x) (0x800 + ((x) << 12)) +#define RDR_BASE_ADDR_HI(x) (0x804 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_LO(x) (0x808 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_HI(x) (0x80C + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_LO(x) (0x810 + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_HI(x) (0x814 + ((x) << 12)) +#define RDR_RING_SIZE(x) (0x818 + ((x) << 12)) +#define RDR_DESC_SIZE(x) (0x81C + ((x) << 12)) +#define RDR_CFG(x) (0x820 + ((x) << 12)) +#define RDR_DMA_CFG(x) (0x824 + ((x) << 12)) +#define RDR_THRESH(x) (0x828 + ((x) << 12)) +#define RDR_PREP_COUNT(x) (0x82C + ((x) << 12)) +#define RDR_PROC_COUNT(x) (0x830 + ((x) << 12)) +#define RDR_PREP_PNTR(x) (0x834 + ((x) << 12)) +#define RDR_PROC_PNTR(x) (0x838 + ((x) << 12)) +#define RDR_STAT(x) (0x83C + ((x) << 12)) + +/* HIA, Ring AIC */ +#define AIC_POL_CTRL(x) (0xE000 - ((x) << 12)) +#define AIC_TYPE_CTRL(x) (0xE004 - ((x) << 12)) +#define AIC_ENABLE_CTRL(x) (0xE008 - ((x) << 12)) +#define AIC_RAW_STAL(x) (0xE00C - ((x) << 12)) +#define AIC_ENABLE_SET(x) (0xE00C - ((x) << 12)) +#define 
AIC_ENABLED_STAT(x) (0xE010 - ((x) << 12)) +#define AIC_ACK(x) (0xE010 - ((x) << 12)) +#define AIC_ENABLE_CLR(x) (0xE014 - ((x) << 12)) +#define AIC_OPTIONS(x) (0xE018 - ((x) << 12)) +#define AIC_VERSION(x) (0xE01C - ((x) << 12)) + +/* HIA, Global AIC */ +#define AIC_G_POL_CTRL 0xF800 +#define AIC_G_TYPE_CTRL 0xF804 +#define AIC_G_ENABLE_CTRL 0xF808 +#define AIC_G_RAW_STAT 0xF80C +#define AIC_G_ENABLE_SET 0xF80C +#define AIC_G_ENABLED_STAT 0xF810 +#define AIC_G_ACK 0xF810 +#define AIC_G_ENABLE_CLR 0xF814 +#define AIC_G_OPTIONS 0xF818 +#define AIC_G_VERSION 0xF81C + +/* HIA, Data Fetch Engine */ +#define DFE_CFG 0xF000 +#define DFE_PRIO_0 0xF010 +#define DFE_PRIO_1 0xF014 +#define DFE_PRIO_2 0xF018 +#define DFE_PRIO_3 0xF01C + +/* HIA, Data Fetch Engine access monitoring for CDR */ +#define DFE_RING_REGION_LO(x) (0xF080 + ((x) << 3)) +#define DFE_RING_REGION_HI(x) (0xF084 + ((x) << 3)) + +/* HIA, Data Fetch Engine thread control and status for thread */ +#define DFE_THR_CTRL 0xF200 +#define DFE_THR_STAT 0xF204 +#define DFE_THR_DESC_CTRL 0xF208 +#define DFE_THR_DESC_DPTR_LO 0xF210 +#define DFE_THR_DESC_DPTR_HI 0xF214 +#define DFE_THR_DESC_ACDPTR_LO 0xF218 +#define DFE_THR_DESC_ACDPTR_HI 0xF21C + +/* HIA, Data Store Engine */ +#define DSE_CFG 0xF400 +#define DSE_PRIO_0 0xF410 +#define DSE_PRIO_1 0xF414 +#define DSE_PRIO_2 0xF418 +#define DSE_PRIO_3 0xF41C + +/* HIA, Data Store Engine access monitoring for RDR */ +#define DSE_RING_REGION_LO(x) (0xF480 + ((x) << 3)) +#define DSE_RING_REGION_HI(x) (0xF484 + ((x) << 3)) + +/* HIA, Data Store Engine thread control and status for thread */ +#define DSE_THR_CTRL 0xF600 +#define DSE_THR_STAT 0xF604 +#define DSE_THR_DESC_CTRL 0xF608 +#define DSE_THR_DESC_DPTR_LO 0xF610 +#define DSE_THR_DESC_DPTR_HI 0xF614 +#define DSE_THR_DESC_S_DPTR_LO 0xF618 +#define DSE_THR_DESC_S_DPTR_HI 0xF61C +#define DSE_THR_ERROR_STAT 0xF620 + +/* HIA Global */ +#define HIA_MST_CTRL 0xFFF4 +#define HIA_OPTIONS 0xFFF8 +#define HIA_VERSION 0xFFFC + +/* 
Processing Engine Input Side, Processing Engine */ +#define PE_IN_DBUF_THRESH 0x10000 +#define PE_IN_TBUF_THRESH 0x10100 + +/* Packet Engine Configuration / Status Registers */ +#define PE_TOKEN_CTRL_STAT 0x11000 +#define PE_FUNCTION_EN 0x11004 +#define PE_CONTEXT_CTRL 0x11008 +#define PE_INTERRUPT_CTRL_STAT 0x11010 +#define PE_CONTEXT_STAT 0x1100C +#define PE_OUT_TRANS_CTRL_STAT 0x11018 +#define PE_OUT_BUF_CTRL 0x1101C + +/* Packet Engine PRNG Registers */ +#define PE_PRNG_STAT 0x11040 +#define PE_PRNG_CTRL 0x11044 +#define PE_PRNG_SEED_L 0x11048 +#define PE_PRNG_SEED_H 0x1104C +#define PE_PRNG_KEY_0_L 0x11050 +#define PE_PRNG_KEY_0_H 0x11054 +#define PE_PRNG_KEY_1_L 0x11058 +#define PE_PRNG_KEY_1_H 0x1105C +#define PE_PRNG_RES_0 0x11060 +#define PE_PRNG_RES_1 0x11064 +#define PE_PRNG_RES_2 0x11068 +#define PE_PRNG_RES_3 0x1106C +#define PE_PRNG_LFSR_L 0x11070 +#define PE_PRNG_LFSR_H 0x11074 + +/* Packet Engine AIC */ +#define PE_EIP96_AIC_POL_CTRL 0x113C0 +#define PE_EIP96_AIC_TYPE_CTRL 0x113C4 +#define PE_EIP96_AIC_ENABLE_CTRL 0x113C8 +#define PE_EIP96_AIC_RAW_STAT 0x113CC +#define PE_EIP96_AIC_ENABLE_SET 0x113CC +#define PE_EIP96_AIC_ENABLED_STAT 0x113D0 +#define PE_EIP96_AIC_ACK 0x113D0 +#define PE_EIP96_AIC_ENABLE_CLR 0x113D4 +#define PE_EIP96_AIC_OPTIONS 0x113D8 +#define PE_EIP96_AIC_VERSION 0x113DC + +/* Packet Engine Options & Version Registers */ +#define PE_EIP96_OPTIONS 0x113F8 +#define PE_EIP96_VERSION 0x113FC + +/* Processing Engine Output Side */ +#define PE_OUT_DBUF_THRESH 0x11C00 +#define PE_OUT_TBUF_THRESH 0x11D00 + +/* Processing Engine Local AIC */ +#define PE_AIC_POL_CTRL 0x11F00 +#define PE_AIC_TYPE_CTRL 0x11F04 +#define PE_AIC_ENABLE_CTRL 0x11F08 +#define PE_AIC_RAW_STAT 0x11F0C +#define PE_AIC_ENABLE_SET 0x11F0C +#define PE_AIC_ENABLED_STAT 0x11F10 +#define PE_AIC_ENABLE_CLR 0x11F14 +#define PE_AIC_OPTIONS 0x11F18 +#define PE_AIC_VERSION 0x11F1C + +/* Processing Engine General Configuration and Version */ +#define PE_IN_FLIGHT 0x11FF0 
+#define PE_OPTIONS 0x11FF8 +#define PE_VERSION 0x11FFC + +/* EIP-97 - Global */ +#define EIP97_CLOCK_STATE 0x1FFE4 +#define EIP97_FORCE_CLOCK_ON 0x1FFE8 +#define EIP97_FORCE_CLOCK_OFF 0x1FFEC +#define EIP97_MST_CTRL 0x1FFF4 +#define EIP97_OPTIONS 0x1FFF8 +#define EIP97_VERSION 0x1FFFC +#endif /* __MTK_REGS_H__ */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c new file mode 100644 index 000000000000..89513632c8ed --- /dev/null +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -0,0 +1,1437 @@ +/* + * Cryptographic API. + * + * Driver for EIP97 SHA1/SHA2(HMAC) acceleration. + * + * Copyright (c) 2016 Ryder Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some ideas are from atmel-sha.c and omap-sham.c drivers. + */ + +#include +#include "mtk-platform.h" + +#define SHA_ALIGN_MSK (sizeof(u32) - 1) +#define SHA_QUEUE_SIZE 512 +#define SHA_TMP_BUF_SIZE 512 +#define SHA_BUF_SIZE ((u32)PAGE_SIZE) + +#define SHA_OP_UPDATE 1 +#define SHA_OP_FINAL 2 + +#define SHA_DATA_LEN_MSK cpu_to_le32(GENMASK(16, 0)) + +/* SHA command token */ +#define SHA_CT_SIZE 5 +#define SHA_CT_CTRL_HDR cpu_to_le32(0x02220000) +#define SHA_COMMAND0 cpu_to_le32(0x03020000) +#define SHA_COMMAND1 cpu_to_le32(0x21060000) +#define SHA_COMMAND2 cpu_to_le32(0xe0e63802) + +/* SHA transform information */ +#define SHA_TFM_HASH cpu_to_le32(0x2 << 0) +#define SHA_TFM_INNER_DIG cpu_to_le32(0x1 << 21) +#define SHA_TFM_SIZE(x) cpu_to_le32((x) << 8) +#define SHA_TFM_START cpu_to_le32(0x1 << 4) +#define SHA_TFM_CONTINUE cpu_to_le32(0x1 << 5) +#define SHA_TFM_HASH_STORE cpu_to_le32(0x1 << 19) +#define SHA_TFM_SHA1 cpu_to_le32(0x2 << 23) +#define SHA_TFM_SHA256 cpu_to_le32(0x3 << 23) +#define SHA_TFM_SHA224 cpu_to_le32(0x4 << 23) +#define SHA_TFM_SHA512 cpu_to_le32(0x5 << 23) +#define SHA_TFM_SHA384 cpu_to_le32(0x6 << 23) +#define 
SHA_TFM_DIGEST(x) cpu_to_le32(((x) & GENMASK(3, 0)) << 24) + +/* SHA flags */ +#define SHA_FLAGS_BUSY BIT(0) +#define SHA_FLAGS_FINAL BIT(1) +#define SHA_FLAGS_FINUP BIT(2) +#define SHA_FLAGS_SG BIT(3) +#define SHA_FLAGS_ALGO_MSK GENMASK(8, 4) +#define SHA_FLAGS_SHA1 BIT(4) +#define SHA_FLAGS_SHA224 BIT(5) +#define SHA_FLAGS_SHA256 BIT(6) +#define SHA_FLAGS_SHA384 BIT(7) +#define SHA_FLAGS_SHA512 BIT(8) +#define SHA_FLAGS_HMAC BIT(9) +#define SHA_FLAGS_PAD BIT(10) + +/** + * mtk_sha_ct is a set of hardware instructions(command token) + * that are used to control engine's processing flow of SHA, + * and it contains the first two words of transform state. + */ +struct mtk_sha_ct { + __le32 tfm_ctrl0; + __le32 tfm_ctrl1; + __le32 ct_ctrl0; + __le32 ct_ctrl1; + __le32 ct_ctrl2; +}; + +/** + * mtk_sha_tfm is used to define SHA transform state + * and store result digest that produced by engine. + */ +struct mtk_sha_tfm { + __le32 tfm_ctrl0; + __le32 tfm_ctrl1; + __le32 digest[SIZE_IN_WORDS(SHA512_DIGEST_SIZE)]; +}; + +/** + * mtk_sha_info consists of command token and transform state + * of SHA, its role is similar to mtk_aes_info. 
+ */ +struct mtk_sha_info { + struct mtk_sha_ct ct; + struct mtk_sha_tfm tfm; +}; + +struct mtk_sha_reqctx { + struct mtk_sha_info info; + unsigned long flags; + unsigned long op; + + u64 digcnt; + bool start; + size_t bufcnt; + dma_addr_t dma_addr; + + /* Walk state */ + struct scatterlist *sg; + u32 offset; /* Offset in current sg */ + u32 total; /* Total request */ + size_t ds; + size_t bs; + + u8 *buffer; +}; + +struct mtk_sha_hmac_ctx { + struct crypto_shash *shash; + u8 ipad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); + u8 opad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); +}; + +struct mtk_sha_ctx { + struct mtk_cryp *cryp; + unsigned long flags; + u8 id; + u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); + + struct mtk_sha_hmac_ctx base[0]; +}; + +struct mtk_sha_drv { + struct list_head dev_list; + /* Device list lock */ + spinlock_t lock; +}; + +static struct mtk_sha_drv mtk_sha = { + .dev_list = LIST_HEAD_INIT(mtk_sha.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(mtk_sha.lock), +}; + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req); + +static inline u32 mtk_sha_read(struct mtk_cryp *cryp, u32 offset) +{ + return readl_relaxed(cryp->base + offset); +} + +static inline void mtk_sha_write(struct mtk_cryp *cryp, + u32 offset, u32 value) +{ + writel_relaxed(value, cryp->base + offset); +} + +static struct mtk_cryp *mtk_sha_find_dev(struct mtk_sha_ctx *tctx) +{ + struct mtk_cryp *cryp = NULL; + struct mtk_cryp *tmp; + + spin_lock_bh(&mtk_sha.lock); + if (!tctx->cryp) { + list_for_each_entry(tmp, &mtk_sha.dev_list, sha_list) { + cryp = tmp; + break; + } + tctx->cryp = cryp; + } else { + cryp = tctx->cryp; + } + + /* + * Assign record id to tfm in round-robin fashion, and this + * will help tfm to bind to corresponding descriptor rings. 
+	 */
+	/* cryp is still NULL when the device list walk found no entry */
+	if (cryp) {
+		tctx->id = cryp->rec;
+		cryp->rec = !cryp->rec;
+	}
+
+	spin_unlock_bh(&mtk_sha.lock);
+
+	return cryp;
+}
+
+/*
+ * Copy request data from the current scatterlist position (ctx->sg,
+ * ctx->offset) into ctx->buffer until the buffer holds SHA_BUF_SIZE bytes
+ * or ctx->total drops to zero. Always returns 0.
+ */
+static int mtk_sha_append_sg(struct mtk_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < SHA_BUF_SIZE) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, SHA_BUF_SIZE - ctx->bufcnt);
+
+		if (count <= 0) {
+			/*
+			 * Check if count <= 0 because the buffer is full or
+			 * because the sg length is 0. In the latest case,
+			 * check if there is another sg in the list, a 0 length
+			 * sg doesn't necessarily mean the end of the sg list.
+			 */
+			if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+				ctx->sg = sg_next(ctx->sg);
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+					 ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		/* Current entry consumed: step to the next one, or stop */
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * The purpose of this padding is to ensure that the padded message is a
+ * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512).
+ * The bit "1" is appended at the end of the message followed by
+ * "padlen-1" zero bits. Then a 64 bits block (SHA1/SHA224/SHA256) or
+ * 128 bits block (SHA384/SHA512) equals to the message length in bits
+ * is appended.
+ * + * For SHA1/SHA224/SHA256, padlen is calculated as followed: + * - if message length < 56 bytes then padlen = 56 - message length + * - else padlen = 64 + 56 - message length + * + * For SHA384/SHA512, padlen is calculated as followed: + * - if message length < 112 bytes then padlen = 112 - message length + * - else padlen = 128 + 112 - message length + */ +static void mtk_sha_fill_padding(struct mtk_sha_reqctx *ctx, u32 len) +{ + u32 index, padlen; + u64 bits[2]; + u64 size = ctx->digcnt; + + size += ctx->bufcnt; + size += len; + + bits[1] = cpu_to_be64(size << 3); + bits[0] = cpu_to_be64(size >> 61); + + if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { + index = ctx->bufcnt & 0x7f; + padlen = (index < 112) ? (112 - index) : ((128 + 112) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); + ctx->bufcnt += padlen + 16; + ctx->flags |= SHA_FLAGS_PAD; + } else { + index = ctx->bufcnt & 0x3f; + padlen = (index < 56) ? 
(56 - index) : ((64 + 56) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); + ctx->bufcnt += padlen + 8; + ctx->flags |= SHA_FLAGS_PAD; + } +} + +/* Initialize basic transform information of SHA */ +static void mtk_sha_info_init(struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx) +{ + struct mtk_sha_info *info = sha->info; + struct mtk_sha_ct *ct = &info->ct; + struct mtk_sha_tfm *tfm = &info->tfm; + + sha->ct_hdr = SHA_CT_CTRL_HDR; + sha->ct_size = SHA_CT_SIZE; + + tfm->tfm_ctrl0 = SHA_TFM_HASH | SHA_TFM_INNER_DIG | + SHA_TFM_SIZE(SIZE_IN_WORDS(ctx->ds)); + + switch (ctx->flags & SHA_FLAGS_ALGO_MSK) { + case SHA_FLAGS_SHA1: + tfm->tfm_ctrl0 |= SHA_TFM_SHA1; + break; + case SHA_FLAGS_SHA224: + tfm->tfm_ctrl0 |= SHA_TFM_SHA224; + break; + case SHA_FLAGS_SHA256: + tfm->tfm_ctrl0 |= SHA_TFM_SHA256; + break; + case SHA_FLAGS_SHA384: + tfm->tfm_ctrl0 |= SHA_TFM_SHA384; + break; + case SHA_FLAGS_SHA512: + tfm->tfm_ctrl0 |= SHA_TFM_SHA512; + break; + + default: + /* Should not happen... */ + return; + } + + tfm->tfm_ctrl1 = SHA_TFM_HASH_STORE; + ct->tfm_ctrl0 = tfm->tfm_ctrl0 | SHA_TFM_CONTINUE | SHA_TFM_START; + ct->tfm_ctrl1 = tfm->tfm_ctrl1; + + ct->ct_ctrl0 = SHA_COMMAND0; + ct->ct_ctrl1 = SHA_COMMAND1; + ct->ct_ctrl2 = SHA_COMMAND2 | SHA_TFM_DIGEST(SIZE_IN_WORDS(ctx->ds)); +} + +/* + * Update input data length field of transform information and + * map it to DMA region. 
+ */
+static int mtk_sha_info_map(struct mtk_cryp *cryp,
+			    struct mtk_sha_rec *sha,
+			    size_t len)
+{
+	struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req);
+	struct mtk_sha_info *info = sha->info;
+	struct mtk_sha_ct *ct = &info->ct;
+
+	/* Only the first transfer of a request keeps SHA_TFM_START set */
+	if (ctx->start)
+		ctx->start = false;
+	else
+		ct->tfm_ctrl0 &= ~SHA_TFM_START;
+
+	/* Replace the data length field in both control words with @len */
+	sha->ct_hdr &= ~SHA_DATA_LEN_MSK;
+	sha->ct_hdr |= cpu_to_le32(len);
+	ct->ct_ctrl0 &= ~SHA_DATA_LEN_MSK;
+	ct->ct_ctrl0 |= cpu_to_le32(len);
+
+	ctx->digcnt += len;
+
+	sha->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info),
+				     DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(cryp->dev, sha->ct_dma))) {
+		/* sizeof() yields size_t, which needs %zu rather than %d */
+		dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info));
+		return -EINVAL;
+	}
+	/* The transform state follows the command token in the same mapping */
+	sha->tfm_dma = sha->ct_dma + sizeof(*ct);
+
+	return 0;
+}
+
+/*
+ * Because of hardware limitation, we must pre-calculate the inner
+ * and outer digest that need to be processed firstly by engine, then
+ * apply the result digest to the input message. These complex hashing
+ * procedures limits HMAC performance, so we use fallback SW encoding.
+ */ +static int mtk_sha_finish_hmac(struct ahash_request *req) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */ + + return crypto_shash_init(shash) ?: + crypto_shash_update(shash, bctx->opad, ctx->bs) ?: + crypto_shash_finup(shash, req->result, ctx->ds, req->result); +} + +/* Initialize request context */ +static int mtk_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags = 0; + ctx->ds = crypto_ahash_digestsize(tfm); + + switch (ctx->ds) { + case SHA1_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA1; + ctx->bs = SHA1_BLOCK_SIZE; + break; + case SHA224_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA224; + ctx->bs = SHA224_BLOCK_SIZE; + break; + case SHA256_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA256; + ctx->bs = SHA256_BLOCK_SIZE; + break; + case SHA384_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA384; + ctx->bs = SHA384_BLOCK_SIZE; + break; + case SHA512_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA512; + ctx->bs = SHA512_BLOCK_SIZE; + break; + default: + return -EINVAL; + } + + ctx->bufcnt = 0; + ctx->digcnt = 0; + ctx->buffer = tctx->buf; + ctx->start = true; + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + memcpy(ctx->buffer, bctx->ipad, ctx->bs); + ctx->bufcnt = ctx->bs; + ctx->flags |= SHA_FLAGS_HMAC; + } + + return 0; +} + +static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, + dma_addr_t addr, size_t len) +{ + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->pos; + struct mtk_desc *res = ring->res_base + ring->pos; + int err; + + err = mtk_sha_info_map(cryp, sha, 
len); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_FIRST | + MTK_DESC_LAST | + MTK_DESC_BUF_LEN(len); + + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_FIRST | + MTK_DESC_LAST | + MTK_DESC_BUF_LEN(len) | + MTK_DESC_CT_LEN(sha->ct_size); + + cmd->buf = cpu_to_le32(addr); + cmd->ct = cpu_to_le32(sha->ct_dma); + cmd->ct_hdr = sha->ct_hdr; + cmd->tfm = cpu_to_le32(sha->tfm_dma); + + if (++ring->pos == MTK_DESC_NUM) + ring->pos = 0; + + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. + */ + wmb(); + /* Start DMA transfer */ + mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + + return -EINPROGRESS; +} + +static int mtk_sha_xmit2(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx, + size_t len1, size_t len2) +{ + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->pos; + struct mtk_desc *res = ring->res_base + ring->pos; + int err; + + err = mtk_sha_info_map(cryp, sha, len1 + len2); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len1) | + MTK_DESC_FIRST | + MTK_DESC_CT_LEN(sha->ct_size); + cmd->buf = cpu_to_le32(sg_dma_address(ctx->sg)); + cmd->ct = cpu_to_le32(sha->ct_dma); + cmd->ct_hdr = sha->ct_hdr; + cmd->tfm = cpu_to_le32(sha->tfm_dma); + + if (++ring->pos == MTK_DESC_NUM) + ring->pos = 0; + + cmd = ring->cmd_base + ring->pos; + res = ring->res_base + ring->pos; + + res->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + cmd->buf = cpu_to_le32(ctx->dma_addr); + + if (++ring->pos == MTK_DESC_NUM) + ring->pos = 0; + + /* + * Make sure that all changes to the DMA ring are 
done before we
+	 * start engine.
+	 */
+	wmb();
+	/* Start DMA transfer */
+	mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2));
+	mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2));
+
+	return -EINPROGRESS;
+}
+
+/*
+ * Map the request's bounce buffer for DMA and submit @count bytes of it
+ * through mtk_sha_xmit(). Returns -EINVAL if the mapping fails, otherwise
+ * whatever mtk_sha_xmit() returns.
+ */
+static int mtk_sha_dma_map(struct mtk_cryp *cryp,
+			   struct mtk_sha_rec *sha,
+			   struct mtk_sha_reqctx *ctx,
+			   size_t count)
+{
+	ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer,
+				       SHA_BUF_SIZE, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) {
+		dev_err(cryp->dev, "dma map error\n");
+		return -EINVAL;
+	}
+
+	ctx->flags &= ~SHA_FLAGS_SG;
+
+	return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count);
+}
+
+/*
+ * Slow path: gather scatterlist data into the bounce buffer and submit it
+ * once it is full with data still pending, or once this is the final
+ * chunk of a finup request (padding is appended first in that case).
+ * Returns 0 when nothing was submitted.
+ */
+static int mtk_sha_update_slow(struct mtk_cryp *cryp,
+			       struct mtk_sha_rec *sha)
+{
+	struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req);
+	size_t count;
+	u32 final;
+
+	mtk_sha_append_sg(ctx);
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	/* bufcnt is a size_t, so it is printed with %zu */
+	dev_dbg(cryp->dev, "slow: bufcnt: %zu\n", ctx->bufcnt);
+
+	if (final) {
+		sha->flags |= SHA_FLAGS_FINAL;
+		mtk_sha_fill_padding(ctx, 0);
+	}
+
+	if (final || (ctx->bufcnt == SHA_BUF_SIZE && ctx->total)) {
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+
+		return mtk_sha_dma_map(cryp, sha, ctx, count);
+	}
+	return 0;
+}
+
+static int mtk_sha_update_start(struct mtk_cryp *cryp,
+				struct mtk_sha_rec *sha)
+{
+	struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req);
+	u32 len, final, tail;
+	struct scatterlist *sg;
+
+	if (!ctx->total)
+		return 0;
+
+	/* Buffered data or a partially consumed sg entry needs the slow path */
+	if (ctx->bufcnt || ctx->offset)
+		return mtk_sha_update_slow(cryp, sha);
+
+	sg = ctx->sg;
+
+	if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+		return mtk_sha_update_slow(cryp, sha);
+
+	if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, ctx->bs))
+		/* size is not ctx->bs aligned */
+		return mtk_sha_update_slow(cryp, sha);
+
+	len = min(ctx->total, sg->length);
+
+	if (sg_is_last(sg)) {
+		if (!(ctx->flags & SHA_FLAGS_FINUP)) {
+			/* not last sg must be ctx->bs aligned */
+			tail = len & (ctx->bs - 1);
+			len -= tail;
+ } + } + + ctx->total -= len; + ctx->offset = len; /* offset where to start slow */ + + final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; + + /* Add padding */ + if (final) { + size_t count; + + tail = len & (ctx->bs - 1); + len -= tail; + ctx->total += tail; + ctx->offset = len; /* offset where to start slow */ + + sg = ctx->sg; + mtk_sha_append_sg(ctx); + mtk_sha_fill_padding(ctx, len); + + ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, + SHA_BUF_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { + dev_err(cryp->dev, "dma map bytes error\n"); + return -EINVAL; + } + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + if (len == 0) { + ctx->flags &= ~SHA_FLAGS_SG; + return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count); + + } else { + ctx->sg = sg; + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + return mtk_sha_xmit2(cryp, sha, ctx, len, count); + } + } + + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + + return mtk_sha_xmit(cryp, sha, sg_dma_address(ctx->sg), len); +} + +static int mtk_sha_final_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct ahash_request *req = sha->req; + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + size_t count; + + mtk_sha_fill_padding(ctx, 0); + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + return mtk_sha_dma_map(cryp, sha, ctx, count); +} + +/* Copy ready hash (+ finalize hmac) */ +static int mtk_sha_finish(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + u32 *digest = ctx->info.tfm.digest; + u32 *result = (u32 *)req->result; + int i; + + /* Get the hash from the digest buffer */ + for (i = 0; i < SIZE_IN_WORDS(ctx->ds); i++) + result[i] = 
le32_to_cpu(digest[i]); + + if (ctx->flags & SHA_FLAGS_HMAC) + return mtk_sha_finish_hmac(req); + + return 0; +} + +static void mtk_sha_finish_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, int err) +{ + if (likely(!err && (SHA_FLAGS_FINAL & sha->flags))) + err = mtk_sha_finish(sha->req); + + sha->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL); + + sha->req->base.complete(&sha->req->base, err); + + /* Handle new request */ + mtk_sha_handle_queue(cryp, sha->id - RING2, NULL); +} + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req) +{ + struct mtk_sha_rec *sha = cryp->sha[id]; + struct crypto_async_request *async_req, *backlog; + struct mtk_sha_reqctx *ctx; + unsigned long flags; + int err = 0, ret = 0; + + spin_lock_irqsave(&sha->lock, flags); + if (req) + ret = ahash_enqueue_request(&sha->queue, req); + + if (SHA_FLAGS_BUSY & sha->flags) { + spin_unlock_irqrestore(&sha->lock, flags); + return ret; + } + + backlog = crypto_get_backlog(&sha->queue); + async_req = crypto_dequeue_request(&sha->queue); + if (async_req) + sha->flags |= SHA_FLAGS_BUSY; + spin_unlock_irqrestore(&sha->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + ctx = ahash_request_ctx(req); + + sha->req = req; + sha->info = &ctx->info; + + mtk_sha_info_init(sha, ctx); + + if (ctx->op == SHA_OP_UPDATE) { + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) + /* No final() after finup() */ + err = mtk_sha_final_req(cryp, sha); + } else if (ctx->op == SHA_OP_FINAL) { + err = mtk_sha_final_req(cryp, sha); + } + + if (unlikely(err != -EINPROGRESS)) + /* Task will not finish it, so do it here */ + mtk_sha_finish_req(cryp, sha, err); + + return ret; +} + +static int mtk_sha_enqueue(struct ahash_request *req, u32 op) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + struct mtk_sha_ctx *tctx = 
crypto_tfm_ctx(req->base.tfm); + + ctx->op = op; + + return mtk_sha_handle_queue(tctx->cryp, tctx->id, req); +} + +static void mtk_sha_unmap(struct mtk_cryp *cryp, struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + + dma_unmap_single(cryp->dev, sha->ct_dma, + sizeof(struct mtk_sha_info), DMA_BIDIRECTIONAL); + + if (ctx->flags & SHA_FLAGS_SG) { + dma_unmap_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE); + if (ctx->sg->length == ctx->offset) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + } + if (ctx->flags & SHA_FLAGS_PAD) { + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); + } + } else + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); +} + +static void mtk_sha_complete(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + int err = 0; + + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS) + mtk_sha_finish_req(cryp, sha, err); +} + +static int mtk_sha_update(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->offset = 0; + + if ((ctx->bufcnt + ctx->total < SHA_BUF_SIZE) && + !(ctx->flags & SHA_FLAGS_FINUP)) + return mtk_sha_append_sg(ctx); + + return mtk_sha_enqueue(req, SHA_OP_UPDATE); +} + +static int mtk_sha_final(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags |= SHA_FLAGS_FINUP; + + if (ctx->flags & SHA_FLAGS_PAD) + return mtk_sha_finish(req); + + return mtk_sha_enqueue(req, SHA_OP_FINAL); +} + +static int mtk_sha_finup(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + int err1, err2; + + ctx->flags |= SHA_FLAGS_FINUP; + + err1 = mtk_sha_update(req); + if (err1 == -EINPROGRESS || err1 == -EBUSY) + return err1; + /* + * final() has to be always called to cleanup resources + * even if update() failed + */ + err2 = mtk_sha_final(req); + + return err1 ?: err2; +} + 
+static int mtk_sha_digest(struct ahash_request *req) +{ + return mtk_sha_init(req) ?: mtk_sha_finup(req); +} + +static int mtk_sha_setkey(struct crypto_ahash *tfm, + const unsigned char *key, u32 keylen) +{ + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + size_t bs = crypto_shash_blocksize(bctx->shash); + size_t ds = crypto_shash_digestsize(bctx->shash); + int err, i; + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = crypto_shash_get_flags(bctx->shash) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + if (keylen > bs) { + err = crypto_shash_digest(shash, key, keylen, bctx->ipad); + if (err) + return err; + keylen = ds; + } else { + memcpy(bctx->ipad, key, keylen); + } + + memset(bctx->ipad + keylen, 0, bs - keylen); + memcpy(bctx->opad, bctx->ipad, bs); + + for (i = 0; i < bs; i++) { + bctx->ipad[i] ^= 0x36; + bctx->opad[i] ^= 0x5c; + } + + return err; +} + +static int mtk_sha_export(struct ahash_request *req, void *out) +{ + const struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + memcpy(out, ctx, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_import(struct ahash_request *req, const void *in) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + memcpy(ctx, in, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_cra_init_alg(struct crypto_tfm *tfm, + const char *alg_base) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_sha_find_dev(tctx); + if (!cryp) + return -ENODEV; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mtk_sha_reqctx)); + + if (alg_base) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + tctx->flags |= SHA_FLAGS_HMAC; + bctx->shash = crypto_alloc_shash(alg_base, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(bctx->shash)) { + pr_err("base driver %s could not be loaded.\n", + alg_base); + + return PTR_ERR(bctx->shash); + } + } + return 0; +} + +static int mtk_sha_cra_init(struct 
crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, NULL); +} + +static int mtk_sha_cra_sha1_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha1"); +} + +static int mtk_sha_cra_sha224_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha224"); +} + +static int mtk_sha_cra_sha256_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha256"); +} + +static int mtk_sha_cra_sha384_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha384"); +} + +static int mtk_sha_cra_sha512_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha512"); +} + +static void mtk_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + crypto_free_shash(bctx->shash); + } +} + +static struct ahash_alg algs_sha1_sha224_sha256[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "mtk-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "mtk-sha224", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 
SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "mtk-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mtk-hmac-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha1_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "mtk-hmac-sha224", + .cra_priority = 400, + .cra_flags 
= CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha224_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "mtk-hmac-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha256_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static struct ahash_alg algs_sha384_sha512[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "mtk-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = 
sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "mtk-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "mtk-hmac-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha384_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "mtk-hmac-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha512_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static void mtk_sha_task0(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec 
*sha = cryp->sha[0]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static void mtk_sha_task1(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec *sha = cryp->sha[1]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static irqreturn_t mtk_sha_ring2_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[0]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING2)); + + mtk_sha_write(cryp, RDR_STAT(RING2), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING2), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING2), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_sha_ring3_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[1]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING3)); + + mtk_sha_write(cryp, RDR_STAT(RING3), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING3), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING3), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +/* + * The purpose of two SHA records is used to get extra performance. + * It is similar to mtk_aes_record_init(). 
+ */ +static int mtk_sha_record_init(struct mtk_cryp *cryp) +{ + struct mtk_sha_rec **sha = cryp->sha; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + sha[i] = kzalloc(sizeof(**sha), GFP_KERNEL); + if (!sha[i]) + goto err_cleanup; + + sha[i]->id = i + RING2; + + spin_lock_init(&sha[i]->lock); + crypto_init_queue(&sha[i]->queue, SHA_QUEUE_SIZE); + } + + tasklet_init(&sha[0]->task, mtk_sha_task0, (unsigned long)cryp); + tasklet_init(&sha[1]->task, mtk_sha_task1, (unsigned long)cryp); + + cryp->rec = 1; + + return 0; + +err_cleanup: + for (; i--; ) + kfree(sha[i]); + return err; +} + +static void mtk_sha_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->sha[i]->task); + kfree(cryp->sha[i]); + } +} + +static void mtk_sha_unregister_algs(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) + crypto_unregister_ahash(&algs_sha384_sha512[i]); +} + +static int mtk_sha_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) { + err = crypto_register_ahash(&algs_sha1_sha224_sha256[i]); + if (err) + goto err_sha_224_256_algs; + } + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) { + err = crypto_register_ahash(&algs_sha384_sha512[i]); + if (err) + goto err_sha_384_512_algs; + } + + return 0; + +err_sha_384_512_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha384_sha512[i]); + i = ARRAY_SIZE(algs_sha1_sha224_sha256); +err_sha_224_256_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + return err; +} + +int mtk_hash_alg_register(struct mtk_cryp *cryp) +{ + int err; + + INIT_LIST_HEAD(&cryp->sha_list); + + /* Initialize two hash records */ + err = mtk_sha_record_init(cryp); + if (err) + goto err_record; + + /* Ring2 is use by SHA record0 */ + err = devm_request_irq(cryp->dev, 
cryp->irq[RING2], + mtk_sha_ring2_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq0.\n"); + goto err_res; + } + + /* Ring3 is used by SHA record1 */ + err = devm_request_irq(cryp->dev, cryp->irq[RING3], + mtk_sha_ring3_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq1.\n"); + goto err_res; + } + + /* Enable ring2 and ring3 interrupt for hash */ + mtk_sha_write(cryp, AIC_ENABLE_SET(RING2), MTK_IRQ_RDR2); + mtk_sha_write(cryp, AIC_ENABLE_SET(RING3), MTK_IRQ_RDR3); + + cryp->tmp = dma_alloc_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + &cryp->tmp_dma, GFP_KERNEL); + if (!cryp->tmp) { + dev_err(cryp->dev, "unable to allocate tmp buffer.\n"); + err = -ENOMEM; + goto err_res; + } + + spin_lock(&mtk_sha.lock); + list_add_tail(&cryp->sha_list, &mtk_sha.dev_list); + spin_unlock(&mtk_sha.lock); + + err = mtk_sha_register_algs(); + if (err) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); +err_res: + mtk_sha_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-sha initialization failed.\n"); + return err; +} + +/* + * Undo mtk_hash_alg_register(): remove the device from mtk_sha.dev_list, + * unregister the hash algorithms, then free the tmp DMA buffer and the + * SHA records. + */ +void mtk_hash_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + + mtk_sha_unregister_algs(); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); + mtk_sha_record_free(cryp); +} From fb91a661d99f460f2ea4c7f23ed47f56863ca1d1 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Mon, 19 Dec 2016 10:20:45 +0800 Subject: [PATCH 016/142] crypto: mediatek - add DT bindings documentation Add DT bindings documentation for the crypto driver Signed-off-by: Ryder Lee Acked-by: Rob Herring Signed-off-by: Herbert Xu --- .../bindings/crypto/mediatek-crypto.txt | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create
mode 100644 Documentation/devicetree/bindings/crypto/mediatek-crypto.txt diff --git a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt new file mode 100644 index 000000000000..c204725e5873 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt @@ -0,0 +1,27 @@ +MediaTek cryptographic accelerators + +Required properties: +- compatible: Should be "mediatek,eip97-crypto" +- reg: Address and length of the register set for the device +- interrupts: Should contain the five crypto engines interrupts in numeric + order. These are global system and four descriptor rings. +- clocks: the clock used by the core +- clock-names: the names of the clock listed in the clocks property. These are + "ethif", "cryp" +- power-domains: Must contain a reference to the PM domain. + + +Example: + crypto: crypto@1b240000 { + compatible = "mediatek,eip97-crypto"; + reg = <0 0x1b240000 0 0x20000>; + interrupts = , + , + , + , + ; + clocks = <&topckgen CLK_TOP_ETHIF_SEL>, + <&ethsys CLK_ETHSYS_CRYPTO>; + clock-names = "ethif","cryp"; + power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>; + }; From 5386e5d1f8b7305e447b781f7ac02649f7a4d055 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 Dec 2016 17:39:26 +0800 Subject: [PATCH 017/142] Revert "crypto: arm64/ARM: NEON accelerated ChaCha20" This patch reverts the following commits: 8621caa0d45e731f2e9f5889ff5bb384fcd6e059 8096667273477e735b0072b11a6d617ccee45e5f I should not have applied them because they had already been obsoleted by a subsequent patch series. They also cause a build failure because of the subsequent commit 9ae433bc79f9.
Fixes: 9ae433bc79f ("crypto: chacha20 - convert generic and...") Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 6 - arch/arm/crypto/Makefile | 2 - arch/arm/crypto/chacha20-neon-core.S | 523 ------------------------- arch/arm/crypto/chacha20-neon-glue.c | 136 ------- arch/arm64/crypto/Kconfig | 6 - arch/arm64/crypto/Makefile | 3 - arch/arm64/crypto/chacha20-neon-core.S | 480 ----------------------- arch/arm64/crypto/chacha20-neon-glue.c | 131 ------- 8 files changed, 1287 deletions(-) delete mode 100644 arch/arm/crypto/chacha20-neon-core.S delete mode 100644 arch/arm/crypto/chacha20-neon-glue.c delete mode 100644 arch/arm64/crypto/chacha20-neon-core.S delete mode 100644 arch/arm64/crypto/chacha20-neon-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 2f3339f015d3..13f1b4c289d4 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -130,10 +130,4 @@ config CRYPTO_CRC32_ARM_CE depends on KERNEL_MODE_NEON && CRC32 select CRYPTO_HASH -config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha20 symmetric cipher" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_CHACHA20 - endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 8d74e55eacd4..b578a1820ab1 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -41,7 +40,6 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o 
quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S deleted file mode 100644 index b0a35935be7e..000000000000 --- a/arch/arm/crypto/chacha20-neon-core.S +++ /dev/null @@ -1,523 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SNEON3 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include - - .text - .fpu neon - .align 5 - -ENTRY(chacha20_block_xor_neon) - // r0: Input state matrix, s - // r1: 1 data block output, o - // r2: 1 data block input, i - - // - // This function encrypts one ChaCha20 block by loading the state matrix - // in four NEON registers. It performs matrix operation on four words in - // parallel, but requireds shuffling to rearrange the words after each - // round. 
- // - - // x0..3 = s0..3 - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - vmov q8, q0 - vmov q9, q1 - vmov q10, q2 - vmov q11, q3 - - mov r3, #10 - -.Ldoubleround: - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) - vext.8 q1, q1, q1, #4 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) - vext.8 q3, q3, q3, #12 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) - vext.8 q1, q1, q1, #12 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) - vext.8 q3, q3, q3, #4 - - subs r3, r3, #1 - bne .Ldoubleround - - add ip, r2, #0x20 - vld1.8 {q4-q5}, [r2] - vld1.8 {q6-q7}, [ip] - - // o0 = i0 ^ (x0 + s0) - vadd.i32 q0, q0, q8 - veor q0, q0, q4 - - // o1 = i1 ^ (x1 + s1) - vadd.i32 q1, q1, q9 - veor q1, q1, q5 - - // o2 = i2 ^ (x2 + s2) - vadd.i32 q2, q2, q10 - veor q2, q2, q6 - - // o3 = i3 ^ (x3 + s3) - vadd.i32 q3, q3, q11 - veor q3, q3, q7 - - add ip, r1, 
#0x20 - vst1.8 {q0-q1}, [r1] - vst1.8 {q2-q3}, [ip] - - bx lr -ENDPROC(chacha20_block_xor_neon) - - .align 5 -ENTRY(chacha20_4block_xor_neon) - push {r4-r6, lr} - mov ip, sp // preserve the stack pointer - sub r3, sp, #0x20 // allocate a 32 byte buffer - bic r3, r3, #0x1f // aligned to 32 bytes - mov sp, r3 - - // r0: Input state matrix, s - // r1: 4 data blocks output, o - // r2: 4 data blocks input, i - - // - // This function encrypts four consecutive ChaCha20 blocks by loading - // the state matrix in NEON registers four times. The algorithm performs - // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. For final XORing step we transpose the - // matrix by interleaving 32- and then 64-bit words, which allows us to - // do XOR in NEON registers. - // - - // x0..15[0-3] = s0..3[0..3] - add r3, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [r3] - - adr r3, CTRINC - vdup.32 q15, d7[1] - vdup.32 q14, d7[0] - vld1.32 {q11}, [r3, :128] - vdup.32 q13, d6[1] - vdup.32 q12, d6[0] - vadd.i32 q12, q12, q11 // x12 += counter values 0-3 - vdup.32 q11, d5[1] - vdup.32 q10, d5[0] - vdup.32 q9, d4[1] - vdup.32 q8, d4[0] - vdup.32 q7, d3[1] - vdup.32 q6, d3[0] - vdup.32 q5, d2[1] - vdup.32 q4, d2[0] - vdup.32 q3, d1[1] - vdup.32 q2, d1[0] - vdup.32 q1, d0[1] - vdup.32 q0, d0[0] - - mov r3, #10 - -.Ldoubleround4: - // x0 += x4, x12 = rotl32(x12 ^ x0, 16) - // x1 += x5, x13 = rotl32(x13 ^ x1, 16) - // x2 += x6, x14 = rotl32(x14 ^ x2, 16) - // x3 += x7, x15 = rotl32(x15 ^ x3, 16) - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q12, q12, q0 - veor q13, q13, q1 - veor q14, q14, q2 - veor q15, q15, q3 - - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - vrev32.16 q15, q15 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 12) - // x9 += x13, x5 = rotl32(x5 ^ x9, 12) - // x10 += x14, x6 = rotl32(x6 ^ x10, 12) - // x11 += x15, x7 = rotl32(x7 ^ x11, 12) - vadd.i32 q8, q8, q12 - 
vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #12 - vshl.u32 q5, q9, #12 - vsri.u32 q4, q8, #20 - vsri.u32 q5, q9, #20 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #12 - vshl.u32 q7, q9, #12 - vsri.u32 q6, q8, #20 - vsri.u32 q7, q9, #20 - - // x0 += x4, x12 = rotl32(x12 ^ x0, 8) - // x1 += x5, x13 = rotl32(x13 ^ x1, 8) - // x2 += x6, x14 = rotl32(x14 ^ x2, 8) - // x3 += x7, x15 = rotl32(x15 ^ x3, 8) - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q8, q12, q0 - veor q9, q13, q1 - vshl.u32 q12, q8, #8 - vshl.u32 q13, q9, #8 - vsri.u32 q12, q8, #24 - vsri.u32 q13, q9, #24 - - veor q8, q14, q2 - veor q9, q15, q3 - vshl.u32 q14, q8, #8 - vshl.u32 q15, q9, #8 - vsri.u32 q14, q8, #24 - vsri.u32 q15, q9, #24 - - vld1.32 {q8-q9}, [sp, :256] - - // x8 += x12, x4 = rotl32(x4 ^ x8, 7) - // x9 += x13, x5 = rotl32(x5 ^ x9, 7) - // x10 += x14, x6 = rotl32(x6 ^ x10, 7) - // x11 += x15, x7 = rotl32(x7 ^ x11, 7) - vadd.i32 q8, q8, q12 - vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #7 - vshl.u32 q5, q9, #7 - vsri.u32 q4, q8, #25 - vsri.u32 q5, q9, #25 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #7 - vshl.u32 q7, q9, #7 - vsri.u32 q6, q8, #25 - vsri.u32 q7, q9, #25 - - vld1.32 {q8-q9}, [sp, :256] - - // x0 += x5, x15 = rotl32(x15 ^ x0, 16) - // x1 += x6, x12 = rotl32(x12 ^ x1, 16) - // x2 += x7, x13 = rotl32(x13 ^ x2, 16) - // x3 += x4, x14 = rotl32(x14 ^ x3, 16) - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q15, q15, q0 - veor q12, q12, q1 - veor q13, q13, q2 - veor q14, q14, q3 - - vrev32.16 q15, q15 - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 12) - // x11 += x12, x6 = 
rotl32(x6 ^ x11, 12) - // x8 += x13, x7 = rotl32(x7 ^ x8, 12) - // x9 += x14, x4 = rotl32(x4 ^ x9, 12) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 q7, q8, #12 - vshl.u32 q4, q9, #12 - vsri.u32 q7, q8, #20 - vsri.u32 q4, q9, #20 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #12 - vshl.u32 q6, q9, #12 - vsri.u32 q5, q8, #20 - vsri.u32 q6, q9, #20 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 8) - // x1 += x6, x12 = rotl32(x12 ^ x1, 8) - // x2 += x7, x13 = rotl32(x13 ^ x2, 8) - // x3 += x4, x14 = rotl32(x14 ^ x3, 8) - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q8, q15, q0 - veor q9, q12, q1 - vshl.u32 q15, q8, #8 - vshl.u32 q12, q9, #8 - vsri.u32 q15, q8, #24 - vsri.u32 q12, q9, #24 - - veor q8, q13, q2 - veor q9, q14, q3 - vshl.u32 q13, q8, #8 - vshl.u32 q14, q9, #8 - vsri.u32 q13, q8, #24 - vsri.u32 q14, q9, #24 - - vld1.32 {q8-q9}, [sp, :256] - - // x10 += x15, x5 = rotl32(x5 ^ x10, 7) - // x11 += x12, x6 = rotl32(x6 ^ x11, 7) - // x8 += x13, x7 = rotl32(x7 ^ x8, 7) - // x9 += x14, x4 = rotl32(x4 ^ x9, 7) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 q7, q8, #7 - vshl.u32 q4, q9, #7 - vsri.u32 q7, q8, #25 - vsri.u32 q4, q9, #25 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #7 - vshl.u32 q6, q9, #7 - vsri.u32 q5, q8, #25 - vsri.u32 q6, q9, #25 - - subs r3, r3, #1 - beq 0f - - vld1.32 {q8-q9}, [sp, :256] - b .Ldoubleround4 - - // x0[0-3] += s0[0] - // x1[0-3] += s0[1] - // x2[0-3] += s0[2] - // x3[0-3] += s0[3] -0: ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q0, q0, q8 - vadd.i32 q1, q1, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q2, q2, q8 - vadd.i32 q3, q3, q9 - - // x4[0-3] += s1[0] - // x5[0-3] += s1[1] - // 
x6[0-3] += s1[2] - // x7[0-3] += s1[3] - ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q4, q4, q8 - vadd.i32 q5, q5, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q6, q6, q8 - vadd.i32 q7, q7, q9 - - // interleave 32-bit words in state n, n+1 - vzip.32 q0, q1 - vzip.32 q2, q3 - vzip.32 q4, q5 - vzip.32 q6, q7 - - // interleave 64-bit words in state n, n+2 - vswp d1, d4 - vswp d3, d6 - vswp d9, d12 - vswp d11, d14 - - // xor with corresponding input, write to output - vld1.8 {q8-q9}, [r2]! - veor q8, q8, q0 - veor q9, q9, q4 - vst1.8 {q8-q9}, [r1]! - - vld1.32 {q8-q9}, [sp, :256] - - // x8[0-3] += s2[0] - // x9[0-3] += s2[1] - // x10[0-3] += s2[2] - // x11[0-3] += s2[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - vadd.i32 q8, q8, q0 - vadd.i32 q9, q9, q4 - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q10, q10, q0 - vadd.i32 q11, q11, q4 - - // x12[0-3] += s3[0] - // x13[0-3] += s3[1] - // x14[0-3] += s3[2] - // x15[0-3] += s3[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - adr r3, CTRINC - vadd.i32 q12, q12, q0 - vld1.32 {q0}, [r3, :128] - vadd.i32 q13, q13, q4 - vadd.i32 q12, q12, q0 // x12 += counter values 0-3 - - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q14, q14, q0 - vadd.i32 q15, q15, q4 - - // interleave 32-bit words in state n, n+1 - vzip.32 q8, q9 - vzip.32 q10, q11 - vzip.32 q12, q13 - vzip.32 q14, q15 - - // interleave 64-bit words in state n, n+2 - vswp d17, d20 - vswp d19, d22 - vswp d25, d28 - vswp d27, d30 - - vmov q4, q1 - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q8 - veor q1, q1, q12 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q2 - veor q1, q1, q6 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q10 - veor q1, q1, q14 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q4 - veor q1, q1, q5 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q9 - veor q1, q1, q13 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! 
- veor q0, q0, q3 - veor q1, q1, q7 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2] - veor q0, q0, q11 - veor q1, q1, q15 - vst1.8 {q0-q1}, [r1] - - mov sp, ip - pop {r4-r6, pc} -ENDPROC(chacha20_4block_xor_neon) - - .align 4 -CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c deleted file mode 100644 index 554f7f6069da..000000000000 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); - -static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - u8 buf[CHACHA20_BLOCK_SIZE]; - - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { - chacha20_4block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha20_block_xor_neon(state, buf, buf); - memcpy(dst, buf, bytes); - } -} - -static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - u32 state[16]; - int err; - - if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); - - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); - - kernel_neon_begin(); - - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); - } - - if (walk.nbytes) { - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); - } - - kernel_neon_end(); - - return err; -} - -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-neon", - .cra_priority = 300, - 
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_simd, - .decrypt = chacha20_simd, - }, - }, -}; - -static int __init chacha20_simd_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_alg(&alg); -} - -static void __exit chacha20_simd_mod_fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(chacha20_simd_mod_init); -module_exit(chacha20_simd_mod_fini); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 0bf0f531f539..450a85df041a 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -72,10 +72,4 @@ config CRYPTO_CRC32_ARM64 depends on ARM64 select CRYPTO_HASH -config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha20 symmetric cipher" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_CHACHA20 - endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 9d2826c5fccf..aa8888d7b744 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -41,9 +41,6 @@ sha256-arm64-y := sha256-glue.o sha256-core.o obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o - AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S deleted file mode 100644 index e2cd65580807..000000000000 --- 
a/arch/arm64/crypto/chacha20-neon-core.S +++ /dev/null @@ -1,480 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include - - .text - .align 6 - -ENTRY(chacha20_block_xor_neon) - // x0: Input state matrix, s - // x1: 1 data block output, o - // x2: 1 data block input, i - - // - // This function encrypts one ChaCha20 block by loading the state matrix - // in four NEON registers. It performs matrix operation on four words in - // parallel, but requires shuffling to rearrange the words after each - // round. 
- // - - // x0..3 = s0..3 - ld1 {v0.4s-v3.4s}, [x0] - ld1 {v8.4s-v11.4s}, [x0] - - mov x3, #10 - -.Ldoubleround: - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - rev32 v3.8h, v3.8h - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #12 - sri v1.4s, v4.4s, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - add v0.4s, v0.4s, v1.4s - eor v4.16b, v3.16b, v0.16b - shl v3.4s, v4.4s, #8 - sri v3.4s, v4.4s, #24 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #7 - sri v1.4s, v4.4s, #25 - - // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) - ext v1.16b, v1.16b, v1.16b, #4 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - ext v2.16b, v2.16b, v2.16b, #8 - // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) - ext v3.16b, v3.16b, v3.16b, #12 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - rev32 v3.8h, v3.8h - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #12 - sri v1.4s, v4.4s, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - add v0.4s, v0.4s, v1.4s - eor v4.16b, v3.16b, v0.16b - shl v3.4s, v4.4s, #8 - sri v3.4s, v4.4s, #24 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #7 - sri v1.4s, v4.4s, #25 - - // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) - ext v1.16b, v1.16b, v1.16b, #12 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - ext v2.16b, v2.16b, v2.16b, #8 - // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) - ext v3.16b, v3.16b, v3.16b, #4 - - subs x3, x3, #1 - b.ne .Ldoubleround - - ld1 {v4.16b-v7.16b}, [x2] - - // o0 = i0 ^ (x0 + s0) - add v0.4s, v0.4s, v8.4s - eor v0.16b, v0.16b, v4.16b - - // o1 = i1 ^ (x1 + s1) - add v1.4s, v1.4s, v9.4s - eor v1.16b, v1.16b, v5.16b - - // o2 = i2 ^ (x2 + s2) - add v2.4s, v2.4s, v10.4s - eor v2.16b, v2.16b, v6.16b - - // o3 = i3 ^ 
(x3 + s3) - add v3.4s, v3.4s, v11.4s - eor v3.16b, v3.16b, v7.16b - - st1 {v0.16b-v3.16b}, [x1] - - ret -ENDPROC(chacha20_block_xor_neon) - - .align 6 -ENTRY(chacha20_4block_xor_neon) - // x0: Input state matrix, s - // x1: 4 data blocks output, o - // x2: 4 data blocks input, i - - // - // This function encrypts four consecutive ChaCha20 blocks by loading - // the state matrix in NEON registers four times. The algorithm performs - // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. For final XORing step we transpose the - // matrix by interleaving 32- and then 64-bit words, which allows us to - // do XOR in NEON registers. - // - adr x3, CTRINC - ld1 {v16.4s}, [x3] - - // x0..15[0-3] = s0..3[0..3] - mov x4, x0 - ld4r { v0.4s- v3.4s}, [x4], #16 - ld4r { v4.4s- v7.4s}, [x4], #16 - ld4r { v8.4s-v11.4s}, [x4], #16 - ld4r {v12.4s-v15.4s}, [x4] - - // x12 += counter values 0-3 - add v12.4s, v12.4s, v16.4s - - mov x3, #10 - -.Ldoubleround4: - // x0 += x4, x12 = rotl32(x12 ^ x0, 16) - // x1 += x5, x13 = rotl32(x13 ^ x1, 16) - // x2 += x6, x14 = rotl32(x14 ^ x2, 16) - // x3 += x7, x15 = rotl32(x15 ^ x3, 16) - add v0.4s, v0.4s, v4.4s - add v1.4s, v1.4s, v5.4s - add v2.4s, v2.4s, v6.4s - add v3.4s, v3.4s, v7.4s - - eor v12.16b, v12.16b, v0.16b - eor v13.16b, v13.16b, v1.16b - eor v14.16b, v14.16b, v2.16b - eor v15.16b, v15.16b, v3.16b - - rev32 v12.8h, v12.8h - rev32 v13.8h, v13.8h - rev32 v14.8h, v14.8h - rev32 v15.8h, v15.8h - - // x8 += x12, x4 = rotl32(x4 ^ x8, 12) - // x9 += x13, x5 = rotl32(x5 ^ x9, 12) - // x10 += x14, x6 = rotl32(x6 ^ x10, 12) - // x11 += x15, x7 = rotl32(x7 ^ x11, 12) - add v8.4s, v8.4s, v12.4s - add v9.4s, v9.4s, v13.4s - add v10.4s, v10.4s, v14.4s - add v11.4s, v11.4s, v15.4s - - eor v17.16b, v4.16b, v8.16b - eor v18.16b, v5.16b, v9.16b - eor v19.16b, v6.16b, v10.16b - eor v20.16b, v7.16b, v11.16b - - shl v4.4s, v17.4s, #12 - shl v5.4s, v18.4s, #12 - shl v6.4s, v19.4s, #12 - shl v7.4s, v20.4s, 
#12 - - sri v4.4s, v17.4s, #20 - sri v5.4s, v18.4s, #20 - sri v6.4s, v19.4s, #20 - sri v7.4s, v20.4s, #20 - - // x0 += x4, x12 = rotl32(x12 ^ x0, 8) - // x1 += x5, x13 = rotl32(x13 ^ x1, 8) - // x2 += x6, x14 = rotl32(x14 ^ x2, 8) - // x3 += x7, x15 = rotl32(x15 ^ x3, 8) - add v0.4s, v0.4s, v4.4s - add v1.4s, v1.4s, v5.4s - add v2.4s, v2.4s, v6.4s - add v3.4s, v3.4s, v7.4s - - eor v17.16b, v12.16b, v0.16b - eor v18.16b, v13.16b, v1.16b - eor v19.16b, v14.16b, v2.16b - eor v20.16b, v15.16b, v3.16b - - shl v12.4s, v17.4s, #8 - shl v13.4s, v18.4s, #8 - shl v14.4s, v19.4s, #8 - shl v15.4s, v20.4s, #8 - - sri v12.4s, v17.4s, #24 - sri v13.4s, v18.4s, #24 - sri v14.4s, v19.4s, #24 - sri v15.4s, v20.4s, #24 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 7) - // x9 += x13, x5 = rotl32(x5 ^ x9, 7) - // x10 += x14, x6 = rotl32(x6 ^ x10, 7) - // x11 += x15, x7 = rotl32(x7 ^ x11, 7) - add v8.4s, v8.4s, v12.4s - add v9.4s, v9.4s, v13.4s - add v10.4s, v10.4s, v14.4s - add v11.4s, v11.4s, v15.4s - - eor v17.16b, v4.16b, v8.16b - eor v18.16b, v5.16b, v9.16b - eor v19.16b, v6.16b, v10.16b - eor v20.16b, v7.16b, v11.16b - - shl v4.4s, v17.4s, #7 - shl v5.4s, v18.4s, #7 - shl v6.4s, v19.4s, #7 - shl v7.4s, v20.4s, #7 - - sri v4.4s, v17.4s, #25 - sri v5.4s, v18.4s, #25 - sri v6.4s, v19.4s, #25 - sri v7.4s, v20.4s, #25 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 16) - // x1 += x6, x12 = rotl32(x12 ^ x1, 16) - // x2 += x7, x13 = rotl32(x13 ^ x2, 16) - // x3 += x4, x14 = rotl32(x14 ^ x3, 16) - add v0.4s, v0.4s, v5.4s - add v1.4s, v1.4s, v6.4s - add v2.4s, v2.4s, v7.4s - add v3.4s, v3.4s, v4.4s - - eor v15.16b, v15.16b, v0.16b - eor v12.16b, v12.16b, v1.16b - eor v13.16b, v13.16b, v2.16b - eor v14.16b, v14.16b, v3.16b - - rev32 v15.8h, v15.8h - rev32 v12.8h, v12.8h - rev32 v13.8h, v13.8h - rev32 v14.8h, v14.8h - - // x10 += x15, x5 = rotl32(x5 ^ x10, 12) - // x11 += x12, x6 = rotl32(x6 ^ x11, 12) - // x8 += x13, x7 = rotl32(x7 ^ x8, 12) - // x9 += x14, x4 = rotl32(x4 ^ x9, 12) - add v10.4s, v10.4s, 
v15.4s - add v11.4s, v11.4s, v12.4s - add v8.4s, v8.4s, v13.4s - add v9.4s, v9.4s, v14.4s - - eor v17.16b, v5.16b, v10.16b - eor v18.16b, v6.16b, v11.16b - eor v19.16b, v7.16b, v8.16b - eor v20.16b, v4.16b, v9.16b - - shl v5.4s, v17.4s, #12 - shl v6.4s, v18.4s, #12 - shl v7.4s, v19.4s, #12 - shl v4.4s, v20.4s, #12 - - sri v5.4s, v17.4s, #20 - sri v6.4s, v18.4s, #20 - sri v7.4s, v19.4s, #20 - sri v4.4s, v20.4s, #20 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 8) - // x1 += x6, x12 = rotl32(x12 ^ x1, 8) - // x2 += x7, x13 = rotl32(x13 ^ x2, 8) - // x3 += x4, x14 = rotl32(x14 ^ x3, 8) - add v0.4s, v0.4s, v5.4s - add v1.4s, v1.4s, v6.4s - add v2.4s, v2.4s, v7.4s - add v3.4s, v3.4s, v4.4s - - eor v17.16b, v15.16b, v0.16b - eor v18.16b, v12.16b, v1.16b - eor v19.16b, v13.16b, v2.16b - eor v20.16b, v14.16b, v3.16b - - shl v15.4s, v17.4s, #8 - shl v12.4s, v18.4s, #8 - shl v13.4s, v19.4s, #8 - shl v14.4s, v20.4s, #8 - - sri v15.4s, v17.4s, #24 - sri v12.4s, v18.4s, #24 - sri v13.4s, v19.4s, #24 - sri v14.4s, v20.4s, #24 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 7) - // x11 += x12, x6 = rotl32(x6 ^ x11, 7) - // x8 += x13, x7 = rotl32(x7 ^ x8, 7) - // x9 += x14, x4 = rotl32(x4 ^ x9, 7) - add v10.4s, v10.4s, v15.4s - add v11.4s, v11.4s, v12.4s - add v8.4s, v8.4s, v13.4s - add v9.4s, v9.4s, v14.4s - - eor v17.16b, v5.16b, v10.16b - eor v18.16b, v6.16b, v11.16b - eor v19.16b, v7.16b, v8.16b - eor v20.16b, v4.16b, v9.16b - - shl v5.4s, v17.4s, #7 - shl v6.4s, v18.4s, #7 - shl v7.4s, v19.4s, #7 - shl v4.4s, v20.4s, #7 - - sri v5.4s, v17.4s, #25 - sri v6.4s, v18.4s, #25 - sri v7.4s, v19.4s, #25 - sri v4.4s, v20.4s, #25 - - subs x3, x3, #1 - b.ne .Ldoubleround4 - - // x0[0-3] += s0[0] - // x1[0-3] += s0[1] - // x2[0-3] += s0[2] - // x3[0-3] += s0[3] - ld4r {v17.4s-v20.4s}, [x0], #16 - add v0.4s, v0.4s, v17.4s - add v1.4s, v1.4s, v18.4s - add v2.4s, v2.4s, v19.4s - add v3.4s, v3.4s, v20.4s - - // x4[0-3] += s1[0] - // x5[0-3] += s1[1] - // x6[0-3] += s1[2] - // x7[0-3] += s1[3] - ld4r 
{v21.4s-v24.4s}, [x0], #16 - add v4.4s, v4.4s, v21.4s - add v5.4s, v5.4s, v22.4s - add v6.4s, v6.4s, v23.4s - add v7.4s, v7.4s, v24.4s - - // x8[0-3] += s2[0] - // x9[0-3] += s2[1] - // x10[0-3] += s2[2] - // x11[0-3] += s2[3] - ld4r {v17.4s-v20.4s}, [x0], #16 - add v8.4s, v8.4s, v17.4s - add v9.4s, v9.4s, v18.4s - add v10.4s, v10.4s, v19.4s - add v11.4s, v11.4s, v20.4s - - // x12[0-3] += s3[0] - // x13[0-3] += s3[1] - // x14[0-3] += s3[2] - // x15[0-3] += s3[3] - ld4r {v21.4s-v24.4s}, [x0] - add v12.4s, v12.4s, v21.4s - add v13.4s, v13.4s, v22.4s - add v14.4s, v14.4s, v23.4s - add v15.4s, v15.4s, v24.4s - - // x12 += counter values 0-3 - add v12.4s, v12.4s, v16.4s - - ld1 {v16.16b-v19.16b}, [x2], #64 - ld1 {v20.16b-v23.16b}, [x2], #64 - - // interleave 32-bit words in state n, n+1 - zip1 v24.4s, v0.4s, v1.4s - zip1 v25.4s, v2.4s, v3.4s - zip1 v26.4s, v4.4s, v5.4s - zip1 v27.4s, v6.4s, v7.4s - zip1 v28.4s, v8.4s, v9.4s - zip1 v29.4s, v10.4s, v11.4s - zip1 v30.4s, v12.4s, v13.4s - zip1 v31.4s, v14.4s, v15.4s - - zip2 v1.4s, v0.4s, v1.4s - zip2 v3.4s, v2.4s, v3.4s - zip2 v5.4s, v4.4s, v5.4s - zip2 v7.4s, v6.4s, v7.4s - zip2 v9.4s, v8.4s, v9.4s - zip2 v11.4s, v10.4s, v11.4s - zip2 v13.4s, v12.4s, v13.4s - zip2 v15.4s, v14.4s, v15.4s - - mov v0.16b, v24.16b - mov v2.16b, v25.16b - mov v4.16b, v26.16b - mov v6.16b, v27.16b - mov v8.16b, v28.16b - mov v10.16b, v29.16b - mov v12.16b, v30.16b - mov v14.16b, v31.16b - - // interleave 64-bit words in state n, n+2 - zip1 v24.2d, v0.2d, v2.2d - zip1 v25.2d, v1.2d, v3.2d - zip1 v26.2d, v4.2d, v6.2d - zip1 v27.2d, v5.2d, v7.2d - zip1 v28.2d, v8.2d, v10.2d - zip1 v29.2d, v9.2d, v11.2d - zip1 v30.2d, v12.2d, v14.2d - zip1 v31.2d, v13.2d, v15.2d - - zip2 v2.2d, v0.2d, v2.2d - zip2 v3.2d, v1.2d, v3.2d - zip2 v6.2d, v4.2d, v6.2d - zip2 v7.2d, v5.2d, v7.2d - zip2 v10.2d, v8.2d, v10.2d - zip2 v11.2d, v9.2d, v11.2d - zip2 v14.2d, v12.2d, v14.2d - zip2 v15.2d, v13.2d, v15.2d - - mov v0.16b, v24.16b - mov v1.16b, v25.16b - mov v4.16b, 
v26.16b - mov v5.16b, v27.16b - - mov v8.16b, v28.16b - mov v9.16b, v29.16b - mov v12.16b, v30.16b - mov v13.16b, v31.16b - - ld1 {v24.16b-v27.16b}, [x2], #64 - ld1 {v28.16b-v31.16b}, [x2] - - // xor with corresponding input, write to output - eor v16.16b, v16.16b, v0.16b - eor v17.16b, v17.16b, v4.16b - eor v18.16b, v18.16b, v8.16b - eor v19.16b, v19.16b, v12.16b - st1 {v16.16b-v19.16b}, [x1], #64 - - eor v20.16b, v20.16b, v2.16b - eor v21.16b, v21.16b, v6.16b - eor v22.16b, v22.16b, v10.16b - eor v23.16b, v23.16b, v14.16b - st1 {v20.16b-v23.16b}, [x1], #64 - - eor v24.16b, v24.16b, v1.16b - eor v25.16b, v25.16b, v5.16b - eor v26.16b, v26.16b, v9.16b - eor v27.16b, v27.16b, v13.16b - st1 {v24.16b-v27.16b}, [x1], #64 - - eor v28.16b, v28.16b, v3.16b - eor v29.16b, v29.16b, v7.16b - eor v30.16b, v30.16b, v11.16b - eor v31.16b, v31.16b, v15.16b - st1 {v28.16b-v31.16b}, [x1] - - ret -ENDPROC(chacha20_4block_xor_neon) - -CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c deleted file mode 100644 index 705b42b06d00..000000000000 --- a/arch/arm64/crypto/chacha20-neon-glue.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include - -#include - -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); - -static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) -{ - u8 buf[CHACHA20_BLOCK_SIZE]; - - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { - chacha20_4block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha20_block_xor_neon(state, buf, buf); - memcpy(dst, buf, bytes); - } -} - -static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - u32 state[16]; - int err; - - if (nbytes <= CHACHA20_BLOCK_SIZE) - return crypto_chacha20_crypt(desc, dst, src, nbytes); - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); - - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); - - kernel_neon_begin(); - - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); - } - - if (walk.nbytes) { - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); - } - - kernel_neon_end(); - - return err; -} - -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-neon", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - 
.cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_simd, - .decrypt = chacha20_simd, - }, - }, -}; - -static int __init chacha20_simd_mod_init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit chacha20_simd_mod_fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(chacha20_simd_mod_init); -module_exit(chacha20_simd_mod_fini); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); From 0a3b1abedf265fe524dd19be39c85ba8af5e2bc6 Mon Sep 17 00:00:00 2001 From: Ahsan Atta Date: Thu, 22 Dec 2016 14:58:23 +0000 Subject: [PATCH 018/142] crypto: qat - fix indentation Signed-off-by: Ahsan Atta Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_dev_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c index b3ebb25f9ca7..8afac52677a6 100644 --- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c @@ -152,7 +152,7 @@ void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data) ptr->hw_device->instance_id = i++; if (i == class->instances) - break; + break; } } EXPORT_SYMBOL_GPL(adf_devmgr_update_class_index); From aebeff888256a8c6915c8f264746aa721487183c Mon Sep 17 00:00:00 2001 From: Ahsan Atta Date: Thu, 22 Dec 2016 14:59:24 +0000 Subject: [PATCH 019/142] crypto: qat - fix comments describing adf_disable_sriov() Signed-off-by: Ahsan Atta Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_sriov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 9320ae1d005b..b36d8653b1ba 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -162,9 +162,9 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) /** * adf_disable_sriov() - Disable SRIOV for the device - * @pdev: Pointer to pci device. + * @accel_dev: Pointer to accel device. * - * Function disables SRIOV for the pci device. + * Function disables SRIOV for the accel device. * * Return: 0 on success, error code otherwise. */ From ac6d9a2cec195a7778e013f405cfb86c1dd244a7 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 22 Dec 2016 14:59:40 +0000 Subject: [PATCH 020/142] crypto: qat - replace hardcoded BIT(0) in vf_isr Replace BIT(0) macro with proper definition in pf2vf path Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_vf_isr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index bf99e11a3403..4a73fc70f7a9 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -148,7 +148,7 @@ static void adf_pf2vf_bh_handler(void *data) INIT_WORK(&stop_data->work, adf_dev_stop_async); queue_work(adf_vf_stop_wq, &stop_data->work); /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); return; } @@ -168,7 +168,7 @@ static void adf_pf2vf_bh_handler(void *data) } /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); /* Re-enable PF2VF interrupts */ From 4f78ce870a4da0cf4db265ec4fd31eb4fb7f9c57 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 22 Dec 2016 15:00:12 +0000 Subject: [PATCH 021/142] crypto: 
qat - fix bar discovery for c62x Some accelerators of the c62x series have only two bars. This patch skips BAR0 if the accelerator does not have it. Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c62x/adf_drv.c | 2 +- drivers/crypto/qat/qat_common/adf_accel_devices.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index bc5cbc193aae..5b2d78a5b5aa 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -233,7 +233,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) &hw_data->accel_capabilities_mask); /* Find and map all the device's BARS */ - i = 0; + i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, ADF_PCI_MAX_BARS * 2) { diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index e8822536530b..33f0a6251e38 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -69,6 +69,7 @@ #define ADF_ERRSOU5 (0x3A000 + 0xD8) #define ADF_DEVICE_FUSECTL_OFFSET 0x40 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C +#define ADF_DEVICE_FUSECTL_MASK 0x80000000 #define ADF_PCI_MAX_BARS 3 #define ADF_DEVICE_NAME_LENGTH 32 #define ADF_ETR_MAX_RINGS_PER_BANK 16 From 841d6d10b38a2508c640ce7784bad825e2d76fe8 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 22 Dec 2016 15:00:24 +0000 Subject: [PATCH 022/142] crypto: qat - zero esram only for DH85x devices Zero embedded ram in DH85x devices. This is not needed for newer generations as it is done by HW. 
Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_hal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index 1e480f140663..8c4fd255a601 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -456,7 +456,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) unsigned int csr_val; int times = 30; - if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID) + if (handle->pci_dev->device != ADF_DH895XCC_PCI_DEVICE_ID) return 0; csr_val = ADF_CSR_RD(csr_addr, 0); @@ -716,7 +716,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev) (void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v + LOCAL_TO_XFER_REG_OFFSET); handle->pci_dev = pci_info->pci_dev; - if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) { + if (handle->pci_dev->device == ADF_DH895XCC_PCI_DEVICE_ID) { sram_bar = &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)]; handle->hal_sram_addr_v = sram_bar->virt_addr; From 21aad0b6ca4d203b2486dfbf97921a2ecd768bcc Mon Sep 17 00:00:00 2001 From: Pablo Marcos Oltra Date: Thu, 22 Dec 2016 15:00:34 +0000 Subject: [PATCH 023/142] crypto: qat - modify format of dev top level debugfs entries Remove leading zeros in pci function number to be consistent with output from lspci. 
Signed-off-by: Pablo Marcos Oltra Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_drv.c | 2 +- drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 2 +- drivers/crypto/qat/qat_c62x/adf_drv.c | 2 +- drivers/crypto/qat/qat_c62xvf/adf_drv.c | 2 +- drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 2 +- drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 640c3fc870fd..f172171668ee 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 949d77b79fbe..24ec908eb26c 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 5b2d78a5b5aa..58a984c9c3ec 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level 
debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 7540ce13b0d0..b9f3e0e4fde9 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 4d2de2838451..2ce01f010c74 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 60df98632fa2..26ab17bfc6da 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), 
"%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); From 1043c5146877ea48e4c8569a1c7931e1bfb6acb5 Mon Sep 17 00:00:00 2001 From: Xin Zeng Date: Thu, 22 Dec 2016 15:01:02 +0000 Subject: [PATCH 024/142] crypto: qat - increase number of supported devices The unsigned long type for init_status and start_status in service_hndl are not long enough to represent more than 64 acceleration devices. Use an array instead. Signed-off-by: Xin Zeng Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../crypto/qat/qat_common/adf_cfg_common.h | 1 + .../crypto/qat/qat_common/adf_common_drv.h | 4 +-- drivers/crypto/qat/qat_common/adf_init.c | 28 +++++++++++-------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_cfg_common.h b/drivers/crypto/qat/qat_common/adf_cfg_common.h index 8c4f6573ce59..1211261de7c2 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_common.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_common.h @@ -61,6 +61,7 @@ #define ADF_CFG_AFFINITY_WHATEVER 0xFF #define MAX_DEVICE_NAME_SIZE 32 #define ADF_MAX_DEVICES (32 * 32) +#define ADF_DEVS_ARRAY_SIZE BITS_TO_LONGS(ADF_MAX_DEVICES) enum adf_cfg_val_type { ADF_DEC, diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 980e07475012..5c4c0a253129 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -87,8 +87,8 @@ enum adf_event { struct service_hndl { int (*event_hld)(struct adf_accel_dev *accel_dev, enum adf_event event); - unsigned long init_status; - unsigned long start_status; + unsigned long init_status[ADF_DEVS_ARRAY_SIZE]; + unsigned long start_status[ADF_DEVS_ARRAY_SIZE]; char *name; struct list_head list; }; diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 888c6675e7e5..26556c713049 100644 --- 
a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -64,8 +64,8 @@ static void adf_service_add(struct service_hndl *service) int adf_service_register(struct service_hndl *service) { - service->init_status = 0; - service->start_status = 0; + memset(service->init_status, 0, sizeof(service->init_status)); + memset(service->start_status, 0, sizeof(service->start_status)); adf_service_add(service); return 0; } @@ -79,9 +79,13 @@ static void adf_service_remove(struct service_hndl *service) int adf_service_unregister(struct service_hndl *service) { - if (service->init_status || service->start_status) { - pr_err("QAT: Could not remove active service\n"); - return -EFAULT; + int i; + + for (i = 0; i < ARRAY_SIZE(service->init_status); i++) { + if (service->init_status[i] || service->start_status[i]) { + pr_err("QAT: Could not remove active service\n"); + return -EFAULT; + } } adf_service_remove(service); return 0; @@ -163,7 +167,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->init_status); + set_bit(accel_dev->accel_id, service->init_status); } hw_data->enable_error_correction(accel_dev); @@ -210,7 +214,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->start_status); + set_bit(accel_dev->accel_id, service->start_status); } clear_bit(ADF_STATUS_STARTING, &accel_dev->status); @@ -259,14 +263,14 @@ void adf_dev_stop(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->start_status)) + if (!test_bit(accel_dev->accel_id, service->start_status)) continue; ret = service->event_hld(accel_dev, ADF_EVENT_STOP); if (!ret) { - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } else if (ret == 
-EAGAIN) { wait = true; - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } } @@ -317,14 +321,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->init_status)) + if (!test_bit(accel_dev->accel_id, service->init_status)) continue; if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) dev_err(&GET_DEV(accel_dev), "Failed to shutdown service %s\n", service->name); else - clear_bit(accel_dev->accel_id, &service->init_status); + clear_bit(accel_dev->accel_id, service->init_status); } hw_data->disable_iov(accel_dev); From d79b5d0bbf2e3aded13b0542160bde48e95d3d44 Mon Sep 17 00:00:00 2001 From: "Gonglei \\(Arei\\)" Date: Tue, 27 Dec 2016 14:49:07 +0800 Subject: [PATCH 025/142] crypto: virtio - support crypto engine framework crypto engine was introduced since 'commit 735d37b5424b ("crypto: engine - Introduce the block request crypto engine framework")' which uses work queue to realize the asynchronous processing for ablk_cipher and ahash. For virtio-crypto device, I register an engine for each data virtqueue so that we can use the capability of multiple data queues in future. Cc: Baolin Wang Cc: Herbert Xu Cc: Michael S. 
Tsirkin Signed-off-by: Gonglei Signed-off-by: Herbert Xu --- drivers/crypto/virtio/Kconfig | 1 + drivers/crypto/virtio/virtio_crypto_algs.c | 52 +++++++++----- drivers/crypto/virtio/virtio_crypto_common.h | 16 +++++ drivers/crypto/virtio/virtio_crypto_core.c | 74 ++++++++++++++++++-- 4 files changed, 121 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig index d80f73366ae2..5db07495ddc5 100644 --- a/drivers/crypto/virtio/Kconfig +++ b/drivers/crypto/virtio/Kconfig @@ -4,6 +4,7 @@ config CRYPTO_DEV_VIRTIO select CRYPTO_AEAD select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER + select CRYPTO_ENGINE default m help This driver provides support for virtio crypto device. If you diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index c2374df9abae..970d0cafd25a 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -288,8 +288,7 @@ static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm, static int __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, struct ablkcipher_request *req, - struct data_queue *data_vq, - __u8 op) + struct data_queue *data_vq) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); unsigned int ivsize = crypto_ablkcipher_ivsize(tfm); @@ -329,7 +328,7 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, vc_req->req_data = req_data; vc_req->type = VIRTIO_CRYPTO_SYM_OP_CIPHER; /* Head of operation */ - if (op) { + if (vc_req->encrypt) { req_data->header.session_id = cpu_to_le64(ctx->enc_sess_info.session_id); req_data->header.opcode = @@ -424,19 +423,15 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req) struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); struct virtio_crypto *vcrypto = ctx->vcrypto; - int ret; /* Use the 
first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; vc_req->ablkcipher_ctx = ctx; vc_req->ablkcipher_req = req; - ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq, 1); - if (ret < 0) { - pr_err("virtio_crypto: Encryption failed!\n"); - return ret; - } + vc_req->encrypt = true; + vc_req->dataq = data_vq; - return -EINPROGRESS; + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) @@ -445,20 +440,16 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); struct virtio_crypto *vcrypto = ctx->vcrypto; - int ret; /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; vc_req->ablkcipher_ctx = ctx; vc_req->ablkcipher_req = req; - ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq, 0); - if (ret < 0) { - pr_err("virtio_crypto: Decryption failed!\n"); - return ret; - } + vc_req->encrypt = false; + vc_req->dataq = data_vq; - return -EINPROGRESS; + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm) @@ -484,6 +475,33 @@ static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm) ctx->vcrypto = NULL; } +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req) +{ + struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct data_queue *data_vq = vc_req->dataq; + int ret; + + ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq); + if (ret < 0) + return ret; + + virtqueue_kick(data_vq->vq); + + return 0; +} + +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err) +{ + 
crypto_finalize_cipher_request(vc_req->dataq->engine, req, err); + + virtcrypto_clear_request(vc_req); +} + static struct crypto_alg virtio_crypto_algs[] = { { .cra_name = "cbc(aes)", .cra_driver_name = "virtio_crypto_aes_cbc", diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 3d6566b02876..da6d8c0ea407 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -25,6 +25,7 @@ #include #include #include +#include /* Internal representation of a data virtqueue */ @@ -37,6 +38,8 @@ struct data_queue { /* Name of the tx queue: dataq.$index */ char name[32]; + + struct crypto_engine *engine; }; struct virtio_crypto { @@ -97,6 +100,9 @@ struct virtio_crypto_request { struct virtio_crypto_op_data_req *req_data; struct scatterlist **sgs; uint8_t *iv; + /* Encryption? */ + bool encrypt; + struct data_queue *dataq; }; int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev); @@ -110,6 +116,16 @@ int virtcrypto_dev_started(struct virtio_crypto *vcrypto_dev); struct virtio_crypto *virtcrypto_get_dev_node(int node); int virtcrypto_dev_start(struct virtio_crypto *vcrypto); void virtcrypto_dev_stop(struct virtio_crypto *vcrypto); +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req); +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err); + +void +virtcrypto_clear_request(struct virtio_crypto_request *vc_req); static inline int virtio_crypto_get_current_node(void) { diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index fe70ec823b27..b5b153317376 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -25,7 +25,7 @@ #include "virtio_crypto_common.h" -static void +void virtcrypto_clear_request(struct virtio_crypto_request *vc_req) { 
if (vc_req) { @@ -66,12 +66,12 @@ static void virtcrypto_dataq_callback(struct virtqueue *vq) break; } ablk_req = vc_req->ablkcipher_req; - virtcrypto_clear_request(vc_req); spin_unlock_irqrestore( &vcrypto->data_vq[qid].lock, flags); /* Finish the encrypt or decrypt process */ - ablk_req->base.complete(&ablk_req->base, error); + virtio_crypto_ablkcipher_finalize_req(vc_req, + ablk_req, error); spin_lock_irqsave( &vcrypto->data_vq[qid].lock, flags); } @@ -87,6 +87,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) int ret = -ENOMEM; int i, total_vqs; const char **names; + struct device *dev = &vi->vdev->dev; /* * We expect 1 data virtqueue, followed by @@ -128,6 +129,15 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) for (i = 0; i < vi->max_data_queues; i++) { spin_lock_init(&vi->data_vq[i].lock); vi->data_vq[i].vq = vqs[i]; + /* Initialize crypto engine */ + vi->data_vq[i].engine = crypto_engine_alloc_init(dev, 1); + if (!vi->data_vq[i].engine) { + ret = -ENOMEM; + goto err_engine; + } + + vi->data_vq[i].engine->cipher_one_request = + virtio_crypto_ablkcipher_crypt_req; } kfree(names); @@ -136,6 +146,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) return 0; +err_engine: err_find: kfree(names); err_names: @@ -269,6 +280,38 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto) return 0; } +static int virtcrypto_start_crypto_engines(struct virtio_crypto *vcrypto) +{ + int32_t i; + int ret; + + for (i = 0; i < vcrypto->max_data_queues; i++) { + if (vcrypto->data_vq[i].engine) { + ret = crypto_engine_start(vcrypto->data_vq[i].engine); + if (ret) + goto err; + } + } + + return 0; + +err: + while (--i >= 0) + if (vcrypto->data_vq[i].engine) + crypto_engine_exit(vcrypto->data_vq[i].engine); + + return ret; +} + +static void virtcrypto_clear_crypto_engines(struct virtio_crypto *vcrypto) +{ + u32 i; + + for (i = 0; i < vcrypto->max_data_queues; i++) + if (vcrypto->data_vq[i].engine) + 
crypto_engine_exit(vcrypto->data_vq[i].engine); +} + static void virtcrypto_del_vqs(struct virtio_crypto *vcrypto) { struct virtio_device *vdev = vcrypto->vdev; @@ -355,14 +398,21 @@ static int virtcrypto_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "Failed to initialize vqs.\n"); goto free_dev; } + + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + virtio_device_ready(vdev); err = virtcrypto_update_status(vcrypto); if (err) - goto free_vqs; + goto free_engines; return 0; +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); free_vqs: vcrypto->vdev->config->reset(vdev); virtcrypto_del_vqs(vcrypto); @@ -398,6 +448,7 @@ static void virtcrypto_remove(struct virtio_device *vdev) virtcrypto_dev_stop(vcrypto); vdev->config->reset(vdev); virtcrypto_free_unused_reqs(vcrypto); + virtcrypto_clear_crypto_engines(vcrypto); virtcrypto_del_vqs(vcrypto); virtcrypto_devmgr_rm_dev(vcrypto); kfree(vcrypto); @@ -420,6 +471,7 @@ static int virtcrypto_freeze(struct virtio_device *vdev) if (virtcrypto_dev_started(vcrypto)) virtcrypto_dev_stop(vcrypto); + virtcrypto_clear_crypto_engines(vcrypto); virtcrypto_del_vqs(vcrypto); return 0; } @@ -433,14 +485,26 @@ static int virtcrypto_restore(struct virtio_device *vdev) if (err) return err; + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + virtio_device_ready(vdev); + err = virtcrypto_dev_start(vcrypto); if (err) { dev_err(&vdev->dev, "Failed to start virtio crypto device.\n"); - return -EFAULT; + goto free_engines; } return 0; + +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); +free_vqs: + vcrypto->vdev->config->reset(vdev); + virtcrypto_del_vqs(vcrypto); + return err; } #endif From c821f6ab2e47946f35ee2f30781c5185e5d07f65 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Dec 2016 14:09:08 +0000 Subject: [PATCH 026/142] crypto: skcipher - introduce walksize attribute for SIMD algos In some cases, SIMD algorithms can only perform optimally when 
allowed to operate on multiple input blocks in parallel. This is especially true for bit slicing algorithms, which typically take the same amount of time processing a single block or 8 blocks in parallel. However, other SIMD algorithms may benefit as well from bigger strides. So add a walksize attribute to the skcipher algorithm definition, and wire it up to the skcipher walk API. To avoid confusion between the skcipher and AEAD attributes, rename the skcipher_walk chunksize attribute to 'stride', and set it from the walksize (in the skcipher case) or from the chunksize (in the AEAD case). Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/skcipher.c | 20 +++++++++++------- include/crypto/internal/skcipher.h | 2 +- include/crypto/skcipher.h | 34 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 0e1e6c35188e..6ee6a1521e0b 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -185,12 +185,12 @@ void skcipher_walk_complete(struct skcipher_walk *walk, int err) data = p->data; if (!data) { data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1); - data = skcipher_get_spot(data, walk->chunksize); + data = skcipher_get_spot(data, walk->stride); } scatterwalk_copychunks(data, &p->dst, p->len, 1); - if (offset_in_page(p->data) + p->len + walk->chunksize > + if (offset_in_page(p->data) + p->len + walk->stride > PAGE_SIZE) free_page((unsigned long)p->data); @@ -299,7 +299,7 @@ static int skcipher_next_copy(struct skcipher_walk *walk) p->len = walk->nbytes; skcipher_queue_write(walk, p); - if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize > + if (offset_in_page(walk->page) + walk->nbytes + walk->stride > PAGE_SIZE) walk->page = NULL; else @@ -344,7 +344,7 @@ static int skcipher_walk_next(struct skcipher_walk *walk) SKCIPHER_WALK_DIFF); n = walk->total; - bsize = min(walk->chunksize, max(n, walk->blocksize)); + bsize = min(walk->stride, max(n, 
walk->blocksize)); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); @@ -393,7 +393,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) unsigned a = crypto_tfm_ctx_alignment() - 1; unsigned alignmask = walk->alignmask; unsigned ivsize = walk->ivsize; - unsigned bs = walk->chunksize; + unsigned bs = walk->stride; unsigned aligned_bs; unsigned size; u8 *iv; @@ -463,7 +463,7 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, SKCIPHER_WALK_SLEEP : 0; walk->blocksize = crypto_skcipher_blocksize(tfm); - walk->chunksize = crypto_skcipher_chunksize(tfm); + walk->stride = crypto_skcipher_walksize(tfm); walk->ivsize = crypto_skcipher_ivsize(tfm); walk->alignmask = crypto_skcipher_alignmask(tfm); @@ -525,7 +525,7 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->blocksize = crypto_aead_blocksize(tfm); - walk->chunksize = crypto_aead_chunksize(tfm); + walk->stride = crypto_aead_chunksize(tfm); walk->ivsize = crypto_aead_ivsize(tfm); walk->alignmask = crypto_aead_alignmask(tfm); @@ -821,6 +821,7 @@ static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "max keysize : %u\n", skcipher->max_keysize); seq_printf(m, "ivsize : %u\n", skcipher->ivsize); seq_printf(m, "chunksize : %u\n", skcipher->chunksize); + seq_printf(m, "walksize : %u\n", skcipher->walksize); } #ifdef CONFIG_NET @@ -893,11 +894,14 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg) { struct crypto_alg *base = &alg->base; - if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8) + if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 || + alg->walksize > PAGE_SIZE / 8) return -EINVAL; if (!alg->chunksize) alg->chunksize = base->cra_blocksize; + if (!alg->walksize) + alg->walksize = alg->chunksize; base->cra_type = &crypto_skcipher_type2; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; diff --git a/include/crypto/internal/skcipher.h 
b/include/crypto/internal/skcipher.h index 8735979ed341..e42f7063f245 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -66,7 +66,7 @@ struct skcipher_walk { int flags; unsigned int blocksize; - unsigned int chunksize; + unsigned int stride; unsigned int alignmask; }; diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 750b14f1ada4..562001cb412b 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -115,6 +115,9 @@ struct crypto_skcipher { * IV of exactly that size to perform the encrypt or decrypt operation. * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. + * @walksize: Equal to the chunk size except in cases where the algorithm is + * considerably more efficient if it can operate on multiple chunks + * in parallel. Should be a multiple of chunksize. * @base: Definition of a generic crypto algorithm. * * All fields except @ivsize are mandatory and must be filled. 
@@ -131,6 +134,7 @@ struct skcipher_alg { unsigned int max_keysize; unsigned int ivsize; unsigned int chunksize; + unsigned int walksize; struct crypto_alg base; }; @@ -289,6 +293,19 @@ static inline unsigned int crypto_skcipher_alg_chunksize( return alg->chunksize; } +static inline unsigned int crypto_skcipher_alg_walksize( + struct skcipher_alg *alg) +{ + if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return alg->base.cra_blocksize; + + if (alg->base.cra_ablkcipher.encrypt) + return alg->base.cra_blocksize; + + return alg->walksize; +} + /** * crypto_skcipher_chunksize() - obtain chunk size * @tfm: cipher handle @@ -306,6 +323,23 @@ static inline unsigned int crypto_skcipher_chunksize( return crypto_skcipher_alg_chunksize(crypto_skcipher_alg(tfm)); } +/** + * crypto_skcipher_walksize() - obtain walk size + * @tfm: cipher handle + * + * In some cases, algorithms can only perform optimally when operating on + * multiple blocks in parallel. This is reflected by the walksize, which + * must be a multiple of the chunksize (or equal if the concern does not + * apply) + * + * Return: walk size in bytes + */ +static inline unsigned int crypto_skcipher_walksize( + struct crypto_skcipher *tfm) +{ + return crypto_skcipher_alg_walksize(crypto_skcipher_alg(tfm)); +} + /** * crypto_skcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle From d2110224a66779fc2c833d2c16caa73e2e23591e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 30 Dec 2016 14:12:00 -0600 Subject: [PATCH 027/142] crypto: testmgr - use kmemdup instead of kmalloc+memcpy It's recommended to use kmemdup instead of kmalloc followed by memcpy. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/testmgr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 44e888b0b041..881176ebd8a8 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1463,13 +1463,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = ctemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, ctemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); @@ -1525,13 +1524,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = dtemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(dtemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, dtemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); From d8c34b949d8c9f61e099e00f22770e400adf2b76 Mon Sep 17 00:00:00 2001 From: Gideon Israel Dsouza Date: Sat, 31 Dec 2016 21:26:23 +0530 Subject: [PATCH 028/142] crypto: Replaced gcc specific attributes with macros from compiler.h Continuing from this commit: 52f5684c8e1e ("kernel: use macros from compiler.h instead of __attribute__((...))") I submitted 4 total patches. They are part of a task I've taken up to increase compiler portability in the kernel. I've cleaned up the subsystems under /kernel /mm /block and /security, this patch targets /crypto. There is <linux/compiler.h> which provides macros for various gcc specific constructs. Eg: __weak for __attribute__((weak)). I've cleaned all instances of gcc specific attributes with the right macros for the crypto subsystem.
I had to make one additional change into compiler-gcc.h for the case when one wants to use this: __attribute__((aligned)) and not specify an alignment factor. From the gcc docs, this will result in the largest alignment for that data type on the target machine so I've named the macro __aligned_largest. Please advise if another name is more appropriate. Signed-off-by: Gideon Israel Dsouza Signed-off-by: Herbert Xu --- crypto/ablkcipher.c | 5 +++-- crypto/acompress.c | 3 ++- crypto/aead.c | 3 ++- crypto/ahash.c | 3 ++- crypto/akcipher.c | 3 ++- crypto/blkcipher.c | 7 ++++--- crypto/cts.c | 5 +++-- crypto/kpp.c | 3 ++- crypto/pcbc.c | 3 ++- crypto/rng.c | 3 ++- crypto/scompress.c | 3 ++- crypto/shash.c | 9 +++++---- crypto/skcipher.c | 3 ++- include/linux/compiler-gcc.h | 1 + 14 files changed, 34 insertions(+), 20 deletions(-) diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index d676fc59521a..d880a4897159 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -394,7 +395,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher; @@ -468,7 +469,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher; diff --git a/crypto/acompress.c b/crypto/acompress.c index 887783d8e9a9..47d11627cd20 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -20,6 +20,7 @@ #include #include #include +#include
#include #include #include @@ -50,7 +51,7 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/aead.c b/crypto/aead.c index 3f5c5ff004ab..f794b30a9407 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "internal.h" @@ -132,7 +133,7 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) { struct aead_alg *aead = container_of(alg, struct aead_alg, base); diff --git a/crypto/ahash.c b/crypto/ahash.c index 2ce8bcb9049c..e58c4970c22b 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "internal.h" @@ -493,7 +494,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : ahash\n"); diff --git a/crypto/akcipher.c b/crypto/akcipher.c index def301ed1288..cfbdb06d8ca8 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -47,7 +48,7 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/blkcipher.c 
b/crypto/blkcipher.c index a832426820e8..6c43a0a17a55 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -1,6 +1,6 @@ /* * Block chaining cipher operations. - * + * * Generic encrypt/decrypt wrapper for ciphers, handles operations across * multiple page boundaries by using temporary blocks. In user context, * the kernel is given a chance to schedule us once per page. @@ -9,7 +9,7 @@ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "internal.h" @@ -534,7 +535,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : blkcipher\n"); diff --git a/crypto/cts.c b/crypto/cts.c index 00254d76b21b..a1335d6c35fb 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -49,6 +49,7 @@ #include #include #include +#include struct crypto_cts_ctx { struct crypto_skcipher *child; @@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; int lastn; @@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = 
crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; u8 *space; diff --git a/crypto/kpp.c b/crypto/kpp.c index d36ce05eee43..a90edc27af77 100644 --- a/crypto/kpp.c +++ b/crypto/kpp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -47,7 +48,7 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/pcbc.c b/crypto/pcbc.c index e4538e07f7ca..11d248673ad4 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -20,6 +20,7 @@ #include #include #include +#include struct crypto_pcbc_ctx { struct crypto_cipher *child; @@ -146,7 +147,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *iv = walk->iv; - u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32)))); + u8 tmpbuf[bsize] __aligned(__alignof__(u32)); do { memcpy(tmpbuf, src, bsize); diff --git a/crypto/rng.c b/crypto/rng.c index b81cffb13bab..f46dac5288b9 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "internal.h" @@ -95,7 +96,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : rng\n"); diff --git a/crypto/scompress.c b/crypto/scompress.c index 35e396d154b7..6b048b36312d 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ 
-57,7 +58,7 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/shash.c b/crypto/shash.c index a051541a4a17..5e31c8d776df 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "internal.h" @@ -67,7 +68,7 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey); static inline unsigned int shash_align_buffer_size(unsigned len, unsigned long mask) { - typedef u8 __attribute__ ((aligned)) u8_aligned; + typedef u8 __aligned_largest u8_aligned; return len + (mask & ~(__alignof__(u8_aligned) - 1)); } @@ -80,7 +81,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, unsigned int unaligned_len = alignmask + 1 - ((unsigned long)data & alignmask); u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)] - __attribute__ ((aligned)); + __aligned_largest; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -116,7 +117,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out) struct shash_alg *shash = crypto_shash_alg(tfm); unsigned int ds = crypto_shash_digestsize(tfm); u8 ubuf[shash_align_buffer_size(ds, alignmask)] - __attribute__ ((aligned)); + __aligned_largest; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -403,7 +404,7 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) { struct shash_alg *salg = __crypto_shash_alg(alg); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 6ee6a1521e0b..014af741fc6a 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -19,6 +19,7 @@ #include #include #include 
+#include #include #include #include @@ -807,7 +808,7 @@ static void crypto_skcipher_free_instance(struct crypto_instance *inst) } static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0444b1336268..fddd1a5eb322 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -116,6 +116,7 @@ */ #define __pure __attribute__((pure)) #define __aligned(x) __attribute__((aligned(x))) +#define __aligned_largest __attribute__((aligned)) #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) #define __attribute_const__ __attribute__((__const__)) From 4f44d86df083c3bc91d2fc315fc133c514d26a0d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 2 Jan 2017 14:06:57 -0300 Subject: [PATCH 029/142] crypto: picoxcell - Allow driver to build COMPILE_TEST is enabled Driver only has runtime but no build time dependency with ARCH_PICOXCELL. So it can be built for testing purposes if COMPILE_TEST option is enabled. This is useful to have more build coverage and make sure that the driver is not affected by changes that could cause build regressions. 
Suggested-by: Arnd Bergmann Signed-off-by: Javier Martinez Canillas Acked-by: Jamie Iles Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 8ded3af88b16..18d2d52c3356 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -339,7 +339,7 @@ config CRYPTO_DEV_OMAP_DES config CRYPTO_DEV_PICOXCELL tristate "Support for picoXcell IPSEC and Layer2 crypto engines" - depends on ARCH_PICOXCELL && HAVE_CLK + depends on (ARCH_PICOXCELL || COMPILE_TEST) && HAVE_CLK select CRYPTO_AEAD select CRYPTO_AES select CRYPTO_AUTHENC From 6b2be1ddbc427922231be712677d4fec37e9772a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 2 Jan 2017 14:06:58 -0300 Subject: [PATCH 030/142] crypto: picoxcell - Remove platform device ID table This driver is only used in the picoxcell platform and this is DT-only. So only an OF device ID table is needed and there's no need to have a platform device ID table. This patch removes the unneeded table.
Suggested-by: Arnd Bergmann Signed-off-by: Javier Martinez Canillas Acked-by: Jamie Iles Signed-off-by: Herbert Xu --- drivers/crypto/picoxcell_crypto.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 47576098831f..539effbbfc7a 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1803,12 +1803,6 @@ static int spacc_remove(struct platform_device *pdev) return 0; } -static const struct platform_device_id spacc_id_table[] = { - { "picochip,spacc-ipsec", }, - { "picochip,spacc-l2", }, - { } -}; - static struct platform_driver spacc_driver = { .probe = spacc_probe, .remove = spacc_remove, @@ -1819,7 +1813,6 @@ static struct platform_driver spacc_driver = { #endif /* CONFIG_PM */ .of_match_table = of_match_ptr(spacc_of_id_table), }, - .id_table = spacc_id_table, }; module_platform_driver(spacc_driver); From 012ef70336a0dfb33ebe30cd80ec86aa1fe61f79 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 2 Jan 2017 14:06:59 -0300 Subject: [PATCH 031/142] crypto: picoxcell - Remove spacc_is_compatible() wrapper function The function is used to check either the platform device ID name or the OF node's compatible (depending how the device was registered) to know which device type was registered. But the driver is for a DT-only platform and so there's no need for this level of indirection since the devices can only be registered via OF. 
Suggested-by: Arnd Bergmann Signed-off-by: Javier Martinez Canillas Acked-by: Jamie Iles Signed-off-by: Herbert Xu --- drivers/crypto/picoxcell_crypto.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 539effbbfc7a..b6f14844702e 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1616,32 +1616,17 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ -static bool spacc_is_compatible(struct platform_device *pdev, - const char *spacc_type) -{ - const struct platform_device_id *platid = platform_get_device_id(pdev); - - if (platid && !strcmp(platid->name, spacc_type)) - return true; - -#ifdef CONFIG_OF - if (of_device_is_compatible(pdev->dev.of_node, spacc_type)) - return true; -#endif /* CONFIG_OF */ - - return false; -} - static int spacc_probe(struct platform_device *pdev) { int i, err, ret = -EINVAL; struct resource *mem, *irq; + struct device_node *np = pdev->dev.of_node; struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine), GFP_KERNEL); if (!engine) return -ENOMEM; - if (spacc_is_compatible(pdev, "picochip,spacc-ipsec")) { + if (of_device_is_compatible(np, "picochip,spacc-ipsec")) { engine->max_ctxs = SPACC_CRYPTO_IPSEC_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ; engine->hash_pg_sz = SPACC_CRYPTO_IPSEC_HASH_PG_SZ; @@ -1650,7 +1635,7 @@ static int spacc_probe(struct platform_device *pdev) engine->num_algs = ARRAY_SIZE(ipsec_engine_algs); engine->aeads = ipsec_engine_aeads; engine->num_aeads = ARRAY_SIZE(ipsec_engine_aeads); - } else if (spacc_is_compatible(pdev, "picochip,spacc-l2")) { + } else if (of_device_is_compatible(np, "picochip,spacc-l2")) { engine->max_ctxs = SPACC_CRYPTO_L2_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_L2_CIPHER_PG_SZ; engine->hash_pg_sz = SPACC_CRYPTO_L2_HASH_PG_SZ; From 
f283148a0774e9b2de5ae4b42bb7dff50410f8cc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Jan 2017 13:21:22 +0000 Subject: [PATCH 032/142] crypto: mediatek - don't return garbage err on successful return In the case where keylen <= bs mtk_sha_setkey returns an uninitialized return value in err. Fix this by returning 0 instead of err. Issue detected by static analysis with cppcheck. Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-sha.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 89513632c8ed..8e1b440fedf0 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -878,7 +878,7 @@ static int mtk_sha_setkey(struct crypto_ahash *tfm, bctx->opad[i] ^= 0x5c; } - return err; + return 0; } static int mtk_sha_export(struct ahash_request *req, void *out) From e93acd6f6778825a4e0b5a1a9b70324bf901d516 Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Tue, 10 Jan 2017 15:24:46 -0800 Subject: [PATCH 033/142] crypto: testmgr - Allocate only the required output size for hash tests There are some hashes (e.g. sha224) that have some internal trickery to make sure that only the correct number of output bytes are generated. If something goes wrong, they could potentially overrun the output buffer. Make the test more robust by allocating only enough space for the correct output size so that memory debugging will catch the error if the output is overrun. Tested by intentionally breaking sha224 to output all 256 internally-generated bits while running on KASAN. 
Cc: Ard Biesheuvel Cc: Herbert Xu Signed-off-by: Andy Lutomirski Signed-off-by: Herbert Xu --- crypto/testmgr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 881176ebd8a8..500a5277cc22 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -265,6 +265,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); + size_t digest_size = crypto_ahash_digestsize(tfm); unsigned int i, j, k, temp; struct scatterlist sg[8]; char *result; @@ -275,7 +276,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, char *xbuf[XBUFSIZE]; int ret = -ENOMEM; - result = kmalloc(MAX_DIGEST_SIZE, GFP_KERNEL); + result = kmalloc(digest_size, GFP_KERNEL); if (!result) return ret; key = kmalloc(MAX_KEYLEN, GFP_KERNEL); @@ -305,7 +306,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, goto out; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); hash_buff = xbuf[0]; hash_buff += align_offset; @@ -380,7 +381,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, continue; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); temp = 0; sg_init_table(sg, template[i].np); @@ -458,7 +459,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, continue; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); ret = -EINVAL; hash_buff = xbuf[0]; From b8fbe71f7535d4dfeed0bb8d924107dc58d502e2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 11 Jan 2017 20:28:06 +0800 Subject: [PATCH 034/142] crypto: x86/chacha20 - Manually align stack buffer The kernel on x86-64 cannot use gcc attribute align to align to a 16-byte boundary. This patch reverts to the old way of aligning it by hand. 
Fixes: 9ae433bc79f9 ("crypto: chacha20 - convert generic and...") Signed-off-by: Herbert Xu Reviewed-by: Ard Biesheuvel --- arch/x86/crypto/chacha20_glue.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 78f75b07dc25..1e6af1b35f7b 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -67,10 +67,13 @@ static int chacha20_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 state[16] __aligned(CHACHA20_STATE_ALIGN); + u32 *state, state_buf[16 + 2] __aligned(8); struct skcipher_walk walk; int err; + BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16); + state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN); + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) return crypto_chacha20_crypt(req); From 7dee9f618790d0b723b29c46da10bffc7974a772 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 14:50:19 +0100 Subject: [PATCH 035/142] crypto: mediatek - remove ARM dependencies Building the mediatek driver on an older ARM architecture results in a harmless warning: warning: (ARCH_OMAP2PLUS_TYPICAL && CRYPTO_DEV_MEDIATEK) selects NEON which has unmet direct dependencies (VFPv3 && CPU_V7) We could add an explicit dependency on CPU_V7, but it seems nicer to open up the build to additional configurations. This replaces the ARM optimized algorithm selection with the normal one that all other drivers use, and that in turn lets us relax the dependency on ARM and drop a number of the unrelated 'select' statements. Obviously a real user would still select those other optimized drivers as a fallback, but as there is no strict dependency, we can leave that up to the user. 
Fixes: 785e5c616c84 ("crypto: mediatek - Add crypto driver support for some MediaTek chips") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 18d2d52c3356..ee5057a2914b 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -555,15 +555,12 @@ config CRYPTO_DEV_ROCKCHIP config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" - depends on ARM && (ARCH_MEDIATEK || COMPILE_TEST) - select NEON - select KERNEL_MODE_NEON - select ARM_CRYPTO + depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST select CRYPTO_AES select CRYPTO_BLKCIPHER - select CRYPTO_SHA1_ARM_NEON - select CRYPTO_SHA256_ARM - select CRYPTO_SHA512_ARM + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA512 select CRYPTO_HMAC help This driver allows you to utilize the hardware crypto accelerator From 41e05324fde983b1342f86239868f1959f6032a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 14:55:20 +0100 Subject: [PATCH 036/142] crypto: mediatek - fix format string for 64-bit builds After I enabled COMPILE_TEST for non-ARM targets, I ran into these warnings: crypto/mediatek/mtk-aes.c: In function 'mtk_aes_info_map': crypto/mediatek/mtk-aes.c:224:28: error: format '%d' expects argument of type 'int', but argument 3 has type 'long unsigned int' [-Werror=format=] dev_err(cryp->dev, "dma %d bytes error\n", sizeof(*info)); crypto/mediatek/mtk-sha.c:344:28: error: format '%d' expects argument of type 'int', but argument 3 has type 'long unsigned int' [-Werror=format=] crypto/mediatek/mtk-sha.c:550:21: error: format '%u' expects argument of type 'unsigned int', but argument 4 has type 'size_t {aka long unsigned int}' [-Werror=format=] The correct format for size_t is %zu, so use that in all three cases. 
Fixes: 785e5c616c84 ("crypto: mediatek - Add crypto driver support for some MediaTek chips") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 2 +- drivers/crypto/mediatek/mtk-sha.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index 3271471060d9..1370cabeeb5b 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -221,7 +221,7 @@ static int mtk_aes_info_map(struct mtk_cryp *cryp, aes->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(cryp->dev, aes->ct_dma))) { - dev_err(cryp->dev, "dma %d bytes error\n", sizeof(*info)); + dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); return -EINVAL; } aes->tfm_dma = aes->ct_dma + sizeof(*ct); diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 8e1b440fedf0..f1e188bc203c 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -341,7 +341,7 @@ static int mtk_sha_info_map(struct mtk_cryp *cryp, sha->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), DMA_BIDIRECTIONAL); if (unlikely(dma_mapping_error(cryp->dev, sha->ct_dma))) { - dev_err(cryp->dev, "dma %d bytes error\n", sizeof(*info)); + dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); return -EINVAL; } sha->tfm_dma = sha->ct_dma + sizeof(*ct); @@ -547,7 +547,7 @@ static int mtk_sha_update_slow(struct mtk_cryp *cryp, final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; - dev_dbg(cryp->dev, "slow: bufcnt: %u\n", ctx->bufcnt); + dev_dbg(cryp->dev, "slow: bufcnt: %zu\n", ctx->bufcnt); if (final) { sha->flags |= SHA_FLAGS_FINAL; From b7171ce9eb523fd90e38f2d138d1b6ed2ff3eafd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 16:41:49 +0000 Subject: [PATCH 037/142] crypto: arm64/chacha20 - implement NEON version based on SSE3 code This is a 
straight port to arm64/NEON of the x86 SSE3 implementation of the ChaCha20 stream cipher. It uses the new skcipher walksize attribute to process the input in strides of 4x the block size. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/chacha20-neon-core.S | 450 +++++++++++++++++++++++++ arch/arm64/crypto/chacha20-neon-glue.c | 127 +++++++ 4 files changed, 586 insertions(+) create mode 100644 arch/arm64/crypto/chacha20-neon-core.S create mode 100644 arch/arm64/crypto/chacha20-neon-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 450a85df041a..0bf0f531f539 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -72,4 +72,10 @@ config CRYPTO_CRC32_ARM64 depends on ARM64 select CRYPTO_HASH +config CRYPTO_CHACHA20_NEON + tristate "NEON accelerated ChaCha20 symmetric cipher" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_CHACHA20 + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index aa8888d7b744..9d2826c5fccf 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -41,6 +41,9 @@ sha256-arm64-y := sha256-glue.o sha256-core.o obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o + AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S new file mode 100644 index 000000000000..13c85e272c2a --- /dev/null +++ b/arch/arm64/crypto/chacha20-neon-core.S @@ -0,0 +1,450 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + + .text + .align 6 + +ENTRY(chacha20_block_xor_neon) + // x0: Input state matrix, s + // x1: 1 data block output, o + // x2: 1 data block input, i + + // + // This function encrypts one ChaCha20 block by loading the state matrix + // in four NEON registers. It performs matrix operation on four words in + // parallel, but requires shuffling to rearrange the words after each + // round. + // + + // x0..3 = s0..3 + adr x3, ROT8 + ld1 {v0.4s-v3.4s}, [x0] + ld1 {v8.4s-v11.4s}, [x0] + ld1 {v12.4s}, [x3] + + mov x3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + ext v1.16b, v1.16b, v1.16b, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + ext v3.16b, v3.16b, v3.16b, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + 
rev32 v3.8h, v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + ext v1.16b, v1.16b, v1.16b, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + ext v3.16b, v3.16b, v3.16b, #4 + + subs x3, x3, #1 + b.ne .Ldoubleround + + ld1 {v4.16b-v7.16b}, [x2] + + // o0 = i0 ^ (x0 + s0) + add v0.4s, v0.4s, v8.4s + eor v0.16b, v0.16b, v4.16b + + // o1 = i1 ^ (x1 + s1) + add v1.4s, v1.4s, v9.4s + eor v1.16b, v1.16b, v5.16b + + // o2 = i2 ^ (x2 + s2) + add v2.4s, v2.4s, v10.4s + eor v2.16b, v2.16b, v6.16b + + // o3 = i3 ^ (x3 + s3) + add v3.4s, v3.4s, v11.4s + eor v3.16b, v3.16b, v7.16b + + st1 {v0.16b-v3.16b}, [x1] + + ret +ENDPROC(chacha20_block_xor_neon) + + .align 6 +ENTRY(chacha20_4block_xor_neon) + // x0: Input state matrix, s + // x1: 4 data blocks output, o + // x2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. + // + adr x3, CTRINC // ... 
and ROT8 + ld1 {v30.4s-v31.4s}, [x3] + + // x0..15[0-3] = s0..3[0..3] + mov x4, x0 + ld4r { v0.4s- v3.4s}, [x4], #16 + ld4r { v4.4s- v7.4s}, [x4], #16 + ld4r { v8.4s-v11.4s}, [x4], #16 + ld4r {v12.4s-v15.4s}, [x4] + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v30.4s + + mov x3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v12.16b, v12.16b, v0.16b + eor v13.16b, v13.16b, v1.16b + eor v14.16b, v14.16b, v2.16b + eor v15.16b, v15.16b, v3.16b + + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + rev32 v15.8h, v15.8h + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v16.16b, v4.16b, v8.16b + eor v17.16b, v5.16b, v9.16b + eor v18.16b, v6.16b, v10.16b + eor v19.16b, v7.16b, v11.16b + + shl v4.4s, v16.4s, #12 + shl v5.4s, v17.4s, #12 + shl v6.4s, v18.4s, #12 + shl v7.4s, v19.4s, #12 + + sri v4.4s, v16.4s, #20 + sri v5.4s, v17.4s, #20 + sri v6.4s, v18.4s, #20 + sri v7.4s, v19.4s, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v12.16b, v12.16b, v0.16b + eor v13.16b, v13.16b, v1.16b + eor v14.16b, v14.16b, v2.16b + eor v15.16b, v15.16b, v3.16b + + tbl v12.16b, {v12.16b}, v31.16b + tbl v13.16b, {v13.16b}, v31.16b + tbl v14.16b, {v14.16b}, v31.16b + tbl v15.16b, {v15.16b}, v31.16b + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += 
x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v16.16b, v4.16b, v8.16b + eor v17.16b, v5.16b, v9.16b + eor v18.16b, v6.16b, v10.16b + eor v19.16b, v7.16b, v11.16b + + shl v4.4s, v16.4s, #7 + shl v5.4s, v17.4s, #7 + shl v6.4s, v18.4s, #7 + shl v7.4s, v19.4s, #7 + + sri v4.4s, v16.4s, #25 + sri v5.4s, v17.4s, #25 + sri v6.4s, v18.4s, #25 + sri v7.4s, v19.4s, #25 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s + + eor v15.16b, v15.16b, v0.16b + eor v12.16b, v12.16b, v1.16b + eor v13.16b, v13.16b, v2.16b + eor v14.16b, v14.16b, v3.16b + + rev32 v15.8h, v15.8h + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v16.16b, v5.16b, v10.16b + eor v17.16b, v6.16b, v11.16b + eor v18.16b, v7.16b, v8.16b + eor v19.16b, v4.16b, v9.16b + + shl v5.4s, v16.4s, #12 + shl v6.4s, v17.4s, #12 + shl v7.4s, v18.4s, #12 + shl v4.4s, v19.4s, #12 + + sri v5.4s, v16.4s, #20 + sri v6.4s, v17.4s, #20 + sri v7.4s, v18.4s, #20 + sri v4.4s, v19.4s, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s + + eor v15.16b, v15.16b, v0.16b + eor v12.16b, v12.16b, v1.16b + eor 
v13.16b, v13.16b, v2.16b + eor v14.16b, v14.16b, v3.16b + + tbl v15.16b, {v15.16b}, v31.16b + tbl v12.16b, {v12.16b}, v31.16b + tbl v13.16b, {v13.16b}, v31.16b + tbl v14.16b, {v14.16b}, v31.16b + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v16.16b, v5.16b, v10.16b + eor v17.16b, v6.16b, v11.16b + eor v18.16b, v7.16b, v8.16b + eor v19.16b, v4.16b, v9.16b + + shl v5.4s, v16.4s, #7 + shl v6.4s, v17.4s, #7 + shl v7.4s, v18.4s, #7 + shl v4.4s, v19.4s, #7 + + sri v5.4s, v16.4s, #25 + sri v6.4s, v17.4s, #25 + sri v7.4s, v18.4s, #25 + sri v4.4s, v19.4s, #25 + + subs x3, x3, #1 + b.ne .Ldoubleround4 + + ld4r {v16.4s-v19.4s}, [x0], #16 + ld4r {v20.4s-v23.4s}, [x0], #16 + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v30.4s + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] + add v0.4s, v0.4s, v16.4s + add v1.4s, v1.4s, v17.4s + add v2.4s, v2.4s, v18.4s + add v3.4s, v3.4s, v19.4s + + ld4r {v24.4s-v27.4s}, [x0], #16 + ld4r {v28.4s-v31.4s}, [x0] + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // x6[0-3] += s1[2] + // x7[0-3] += s1[3] + add v4.4s, v4.4s, v20.4s + add v5.4s, v5.4s, v21.4s + add v6.4s, v6.4s, v22.4s + add v7.4s, v7.4s, v23.4s + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + add v8.4s, v8.4s, v24.4s + add v9.4s, v9.4s, v25.4s + add v10.4s, v10.4s, v26.4s + add v11.4s, v11.4s, v27.4s + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + add v12.4s, v12.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v14.4s, v14.4s, v30.4s + add v15.4s, v15.4s, v31.4s + + // interleave 32-bit words in state n, n+1 + zip1 v16.4s, v0.4s, v1.4s + zip2 v17.4s, v0.4s, v1.4s + zip1 v18.4s, v2.4s, v3.4s + zip2 v19.4s, v2.4s, 
v3.4s + zip1 v20.4s, v4.4s, v5.4s + zip2 v21.4s, v4.4s, v5.4s + zip1 v22.4s, v6.4s, v7.4s + zip2 v23.4s, v6.4s, v7.4s + zip1 v24.4s, v8.4s, v9.4s + zip2 v25.4s, v8.4s, v9.4s + zip1 v26.4s, v10.4s, v11.4s + zip2 v27.4s, v10.4s, v11.4s + zip1 v28.4s, v12.4s, v13.4s + zip2 v29.4s, v12.4s, v13.4s + zip1 v30.4s, v14.4s, v15.4s + zip2 v31.4s, v14.4s, v15.4s + + // interleave 64-bit words in state n, n+2 + zip1 v0.2d, v16.2d, v18.2d + zip2 v4.2d, v16.2d, v18.2d + zip1 v8.2d, v17.2d, v19.2d + zip2 v12.2d, v17.2d, v19.2d + ld1 {v16.16b-v19.16b}, [x2], #64 + + zip1 v1.2d, v20.2d, v22.2d + zip2 v5.2d, v20.2d, v22.2d + zip1 v9.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + ld1 {v20.16b-v23.16b}, [x2], #64 + + zip1 v2.2d, v24.2d, v26.2d + zip2 v6.2d, v24.2d, v26.2d + zip1 v10.2d, v25.2d, v27.2d + zip2 v14.2d, v25.2d, v27.2d + ld1 {v24.16b-v27.16b}, [x2], #64 + + zip1 v3.2d, v28.2d, v30.2d + zip2 v7.2d, v28.2d, v30.2d + zip1 v11.2d, v29.2d, v31.2d + zip2 v15.2d, v29.2d, v31.2d + ld1 {v28.16b-v31.16b}, [x2] + + // xor with corresponding input, write to output + eor v16.16b, v16.16b, v0.16b + eor v17.16b, v17.16b, v1.16b + eor v18.16b, v18.16b, v2.16b + eor v19.16b, v19.16b, v3.16b + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v5.16b + st1 {v16.16b-v19.16b}, [x1], #64 + eor v22.16b, v22.16b, v6.16b + eor v23.16b, v23.16b, v7.16b + eor v24.16b, v24.16b, v8.16b + eor v25.16b, v25.16b, v9.16b + st1 {v20.16b-v23.16b}, [x1], #64 + eor v26.16b, v26.16b, v10.16b + eor v27.16b, v27.16b, v11.16b + eor v28.16b, v28.16b, v12.16b + st1 {v24.16b-v27.16b}, [x1], #64 + eor v29.16b, v29.16b, v13.16b + eor v30.16b, v30.16b, v14.16b + eor v31.16b, v31.16b, v15.16b + st1 {v28.16b-v31.16b}, [x1] + + ret +ENDPROC(chacha20_4block_xor_neon) + +CTRINC: .word 0, 1, 2, 3 +ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..a7f2337d46cf --- /dev/null +++ 
b/arch/arm64/crypto/chacha20-neon-glue.c @@ -0,0 +1,127 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include +#include + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u32 state[16]; + int err; + + if (req->cryptlen <= CHACHA20_BLOCK_SIZE) + return crypto_chacha20_crypt(req); + + err = skcipher_walk_virt(&walk, req, 
true); + + crypto_chacha20_init(state, ctx, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + kernel_neon_end(); + + return err; +} + +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .walksize = 4 * CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_neon, + .decrypt = chacha20_neon, +}; + +static int __init chacha20_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + return crypto_register_skcipher(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_skcipher(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); From afaf712e99a7e52e2520335ddefee6a9d84a6db5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 16:41:50 +0000 Subject: [PATCH 038/142] crypto: arm/chacha20 - implement NEON version based on SSE3 code This is a straight port to ARM/NEON of the x86 SSE3 implementation of the ChaCha20 stream cipher. It uses the new skcipher walksize attribute to process the input in strides of 4x the block size. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 6 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/chacha20-neon-core.S | 523 +++++++++++++++++++++++++++ arch/arm/crypto/chacha20-neon-glue.c | 128 +++++++ 4 files changed, 659 insertions(+) create mode 100644 arch/arm/crypto/chacha20-neon-core.S create mode 100644 arch/arm/crypto/chacha20-neon-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 13f1b4c289d4..2f3339f015d3 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -130,4 +130,10 @@ config CRYPTO_CRC32_ARM_CE depends on KERNEL_MODE_NEON && CRC32 select CRYPTO_HASH +config CRYPTO_CHACHA20_NEON + tristate "NEON accelerated ChaCha20 symmetric cipher" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_CHACHA20 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b578a1820ab1..8d74e55eacd4 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -40,6 +41,7 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o +chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S new file mode 100644 index 000000000000..3fecb2124c35 --- /dev/null +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -0,0 +1,523 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + 
* + * Copyright (C) 2016 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + + .text + .fpu neon + .align 5 + +ENTRY(chacha20_block_xor_neon) + // r0: Input state matrix, s + // r1: 1 data block output, o + // r2: 1 data block input, i + + // + // This function encrypts one ChaCha20 block by loading the state matrix + // in four NEON registers. It performs matrix operation on four words in + // parallel, but requires shuffling to rearrange the words after each + // round. 
+ // + + // x0..3 = s0..3 + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vmov q11, q3 + + mov r3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vext.8 q1, q1, q1, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vext.8 q3, q3, q3, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vext.8 q1, q1, q1, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vext.8 q3, q3, q3, #4 + + subs r3, r3, #1 + bne .Ldoubleround + + add ip, r2, #0x20 + vld1.8 {q4-q5}, [r2] + vld1.8 {q6-q7}, [ip] + + // o0 = i0 ^ (x0 + s0) + vadd.i32 q0, q0, q8 + veor q0, q0, q4 + + // o1 = i1 ^ (x1 + s1) + vadd.i32 q1, q1, q9 + veor q1, q1, q5 + + // o2 = i2 ^ (x2 + s2) + vadd.i32 q2, q2, q10 + veor q2, q2, q6 + + // o3 = i3 ^ (x3 + s3) + vadd.i32 q3, q3, q11 + veor q3, q3, q7 + + add ip, r1, 
#0x20 + vst1.8 {q0-q1}, [r1] + vst1.8 {q2-q3}, [ip] + + bx lr +ENDPROC(chacha20_block_xor_neon) + + .align 5 +ENTRY(chacha20_4block_xor_neon) + push {r4-r6, lr} + mov ip, sp // preserve the stack pointer + sub r3, sp, #0x20 // allocate a 32 byte buffer + bic r3, r3, #0x1f // aligned to 32 bytes + mov sp, r3 + + // r0: Input state matrix, s + // r1: 4 data blocks output, o + // r2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. + // + + // x0..15[0-3] = s0..3[0..3] + add r3, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [r3] + + adr r3, CTRINC + vdup.32 q15, d7[1] + vdup.32 q14, d7[0] + vld1.32 {q11}, [r3, :128] + vdup.32 q13, d6[1] + vdup.32 q12, d6[0] + vadd.i32 q12, q12, q11 // x12 += counter values 0-3 + vdup.32 q11, d5[1] + vdup.32 q10, d5[0] + vdup.32 q9, d4[1] + vdup.32 q8, d4[0] + vdup.32 q7, d3[1] + vdup.32 q6, d3[0] + vdup.32 q5, d2[1] + vdup.32 q4, d2[0] + vdup.32 q3, d1[1] + vdup.32 q2, d1[0] + vdup.32 q1, d0[1] + vdup.32 q0, d0[0] + + mov r3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 + + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + vrev32.16 q15, q15 + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vadd.i32 q8, q8, q12 + 
vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #12 + vshl.u32 q5, q9, #12 + vsri.u32 q4, q8, #20 + vsri.u32 q5, q9, #20 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #12 + vshl.u32 q7, q9, #12 + vsri.u32 q6, q8, #20 + vsri.u32 q7, q9, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q8, q12, q0 + veor q9, q13, q1 + vshl.u32 q12, q8, #8 + vshl.u32 q13, q9, #8 + vsri.u32 q12, q8, #24 + vsri.u32 q13, q9, #24 + + veor q8, q14, q2 + veor q9, q15, q3 + vshl.u32 q14, q8, #8 + vshl.u32 q15, q9, #8 + vsri.u32 q14, q8, #24 + vsri.u32 q15, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #7 + vshl.u32 q5, q9, #7 + vsri.u32 q4, q8, #25 + vsri.u32 q5, q9, #25 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #7 + vshl.u32 q7, q9, #7 + vsri.u32 q6, q8, #25 + vsri.u32 q7, q9, #25 + + vld1.32 {q8-q9}, [sp, :256] + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 + + vrev32.16 q15, q15 + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = 
rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #12 + vshl.u32 q4, q9, #12 + vsri.u32 q7, q8, #20 + vsri.u32 q4, q9, #20 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #12 + vshl.u32 q6, q9, #12 + vsri.u32 q5, q8, #20 + vsri.u32 q6, q9, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q8, q15, q0 + veor q9, q12, q1 + vshl.u32 q15, q8, #8 + vshl.u32 q12, q9, #8 + vsri.u32 q15, q8, #24 + vsri.u32 q12, q9, #24 + + veor q8, q13, q2 + veor q9, q14, q3 + vshl.u32 q13, q8, #8 + vshl.u32 q14, q9, #8 + vsri.u32 q13, q8, #24 + vsri.u32 q14, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #7 + vshl.u32 q4, q9, #7 + vsri.u32 q7, q8, #25 + vsri.u32 q4, q9, #25 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #7 + vshl.u32 q6, q9, #7 + vsri.u32 q5, q8, #25 + vsri.u32 q6, q9, #25 + + subs r3, r3, #1 + beq 0f + + vld1.32 {q8-q9}, [sp, :256] + b .Ldoubleround4 + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] +0: ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q0, q0, q8 + vadd.i32 q1, q1, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q2, q2, q8 + vadd.i32 q3, q3, q9 + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // 
x6[0-3] += s1[2] + // x7[0-3] += s1[3] + ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q4, q4, q8 + vadd.i32 q5, q5, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q6, q6, q8 + vadd.i32 q7, q7, q9 + + // interleave 32-bit words in state n, n+1 + vzip.32 q0, q1 + vzip.32 q2, q3 + vzip.32 q4, q5 + vzip.32 q6, q7 + + // interleave 64-bit words in state n, n+2 + vswp d1, d4 + vswp d3, d6 + vswp d9, d12 + vswp d11, d14 + + // xor with corresponding input, write to output + vld1.8 {q8-q9}, [r2]! + veor q8, q8, q0 + veor q9, q9, q4 + vst1.8 {q8-q9}, [r1]! + + vld1.32 {q8-q9}, [sp, :256] + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + vadd.i32 q8, q8, q0 + vadd.i32 q9, q9, q4 + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q10, q10, q0 + vadd.i32 q11, q11, q4 + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + adr r3, CTRINC + vadd.i32 q12, q12, q0 + vld1.32 {q0}, [r3, :128] + vadd.i32 q13, q13, q4 + vadd.i32 q12, q12, q0 // x12 += counter values 0-3 + + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q14, q14, q0 + vadd.i32 q15, q15, q4 + + // interleave 32-bit words in state n, n+1 + vzip.32 q8, q9 + vzip.32 q10, q11 + vzip.32 q12, q13 + vzip.32 q14, q15 + + // interleave 64-bit words in state n, n+2 + vswp d17, d20 + vswp d19, d22 + vswp d25, d28 + vswp d27, d30 + + vmov q4, q1 + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q8 + veor q1, q1, q12 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q2 + veor q1, q1, q6 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q10 + veor q1, q1, q14 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q4 + veor q1, q1, q5 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q9 + veor q1, q1, q13 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! 
+ veor q0, q0, q3 + veor q1, q1, q7 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2] + veor q0, q0, q11 + veor q1, q1, q15 + vst1.8 {q0-q1}, [r1] + + mov sp, ip + pop {r4-r6, pc} +ENDPROC(chacha20_4block_xor_neon) + + .align 4 +CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..592f75ae4fa1 --- /dev/null +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -0,0 +1,128 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u32 state[16]; + int err; + + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(req); + + err = skcipher_walk_virt(&walk, req, true); + + crypto_chacha20_init(state, ctx, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + kernel_neon_end(); + + return err; +} + +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + 
.max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .walksize = 4 * CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_neon, + .decrypt = chacha20_neon, +}; + +static int __init chacha20_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_skcipher(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_skcipher(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); From 293614ce3eda94a3c9b38d5c18fdc06eb1397221 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 16:41:51 +0000 Subject: [PATCH 039/142] crypto: arm64/aes-blk - expose AES-CTR as synchronous cipher as well In addition to wrapping the AES-CTR cipher into the async SIMD wrapper, which exposes it as an async skcipher that defers processing to process context, expose our AES-CTR implementation directly as a synchronous cipher as well, but with a lower priority. This makes the AES-CTR transform usable in places where synchronous transforms are required, such as the MAC802.11 encryption code, which executes in softirq context, where SIMD processing is allowed on arm64. Users of the async transform will keep the existing behavior. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 4e3f8adb1793..5164aaf82c6a 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -325,6 +325,23 @@ static struct skcipher_alg aes_algs[] = { { .setkey = skcipher_aes_setkey, .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, +}, { + .base = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, + .cra_priority = PRIO - 1, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, }, { .base = { .cra_name = "__xts(aes)", @@ -350,8 +367,9 @@ static void aes_exit(void) { int i; - for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++) - simd_skcipher_free(aes_simd_algs[i]); + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } @@ -370,6 +388,9 @@ static int __init aes_init(void) return err; for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + algname = aes_algs[i].base.cra_name + 2; drvname = aes_algs[i].base.cra_driver_name + 2; basename = aes_algs[i].base.cra_driver_name; From bed593c0e852f5c1efd3ca4e984fd744c51cf6ee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 16:41:52 +0000 Subject: [PATCH 040/142] crypto: arm64/aes - add scalar implementation This adds a scalar implementation of AES, based on the precomputed tables that are exposed by the generic AES code. 
Since rotates are cheap on arm64, this implementation only uses the 4 core tables (of 1 KB each), and avoids the prerotated ones, reducing the D-cache footprint by 75%. On Cortex-A57, this code manages 13.0 cycles per byte, which is ~34% faster than the generic C code. (Note that this is still >13x slower than the code that uses the optional ARMv8 Crypto Extensions, which manages <1 cycles per byte.) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 4 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/aes-cipher-core.S | 127 ++++++++++++++++++++++++++++ arch/arm64/crypto/aes-cipher-glue.c | 69 +++++++++++++++ 4 files changed, 203 insertions(+) create mode 100644 arch/arm64/crypto/aes-cipher-core.S create mode 100644 arch/arm64/crypto/aes-cipher-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 0bf0f531f539..0826f8e599a6 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -41,6 +41,10 @@ config CRYPTO_CRC32_ARM64_CE depends on KERNEL_MODE_NEON && CRC32 select CRYPTO_HASH +config CRYPTO_AES_ARM64 + tristate "AES core cipher using scalar instructions" + select CRYPTO_AES + config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 9d2826c5fccf..a893507629eb 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -44,6 +44,9 @@ sha512-arm64-y := sha512-glue.o sha512-core.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o +obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o +aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o + AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S new file mode 100644 index 000000000000..37590ab8121a --- /dev/null +++ 
b/arch/arm64/crypto/aes-cipher-core.S @@ -0,0 +1,127 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .text + + rk .req x0 + out .req x1 + in .req x2 + rounds .req x3 + tt .req x4 + lt .req x2 + + .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc + ldp \out0, \out1, [rk], #8 + + ubfx w13, \in0, #0, #8 + ubfx w14, \in1, #8, #8 + ldr w13, [tt, w13, uxtw #2] + ldr w14, [tt, w14, uxtw #2] + + .if \enc + ubfx w17, \in1, #0, #8 + ubfx w18, \in2, #8, #8 + .else + ubfx w17, \in3, #0, #8 + ubfx w18, \in0, #8, #8 + .endif + ldr w17, [tt, w17, uxtw #2] + ldr w18, [tt, w18, uxtw #2] + + ubfx w15, \in2, #16, #8 + ubfx w16, \in3, #24, #8 + ldr w15, [tt, w15, uxtw #2] + ldr w16, [tt, w16, uxtw #2] + + .if \enc + ubfx \t0, \in3, #16, #8 + ubfx \t1, \in0, #24, #8 + .else + ubfx \t0, \in1, #16, #8 + ubfx \t1, \in2, #24, #8 + .endif + ldr \t0, [tt, \t0, uxtw #2] + ldr \t1, [tt, \t1, uxtw #2] + + eor \out0, \out0, w13 + eor \out1, \out1, w17 + eor \out0, \out0, w14, ror #24 + eor \out1, \out1, w18, ror #24 + eor \out0, \out0, w15, ror #16 + eor \out1, \out1, \t0, ror #16 + eor \out0, \out0, w16, ror #8 + eor \out1, \out1, \t1, ror #8 + .endm + + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 + .endm + + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 + .endm + + .macro do_crypt, round, ttab, ltab + ldp w5, w6, [in] + ldp w7, w8, [in, #8] + ldp w9, w10, [rk], #16 + ldp w11, w12, [rk, #-8] + +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) 
+CPU_BE( rev w8, w8 ) + + eor w5, w5, w9 + eor w6, w6, w10 + eor w7, w7, w11 + eor w8, w8, w12 + + ldr tt, =\ttab + ldr lt, =\ltab + + tbnz rounds, #1, 1f + +0: \round w9, w10, w11, w12, w5, w6, w7, w8 + \round w5, w6, w7, w8, w9, w10, w11, w12 + +1: subs rounds, rounds, #4 + \round w9, w10, w11, w12, w5, w6, w7, w8 + csel tt, tt, lt, hi + \round w5, w6, w7, w8, w9, w10, w11, w12 + b.hi 0b + +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) + + stp w5, w6, [out] + stp w7, w8, [out, #8] + ret + + .align 4 + .ltorg + .endm + + .align 5 +ENTRY(__aes_arm64_encrypt) + do_crypt fround, crypto_ft_tab, crypto_fl_tab +ENDPROC(__aes_arm64_encrypt) + + .align 5 +ENTRY(__aes_arm64_decrypt) + do_crypt iround, crypto_it_tab, crypto_il_tab +ENDPROC(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c new file mode 100644 index 000000000000..7288e7cbebff --- /dev/null +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -0,0 +1,69 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include + /* Assembly entry points, defined with ENTRY() in aes-cipher-core.S; each call handles one 16-byte block */ +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +EXPORT_SYMBOL(__aes_arm64_encrypt); + +asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +EXPORT_SYMBOL(__aes_arm64_decrypt); + /* cia_encrypt hook: encrypt one block from in to out using the encryption key schedule stored in the tfm context */ +static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; /* 10, 12 or 14 for key_length 16, 24 or 32; presumably key_length is in bytes - confirm */ + + __aes_arm64_encrypt(ctx->key_enc, out, in, rounds); +} + /* cia_decrypt hook: same as aes_encrypt but with the decryption key schedule (key_dec) */ +static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; /* same round-count formula as aes_encrypt */ + + __aes_arm64_decrypt(ctx->key_dec, out, in, rounds); +} + /* Algorithm descriptor passed to crypto_register_alg() in aes_init() */ +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-arm64", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + + .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, + .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, + .cra_cipher.cia_setkey = crypto_aes_set_key, /* key expansion is not implemented here; crypto_aes_set_key is defined outside this file */ + .cra_cipher.cia_encrypt = aes_encrypt, + .cra_cipher.cia_decrypt = aes_decrypt +}; + /* Module init: register the cipher and return the registration status */ +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + /* Module exit: unregister the cipher */ +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Scalar AES cipher for arm64"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("aes"); From 81edb42629758bacdf813dd5e4542ae26e3ad73a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 16:41:53 +0000 Subject: [PATCH 041/142] crypto: arm/aes - replace scalar AES cipher This replaces the scalar AES cipher that originates in the OpenSSL project with a new implementation that is ~15% (*) faster (on modern cores), and reuses the lookup tables and the key schedule generation
routines from the generic C implementation (which is usually compiled in anyway due to networking and other subsystems depending on it). Note that the bit sliced NEON code for AES still depends on the scalar cipher that this patch replaces, so it is not removed entirely yet. * On Cortex-A57, the performance increases from 17.0 to 14.9 cycles per byte for 128-bit keys. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 20 +--- arch/arm/crypto/Makefile | 4 +- arch/arm/crypto/aes-cipher-core.S | 179 ++++++++++++++++++++++++++++++ arch/arm/crypto/aes-cipher-glue.c | 74 ++++++++++++ arch/arm/crypto/aes_glue.c | 98 ---------------- 5 files changed, 256 insertions(+), 119 deletions(-) create mode 100644 arch/arm/crypto/aes-cipher-core.S create mode 100644 arch/arm/crypto/aes-cipher-glue.c delete mode 100644 arch/arm/crypto/aes_glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 2f3339f015d3..f1de658c3c8f 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -62,33 +62,15 @@ config CRYPTO_SHA512_ARM using optimized ARM assembler and NEON, when available. config CRYPTO_AES_ARM - tristate "AES cipher algorithms (ARM-asm)" - depends on ARM + tristate "Scalar AES cipher for ARM" select CRYPTO_ALGAPI select CRYPTO_AES help Use optimized AES assembler routines for ARM platforms. - AES cipher algorithms (FIPS-197). AES uses the Rijndael - algorithm. - - Rijndael appears to be consistently a very good performer in - both hardware and software across a wide range of computing - environments regardless of its use in feedback or non-feedback - modes. Its key setup time is excellent, and its key agility is - good. Rijndael's very low memory requirements make it very well - suited for restricted-space environments, in which it also - demonstrates excellent performance. Rijndael's operations are - among the easiest to defend against power and timing attacks. 
- - The AES specifies three key sizes: 128, 192 and 256 bits - - See for more information. - config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_AES_ARM select CRYPTO_BLKCIPHER select CRYPTO_SIMD help diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 8d74e55eacd4..8f5de2db701c 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -27,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m)) endif endif -aes-arm-y := aes-armv4.o aes_glue.o -aes-arm-bs-y := aesbs-core.o aesbs-glue.o +aes-arm-y := aes-cipher-core.o aes-cipher-glue.o +aes-arm-bs-y := aes-armv4.o aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S new file mode 100644 index 000000000000..b04261e1e068 --- /dev/null +++ b/arch/arm/crypto/aes-cipher-core.S @@ -0,0 +1,179 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd. + * Author: Ard Biesheuvel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include + + .text + .align 5 + /* Argument registers; the C prototype in aes-cipher-glue.c is (u32 *rk, int rounds, const u8 *in, u8 *out) */ + rk .req r0 + rounds .req r1 + in .req r2 + out .req r3 + tt .req ip + /* Scratch registers. t1 and t2 reuse r2 and r3 (in and out); do_crypt pushes r3 and reloads out from the stack before storing the result. */ + t0 .req lr + t1 .req r2 + t2 .req r3 + /* __select: isolate byte idx of in. Before ARMv7 the byte is masked and stays at its original bit position; from ARMv7 on ubfx moves it down to bits 7:0. */ + .macro __select, out, in, idx + .if __LINUX_ARM_ARCH__ < 7 + and \out, \in, #0xff << (8 * \idx) + .else + ubfx \out, \in, #(8 * \idx), #8 + .endif + .endm + /* __load: fetch the 32-bit entry of the table at tt selected by the byte that __select produced. In the pre-ARMv7 case with idx above 0 the byte still sits at bit 8 * idx, so it is shifted right by 8 * idx - 2 to form the word offset; otherwise the index is shifted left by 2. */ + .macro __load, out, in, idx + .if __LINUX_ARM_ARCH__ < 7 && \idx > 0 + ldr \out, [tt, \in, lsr #(8 * \idx) - 2] + .else + ldr \out, [tt, \in, lsl #2] + .endif + .endm + /* __hround: compute two output words, each the XOR of four table entries (one byte taken from each input word, rotated by 0, 24, 16 and 8 bits) and one round-key word. The two key words are fetched with ldm from rk, which advances. enc selects which inputs feed the second output word (fround passes 1, iround passes 0). Uses t0-t2 plus the caller-supplied t3 and t4 as scratch. */ + .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc + __select \out0, \in0, 0 + __select t0, \in1, 1 + __load \out0, \out0, 0 + __load t0, t0, 1 + + .if \enc + __select \out1, \in1, 0 + __select t1, \in2, 1 + .else + __select \out1, \in3, 0 + __select t1, \in0, 1 + .endif + __load \out1, \out1, 0 + __select t2, \in2, 2 + __load t1, t1, 1 + __load t2, t2, 2 + + eor \out0, \out0, t0, ror #24 + + __select t0, \in3, 3 + .if \enc + __select \t3, \in3, 2 + __select \t4, \in0, 3 + .else + __select \t3, \in1, 2 + __select \t4, \in2, 3 + .endif + __load \t3, \t3, 2 + __load t0, t0, 3 + __load \t4, \t4, 3 + + eor \out1, \out1, t1, ror #24 + eor \out0, \out0, t2, ror #16 + ldm rk!, {t1, t2} + eor \out1, \out1, \t3, ror #16 + eor \out0, \out0, t0, ror #8 + eor \out1, \out1, \t4, ror #8 + eor \out0, \out0, t1 + eor \out1, \out1, t2 + .endm + /* fround / iround: one full round built from two __hround halves. The first half borrows out2 and out3 as scratch; the second half uses two of the input registers as scratch, so the inputs do not survive the round. */ + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 + .endm + + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 + .endm + /* __rev: byte-reverse a 32-bit word. Before ARMv6 it is assembled from shifts and masks and clobbers t0-t2; otherwise a single rev instruction is used. */ + .macro __rev, out, in + .if __LINUX_ARM_ARCH__ < 6 + lsl t0, \in, #24 + and t1, \in, #0xff00 + and t2, \in, #0xff0000 + orr \out, t0, \in, lsr #24 + orr \out, \out, t1, lsl #8 + orr \out, \out, t2, lsr #8 + .else + rev \out, \in + .endif + .endm + /* __adrl: load the address of sym into out, optionally under condition code c: literal-pool ldr before ARMv7, movw/movt pair otherwise */ + .macro __adrl,
out, sym, c + .if __LINUX_ARM_ARCH__ < 7 + ldr\c \out, =\sym + .else + movw\c \out, #:lower16:\sym + movt\c \out, #:upper16:\sym + .endif + .endm + /* do_crypt: process one 16-byte block. Loads the block into r4-r7 and the first four round-key words into r8-r11, XORs them, then applies the round macro alternately between the r8-r11 and r4-r7 register sets. r3 (out) is included in the push because t2 aliases it. */ + .macro do_crypt, round, ttab, ltab + push {r3-r11, lr} + + ldr r4, [in] + ldr r5, [in, #4] + ldr r6, [in, #8] + ldr r7, [in, #12] + + ldm rk!, {r8-r11} + +#ifdef CONFIG_CPU_BIG_ENDIAN + __rev r4, r4 + __rev r5, r5 + __rev r6, r6 + __rev r7, r7 +#endif + + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + + __adrl tt, \ttab + /* when bit 1 of rounds is set, enter the loop at label 1 so the first pass runs two rounds instead of four */ + tst rounds, #2 + bne 1f + +0: \round r8, r9, r10, r11, r4, r5, r6, r7 + \round r4, r5, r6, r7, r8, r9, r10, r11 + +1: subs rounds, rounds, #4 + \round r8, r9, r10, r11, r4, r5, r6, r7 + __adrl tt, \ltab, ls /* last pass (condition ls): switch tt to the final-round table; the flags set by subs are still what bhi tests below */ + \round r4, r5, r6, r7, r8, r9, r10, r11 + bhi 0b + +#ifdef CONFIG_CPU_BIG_ENDIAN + __rev r4, r4 + __rev r5, r5 + __rev r6, r6 + __rev r7, r7 +#endif + + ldr out, [sp] /* r3 was used as scratch (t2); recover the output pointer saved by the push */ + + str r4, [out] + str r5, [out, #4] + str r6, [out, #8] + str r7, [out, #12] + + pop {r3-r11, pc} /* restore the pushed registers and return by loading pc */ + + .align 3 + .ltorg + .endm + +ENTRY(__aes_arm_encrypt) + do_crypt fround, crypto_ft_tab, crypto_fl_tab +ENDPROC(__aes_arm_encrypt) + +ENTRY(__aes_arm_decrypt) + do_crypt iround, crypto_it_tab, crypto_il_tab +ENDPROC(__aes_arm_decrypt) diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c new file mode 100644 index 000000000000..c222f6e072ad --- /dev/null +++ b/arch/arm/crypto/aes-cipher-glue.c @@ -0,0 +1,74 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd. + * Author: Ard Biesheuvel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +#include +#include +#include + /* Assembly entry points, defined with ENTRY() in aes-cipher-core.S; note the argument order (rk, rounds, in, out) */ +asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out); +EXPORT_SYMBOL(__aes_arm_encrypt); + +asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out); +EXPORT_SYMBOL(__aes_arm_decrypt); + /* cia_encrypt hook: encrypt one block from in to out using the encryption key schedule stored in the tfm context */ +static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; /* 10, 12 or 14 for key_length 16, 24 or 32; presumably key_length is in bytes - confirm */ + + __aes_arm_encrypt(ctx->key_enc, rounds, in, out); +} + /* cia_decrypt hook: same as aes_encrypt but with the decryption key schedule (key_dec) */ +static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; /* same round-count formula as aes_encrypt */ + + __aes_arm_decrypt(ctx->key_dec, rounds, in, out); +} + /* Algorithm descriptor passed to crypto_register_alg() in aes_init() */ +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-arm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + + .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, + .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, + .cra_cipher.cia_setkey = crypto_aes_set_key, /* key expansion is not implemented here; crypto_aes_set_key is defined outside this file */ + .cra_cipher.cia_encrypt = aes_encrypt, + .cra_cipher.cia_decrypt = aes_decrypt, + /* The assembly reads and writes the block with word-sized ldr/str. NOTE(review): presumably an alignmask of 3 makes the crypto API supply 4-byte aligned buffers - confirm */ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + .cra_alignmask = 3, +#endif +}; + /* Module init: register the cipher and return the registration status */ +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + /* Module exit: unregister the cipher */ +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Scalar AES cipher for ARM"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c deleted file mode 100644 index 0409b8f89782..000000000000 --- a/arch/arm/crypto/aes_glue.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Glue Code for the asm optimized version of the AES Cipher Algorithm - */ - -#include -#include -#include - -#include "aes_glue.h" -
-EXPORT_SYMBOL(AES_encrypt); -EXPORT_SYMBOL(AES_decrypt); -EXPORT_SYMBOL(private_AES_set_encrypt_key); -EXPORT_SYMBOL(private_AES_set_decrypt_key); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - AES_encrypt(src, dst, &ctx->enc_key); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - AES_decrypt(src, dst, &ctx->dec_key); -} - -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - - switch (key_len) { - case AES_KEYSIZE_128: - key_len = 128; - break; - case AES_KEYSIZE_192: - key_len = 192; - break; - case AES_KEYSIZE_256: - key_len = 256; - break; - default: - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* private_AES_set_decrypt_key expects an encryption key as input */ - ctx->dec_key = ctx->enc_key; - if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - return 0; -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct AES_CTX), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); 
-module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("aes"); -MODULE_ALIAS_CRYPTO("aes-asm"); -MODULE_AUTHOR("David McCullough "); From 1abee99eafab67fb1c98f9ecfc43cd5735384a86 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 16:41:55 +0000 Subject: [PATCH 042/142] crypto: arm64/aes - reimplement bit-sliced ARM/NEON implementation for arm64 This is a reimplementation of the NEON version of the bit-sliced AES algorithm. This code is heavily based on Andy Polyakov's OpenSSL version for ARM, which is also available in the kernel. This is an alternative for the existing NEON implementation for arm64 authored by me, which suffers from poor performance due to its reliance on the pathologically slow four register variant of the tbl/tbx NEON instruction. This version is about ~30% (*) faster than the generic C code, but only in cases where the input can be 8x interleaved (this is a fundamental property of bit slicing). For this reason, only the chaining modes ECB, XTS and CTR are implemented. (The significance of ECB is that it could potentially be used by other chaining modes) * Measured on Cortex-A57. Note that this is still an order of magnitude slower than the implementations that use the dedicated AES instructions introduced in ARMv8, but those are part of an optional extension, and so it is good to have a fallback. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 7 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/aes-neonbs-core.S | 963 ++++++++++++++++++++++++++++ arch/arm64/crypto/aes-neonbs-glue.c | 420 ++++++++++++ 4 files changed, 1393 insertions(+) create mode 100644 arch/arm64/crypto/aes-neonbs-core.S create mode 100644 arch/arm64/crypto/aes-neonbs-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 0826f8e599a6..5de75c3dcbd4 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -82,4 +82,11 @@ config CRYPTO_CHACHA20_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 +config CRYPTO_AES_ARM64_BS + tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES_ARM64 + select CRYPTO_SIMD + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index a893507629eb..d1ae1b9cbe70 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -47,6 +47,9 @@ chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o +obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o +aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o + AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S new file mode 100644 index 000000000000..8d0cdaa2768d --- /dev/null +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -0,0 +1,963 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2016 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* + * The algorithm implemented here is described in detail by the paper + * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and + * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) + * + * This implementation is based primarily on the OpenSSL implementation + * for 32-bit ARM written by Andy Polyakov + */ + +#include +#include + + .text + + rounds .req x11 + bskey .req x12 + + .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + eor \b2, \b2, \b1 + eor \b5, \b5, \b6 + eor \b3, \b3, \b0 + eor \b6, \b6, \b2 + eor \b5, \b5, \b0 + eor \b6, \b6, \b3 + eor \b3, \b3, \b7 + eor \b7, \b7, \b5 + eor \b3, \b3, \b4 + eor \b4, \b4, \b5 + eor \b2, \b2, \b7 + eor \b3, \b3, \b1 + eor \b1, \b1, \b5 + .endm + + .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + eor \b0, \b0, \b6 + eor \b1, \b1, \b4 + eor \b4, \b4, \b6 + eor \b2, \b2, \b0 + eor \b6, \b6, \b1 + eor \b1, \b1, \b5 + eor \b5, \b5, \b3 + eor \b3, \b3, \b7 + eor \b7, \b7, \b5 + eor \b2, \b2, \b5 + eor \b4, \b4, \b7 + .endm + + .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 + eor \b1, \b1, \b7 + eor \b4, \b4, \b7 + eor \b7, \b7, \b5 + eor \b1, \b1, \b3 + eor \b2, \b2, \b5 + eor \b3, \b3, \b7 + eor \b6, \b6, \b1 + eor \b2, \b2, \b0 + eor \b5, \b5, \b3 + eor \b4, \b4, \b6 + eor \b0, \b0, \b6 + eor \b1, \b1, \b4 + .endm + + .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 + eor \b1, \b1, \b5 + eor \b2, \b2, \b7 + eor \b3, \b3, \b1 + eor \b4, \b4, \b5 + eor \b7, \b7, \b5 + eor \b3, \b3, \b4 + eor \b5, \b5, \b0 + eor \b3, \b3, \b7 + eor \b6, \b6, \b2 + eor \b2, \b2, \b1 + eor \b6, \b6, \b3 + eor \b3, \b3, \b0 + eor \b5, \b5, \b6 + .endm + + .macro mul_gf4, x0, x1, y0, y1, t0, t1 + eor \t0, \y0, \y1 + and \t0, \t0, \x0 + eor \x0, \x0, \x1 + and \t1, \x1, \y0 + and \x0, \x0, \y1 + eor \x1, \t1, \t0 + eor \x0, \x0, \t1 + .endm + + .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 + eor \t0, \y0, \y1 + eor \t1, \y2, \y3 + and \t0, \t0, \x0 + and \t1, \t1, \x2 + eor \x0, \x0, \x1 + eor 
\x2, \x2, \x3 + and \x1, \x1, \y0 + and \x3, \x3, \y2 + and \x0, \x0, \y1 + and \x2, \x2, \y3 + eor \x1, \x1, \x0 + eor \x2, \x2, \x3 + eor \x0, \x0, \t0 + eor \x3, \x3, \t1 + .endm + + .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, t0, t1, t2, t3 + eor \t0, \x0, \x2 + eor \t1, \x1, \x3 + mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 + eor \y0, \y0, \y2 + eor \y1, \y1, \y3 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 + eor \x0, \x0, \t0 + eor \x2, \x2, \t0 + eor \x1, \x1, \t1 + eor \x3, \x3, \t1 + eor \t0, \x4, \x6 + eor \t1, \x5, \x7 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 + eor \y0, \y0, \y2 + eor \y1, \y1, \y3 + mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 + eor \x4, \x4, \t0 + eor \x6, \x6, \t0 + eor \x5, \x5, \t1 + eor \x7, \x7, \t1 + .endm + + .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + eor \t3, \x4, \x6 + eor \t0, \x5, \x7 + eor \t1, \x1, \x3 + eor \s1, \x7, \x6 + eor \s0, \x0, \x2 + eor \s3, \t3, \t0 + orr \t2, \t0, \t1 + and \s2, \t3, \s0 + orr \t3, \t3, \s0 + eor \s0, \s0, \t1 + and \t0, \t0, \t1 + eor \t1, \x3, \x2 + and \s3, \s3, \s0 + and \s1, \s1, \t1 + eor \t1, \x4, \x5 + eor \s0, \x1, \x0 + eor \t3, \t3, \s1 + eor \t2, \t2, \s1 + and \s1, \t1, \s0 + orr \t1, \t1, \s0 + eor \t3, \t3, \s3 + eor \t0, \t0, \s1 + eor \t2, \t2, \s2 + eor \t1, \t1, \s3 + eor \t0, \t0, \s2 + and \s0, \x7, \x3 + eor \t1, \t1, \s2 + and \s1, \x6, \x2 + and \s2, \x5, \x1 + orr \s3, \x4, \x0 + eor \t3, \t3, \s0 + eor \t1, \t1, \s2 + eor \s0, \t0, \s3 + eor \t2, \t2, \s1 + and \s2, \t3, \t1 + eor \s1, \t2, \s2 + eor \s3, \s0, \s2 + bsl \s1, \t1, \s0 + not \t0, \s0 + bsl \s0, \s1, \s3 + bsl \t0, \s1, \s3 + bsl \s3, \t3, \t2 + eor \t3, \t3, \t2 + and \s2, \s0, \s3 + eor \t1, \t1, \t0 + eor \s2, \s2, \t3 + mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + .endm + + .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 
+ in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ + \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b + inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \ + \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ + \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ + \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b + out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ + \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b + .endm + + .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ + \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b + inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \ + \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ + \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ + \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b + inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ + \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b + .endm + + .macro enc_next_rk + ldp q16, q17, [bskey], #128 + ldp q18, q19, [bskey, #-96] + ldp q20, q21, [bskey, #-64] + ldp q22, q23, [bskey, #-32] + .endm + + .macro dec_next_rk + ldp q16, q17, [bskey, #-128]! 
+ ldp q18, q19, [bskey, #32] + ldp q20, q21, [bskey, #64] + ldp q22, q23, [bskey, #96] + .endm + + .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7 + eor \x0\().16b, \x0\().16b, v16.16b + eor \x1\().16b, \x1\().16b, v17.16b + eor \x2\().16b, \x2\().16b, v18.16b + eor \x3\().16b, \x3\().16b, v19.16b + eor \x4\().16b, \x4\().16b, v20.16b + eor \x5\().16b, \x5\().16b, v21.16b + eor \x6\().16b, \x6\().16b, v22.16b + eor \x7\().16b, \x7\().16b, v23.16b + .endm + + .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask + tbl \x0\().16b, {\x0\().16b}, \mask\().16b + tbl \x1\().16b, {\x1\().16b}, \mask\().16b + tbl \x2\().16b, {\x2\().16b}, \mask\().16b + tbl \x3\().16b, {\x3\().16b}, \mask\().16b + tbl \x4\().16b, {\x4\().16b}, \mask\().16b + tbl \x5\().16b, {\x5\().16b}, \mask\().16b + tbl \x6\().16b, {\x6\().16b}, \mask\().16b + tbl \x7\().16b, {\x7\().16b}, \mask\().16b + .endm + + .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7, inv + ext \t0\().16b, \x0\().16b, \x0\().16b, #12 + ext \t1\().16b, \x1\().16b, \x1\().16b, #12 + eor \x0\().16b, \x0\().16b, \t0\().16b + ext \t2\().16b, \x2\().16b, \x2\().16b, #12 + eor \x1\().16b, \x1\().16b, \t1\().16b + ext \t3\().16b, \x3\().16b, \x3\().16b, #12 + eor \x2\().16b, \x2\().16b, \t2\().16b + ext \t4\().16b, \x4\().16b, \x4\().16b, #12 + eor \x3\().16b, \x3\().16b, \t3\().16b + ext \t5\().16b, \x5\().16b, \x5\().16b, #12 + eor \x4\().16b, \x4\().16b, \t4\().16b + ext \t6\().16b, \x6\().16b, \x6\().16b, #12 + eor \x5\().16b, \x5\().16b, \t5\().16b + ext \t7\().16b, \x7\().16b, \x7\().16b, #12 + eor \x6\().16b, \x6\().16b, \t6\().16b + eor \t1\().16b, \t1\().16b, \x0\().16b + eor \x7\().16b, \x7\().16b, \t7\().16b + ext \x0\().16b, \x0\().16b, \x0\().16b, #8 + eor \t2\().16b, \t2\().16b, \x1\().16b + eor \t0\().16b, \t0\().16b, \x7\().16b + eor \t1\().16b, \t1\().16b, \x7\().16b + ext \x1\().16b, \x1\().16b, \x1\().16b, #8 + eor \t5\().16b, \t5\().16b, \x4\().16b + eor \x0\().16b, 
\x0\().16b, \t0\().16b + eor \t6\().16b, \t6\().16b, \x5\().16b + eor \x1\().16b, \x1\().16b, \t1\().16b + ext \t0\().16b, \x4\().16b, \x4\().16b, #8 + eor \t4\().16b, \t4\().16b, \x3\().16b + ext \t1\().16b, \x5\().16b, \x5\().16b, #8 + eor \t7\().16b, \t7\().16b, \x6\().16b + ext \x4\().16b, \x3\().16b, \x3\().16b, #8 + eor \t3\().16b, \t3\().16b, \x2\().16b + ext \x5\().16b, \x7\().16b, \x7\().16b, #8 + eor \t4\().16b, \t4\().16b, \x7\().16b + ext \x3\().16b, \x6\().16b, \x6\().16b, #8 + eor \t3\().16b, \t3\().16b, \x7\().16b + ext \x6\().16b, \x2\().16b, \x2\().16b, #8 + eor \x7\().16b, \t1\().16b, \t5\().16b + .ifb \inv + eor \x2\().16b, \t0\().16b, \t4\().16b + eor \x4\().16b, \x4\().16b, \t3\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x3\().16b, \x3\().16b, \t6\().16b + eor \x6\().16b, \x6\().16b, \t2\().16b + .else + eor \t3\().16b, \t3\().16b, \x4\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x2\().16b, \x3\().16b, \t6\().16b + eor \x3\().16b, \t0\().16b, \t4\().16b + eor \x4\().16b, \x6\().16b, \t2\().16b + mov \x6\().16b, \t3\().16b + .endif + .endm + + .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7 + ext \t0\().16b, \x0\().16b, \x0\().16b, #8 + ext \t6\().16b, \x6\().16b, \x6\().16b, #8 + ext \t7\().16b, \x7\().16b, \x7\().16b, #8 + eor \t0\().16b, \t0\().16b, \x0\().16b + ext \t1\().16b, \x1\().16b, \x1\().16b, #8 + eor \t6\().16b, \t6\().16b, \x6\().16b + ext \t2\().16b, \x2\().16b, \x2\().16b, #8 + eor \t7\().16b, \t7\().16b, \x7\().16b + ext \t3\().16b, \x3\().16b, \x3\().16b, #8 + eor \t1\().16b, \t1\().16b, \x1\().16b + ext \t4\().16b, \x4\().16b, \x4\().16b, #8 + eor \t2\().16b, \t2\().16b, \x2\().16b + ext \t5\().16b, \x5\().16b, \x5\().16b, #8 + eor \t3\().16b, \t3\().16b, \x3\().16b + eor \t4\().16b, \t4\().16b, \x4\().16b + eor \t5\().16b, \t5\().16b, \x5\().16b + eor \x0\().16b, \x0\().16b, \t6\().16b + eor \x1\().16b, \x1\().16b, \t6\().16b + eor \x2\().16b, \x2\().16b, \t0\().16b 
+ eor \x4\().16b, \x4\().16b, \t2\().16b + eor \x3\().16b, \x3\().16b, \t1\().16b + eor \x1\().16b, \x1\().16b, \t7\().16b + eor \x2\().16b, \x2\().16b, \t7\().16b + eor \x4\().16b, \x4\().16b, \t6\().16b + eor \x5\().16b, \x5\().16b, \t3\().16b + eor \x3\().16b, \x3\().16b, \t6\().16b + eor \x6\().16b, \x6\().16b, \t4\().16b + eor \x4\().16b, \x4\().16b, \t7\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x7\().16b, \x7\().16b, \t5\().16b + mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 + .endm + + .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 + ushr \t0\().2d, \b0\().2d, #\n + ushr \t1\().2d, \b1\().2d, #\n + eor \t0\().16b, \t0\().16b, \a0\().16b + eor \t1\().16b, \t1\().16b, \a1\().16b + and \t0\().16b, \t0\().16b, \mask\().16b + and \t1\().16b, \t1\().16b, \mask\().16b + eor \a0\().16b, \a0\().16b, \t0\().16b + shl \t0\().2d, \t0\().2d, #\n + eor \a1\().16b, \a1\().16b, \t1\().16b + shl \t1\().2d, \t1\().2d, #\n + eor \b0\().16b, \b0\().16b, \t0\().16b + eor \b1\().16b, \b1\().16b, \t1\().16b + .endm + + .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 + movi \t0\().16b, #0x55 + movi \t1\().16b, #0x33 + swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 + swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 + movi \t0\().16b, #0x0f + swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 + swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 + swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 + swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 + .endm + + + .align 6 +M0: .octa 0x0004080c0105090d02060a0e03070b0f + +M0SR: .octa 0x0004080c05090d010a0e02060f03070b +SR: .octa 0x0f0e0d0c0a09080b0504070600030201 +SRM0: .octa 0x01060b0c0207080d0304090e00050a0f + +M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03 +ISR: .octa 0x0f0e0d0c080b0a090504070602010003 +ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f + + /* + * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) + */ +ENTRY(aesbs_convert_key) + ld1 
{v7.4s}, [x1], #16 // load round 0 key + ld1 {v17.4s}, [x1], #16 // load round 1 key + + movi v8.16b, #0x01 // bit masks + movi v9.16b, #0x02 + movi v10.16b, #0x04 + movi v11.16b, #0x08 + movi v12.16b, #0x10 + movi v13.16b, #0x20 + movi v14.16b, #0x40 + movi v15.16b, #0x80 + ldr q16, M0 + + sub x2, x2, #1 + str q7, [x0], #16 // save round 0 key + +.Lkey_loop: + tbl v7.16b ,{v17.16b}, v16.16b + ld1 {v17.4s}, [x1], #16 // load next round key + + cmtst v0.16b, v7.16b, v8.16b + cmtst v1.16b, v7.16b, v9.16b + cmtst v2.16b, v7.16b, v10.16b + cmtst v3.16b, v7.16b, v11.16b + cmtst v4.16b, v7.16b, v12.16b + cmtst v5.16b, v7.16b, v13.16b + cmtst v6.16b, v7.16b, v14.16b + cmtst v7.16b, v7.16b, v15.16b + not v0.16b, v0.16b + not v1.16b, v1.16b + not v5.16b, v5.16b + not v6.16b, v6.16b + + subs x2, x2, #1 + stp q0, q1, [x0], #128 + stp q2, q3, [x0, #-96] + stp q4, q5, [x0, #-64] + stp q6, q7, [x0, #-32] + b.ne .Lkey_loop + + movi v7.16b, #0x63 // compose .L63 + eor v17.16b, v17.16b, v7.16b + str q17, [x0] + ret +ENDPROC(aesbs_convert_key) + + .align 4 +aesbs_encrypt8: + ldr q9, [bskey], #16 // round 0 key + ldr q8, M0SR + ldr q24, SR + + eor v10.16b, v0.16b, v9.16b // xor with round0 key + eor v11.16b, v1.16b, v9.16b + tbl v0.16b, {v10.16b}, v8.16b + eor v12.16b, v2.16b, v9.16b + tbl v1.16b, {v11.16b}, v8.16b + eor v13.16b, v3.16b, v9.16b + tbl v2.16b, {v12.16b}, v8.16b + eor v14.16b, v4.16b, v9.16b + tbl v3.16b, {v13.16b}, v8.16b + eor v15.16b, v5.16b, v9.16b + tbl v4.16b, {v14.16b}, v8.16b + eor v10.16b, v6.16b, v9.16b + tbl v5.16b, {v15.16b}, v8.16b + eor v11.16b, v7.16b, v9.16b + tbl v6.16b, {v10.16b}, v8.16b + tbl v7.16b, {v11.16b}, v8.16b + + bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 + + sub rounds, rounds, #1 + b .Lenc_sbox + +.Lenc_loop: + shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 +.Lenc_sbox: + sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ + v13, v14, v15 + subs rounds, rounds, #1 + b.cc .Lenc_done + + enc_next_rk + + mix_cols v0, 
v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \ + v13, v14, v15 + + add_round_key v0, v1, v2, v3, v4, v5, v6, v7 + + b.ne .Lenc_loop + ldr q24, SRM0 + b .Lenc_loop + +.Lenc_done: + ldr q12, [bskey] // last round key + + bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11 + + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v12.16b + eor v4.16b, v4.16b, v12.16b + eor v6.16b, v6.16b, v12.16b + eor v3.16b, v3.16b, v12.16b + eor v7.16b, v7.16b, v12.16b + eor v2.16b, v2.16b, v12.16b + eor v5.16b, v5.16b, v12.16b + ret +ENDPROC(aesbs_encrypt8) + + .align 4 +aesbs_decrypt8: + lsl x9, rounds, #7 + add bskey, bskey, x9 + + ldr q9, [bskey, #-112]! // round 0 key + ldr q8, M0ISR + ldr q24, ISR + + eor v10.16b, v0.16b, v9.16b // xor with round0 key + eor v11.16b, v1.16b, v9.16b + tbl v0.16b, {v10.16b}, v8.16b + eor v12.16b, v2.16b, v9.16b + tbl v1.16b, {v11.16b}, v8.16b + eor v13.16b, v3.16b, v9.16b + tbl v2.16b, {v12.16b}, v8.16b + eor v14.16b, v4.16b, v9.16b + tbl v3.16b, {v13.16b}, v8.16b + eor v15.16b, v5.16b, v9.16b + tbl v4.16b, {v14.16b}, v8.16b + eor v10.16b, v6.16b, v9.16b + tbl v5.16b, {v15.16b}, v8.16b + eor v11.16b, v7.16b, v9.16b + tbl v6.16b, {v10.16b}, v8.16b + tbl v7.16b, {v11.16b}, v8.16b + + bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 + + sub rounds, rounds, #1 + b .Ldec_sbox + +.Ldec_loop: + shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 +.Ldec_sbox: + inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ + v13, v14, v15 + subs rounds, rounds, #1 + b.cc .Ldec_done + + dec_next_rk + + add_round_key v0, v1, v6, v4, v2, v7, v3, v5 + + inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \ + v13, v14, v15 + + b.ne .Ldec_loop + ldr q24, ISRM0 + b .Ldec_loop +.Ldec_done: + ldr q12, [bskey, #-16] // last round key + + bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11 + + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v12.16b + eor v6.16b, v6.16b, v12.16b + eor v4.16b, v4.16b, v12.16b + eor v2.16b, 
v2.16b, v12.16b + eor v7.16b, v7.16b, v12.16b + eor v3.16b, v3.16b, v12.16b + eor v5.16b, v5.16b, v12.16b + ret +ENDPROC(aesbs_decrypt8) + + /* + * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ + .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + stp x29, x30, [sp, #-16]! + mov x29, sp + +99: mov x5, #1 + lsl x5, x5, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x5, x5, xzr, mi + + ld1 {v0.16b}, [x1], #16 + tbnz x5, #1, 0f + ld1 {v1.16b}, [x1], #16 + tbnz x5, #2, 0f + ld1 {v2.16b}, [x1], #16 + tbnz x5, #3, 0f + ld1 {v3.16b}, [x1], #16 + tbnz x5, #4, 0f + ld1 {v4.16b}, [x1], #16 + tbnz x5, #5, 0f + ld1 {v5.16b}, [x1], #16 + tbnz x5, #6, 0f + ld1 {v6.16b}, [x1], #16 + tbnz x5, #7, 0f + ld1 {v7.16b}, [x1], #16 + +0: mov bskey, x2 + mov rounds, x3 + bl \do8 + + st1 {\o0\().16b}, [x0], #16 + tbnz x5, #1, 1f + st1 {\o1\().16b}, [x0], #16 + tbnz x5, #2, 1f + st1 {\o2\().16b}, [x0], #16 + tbnz x5, #3, 1f + st1 {\o3\().16b}, [x0], #16 + tbnz x5, #4, 1f + st1 {\o4\().16b}, [x0], #16 + tbnz x5, #5, 1f + st1 {\o5\().16b}, [x0], #16 + tbnz x5, #6, 1f + st1 {\o6\().16b}, [x0], #16 + tbnz x5, #7, 1f + st1 {\o7\().16b}, [x0], #16 + + cbnz x4, 99b + +1: ldp x29, x30, [sp], #16 + ret + .endm + + .align 4 +ENTRY(aesbs_ecb_encrypt) + __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 +ENDPROC(aesbs_ecb_encrypt) + + .align 4 +ENTRY(aesbs_ecb_decrypt) + __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 +ENDPROC(aesbs_ecb_decrypt) + + /* + * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ + .align 4 +ENTRY(aesbs_cbc_decrypt) + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + +99: mov x6, #1 + lsl x6, x6, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x6, x6, xzr, mi + + ld1 {v0.16b}, [x1], #16 + mov v25.16b, v0.16b + tbnz x6, #1, 0f + ld1 {v1.16b}, [x1], #16 + mov v26.16b, v1.16b + tbnz x6, #2, 0f + ld1 {v2.16b}, [x1], #16 + mov v27.16b, v2.16b + tbnz x6, #3, 0f + ld1 {v3.16b}, [x1], #16 + mov v28.16b, v3.16b + tbnz x6, #4, 0f + ld1 {v4.16b}, [x1], #16 + mov v29.16b, v4.16b + tbnz x6, #5, 0f + ld1 {v5.16b}, [x1], #16 + mov v30.16b, v5.16b + tbnz x6, #6, 0f + ld1 {v6.16b}, [x1], #16 + mov v31.16b, v6.16b + tbnz x6, #7, 0f + ld1 {v7.16b}, [x1] + +0: mov bskey, x2 + mov rounds, x3 + bl aesbs_decrypt8 + + ld1 {v24.16b}, [x5] // load IV + + eor v1.16b, v1.16b, v25.16b + eor v6.16b, v6.16b, v26.16b + eor v4.16b, v4.16b, v27.16b + eor v2.16b, v2.16b, v28.16b + eor v7.16b, v7.16b, v29.16b + eor v0.16b, v0.16b, v24.16b + eor v3.16b, v3.16b, v30.16b + eor v5.16b, v5.16b, v31.16b + + st1 {v0.16b}, [x0], #16 + mov v24.16b, v25.16b + tbnz x6, #1, 1f + st1 {v1.16b}, [x0], #16 + mov v24.16b, v26.16b + tbnz x6, #2, 1f + st1 {v6.16b}, [x0], #16 + mov v24.16b, v27.16b + tbnz x6, #3, 1f + st1 {v4.16b}, [x0], #16 + mov v24.16b, v28.16b + tbnz x6, #4, 1f + st1 {v2.16b}, [x0], #16 + mov v24.16b, v29.16b + tbnz x6, #5, 1f + st1 {v7.16b}, [x0], #16 + mov v24.16b, v30.16b + tbnz x6, #6, 1f + st1 {v3.16b}, [x0], #16 + mov v24.16b, v31.16b + tbnz x6, #7, 1f + ld1 {v24.16b}, [x1], #16 + st1 {v5.16b}, [x0], #16 +1: st1 {v24.16b}, [x5] // store IV + + cbnz x4, 99b + + ldp x29, x30, [sp], #16 + ret +ENDPROC(aesbs_cbc_decrypt) + + .macro next_tweak, out, in, const, tmp + sshr \tmp\().2d, \in\().2d, #63 + and \tmp\().16b, \tmp\().16b, \const\().16b + add \out\().2d, \in\().2d, \in\().2d + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \out\().16b, \out\().16b, \tmp\().16b + .endm + + .align 4 +.Lxts_mul_x: +CPU_LE( .quad 1, 0x87 ) +CPU_BE( .quad 0x87, 1 ) + + /* + * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int 
blocks, u8 iv[]) + * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +__xts_crypt8: + mov x6, #1 + lsl x6, x6, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x6, x6, xzr, mi + + ld1 {v0.16b}, [x1], #16 + next_tweak v26, v25, v30, v31 + eor v0.16b, v0.16b, v25.16b + tbnz x6, #1, 0f + + ld1 {v1.16b}, [x1], #16 + next_tweak v27, v26, v30, v31 + eor v1.16b, v1.16b, v26.16b + tbnz x6, #2, 0f + + ld1 {v2.16b}, [x1], #16 + next_tweak v28, v27, v30, v31 + eor v2.16b, v2.16b, v27.16b + tbnz x6, #3, 0f + + ld1 {v3.16b}, [x1], #16 + next_tweak v29, v28, v30, v31 + eor v3.16b, v3.16b, v28.16b + tbnz x6, #4, 0f + + ld1 {v4.16b}, [x1], #16 + str q29, [sp, #16] + eor v4.16b, v4.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #5, 0f + + ld1 {v5.16b}, [x1], #16 + str q29, [sp, #32] + eor v5.16b, v5.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #6, 0f + + ld1 {v6.16b}, [x1], #16 + str q29, [sp, #48] + eor v6.16b, v6.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #7, 0f + + ld1 {v7.16b}, [x1], #16 + str q29, [sp, #64] + eor v7.16b, v7.16b, v29.16b + next_tweak v29, v29, v30, v31 + +0: mov bskey, x2 + mov rounds, x3 + br x7 +ENDPROC(__xts_crypt8) + + .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + stp x29, x30, [sp, #-80]! 
+ mov x29, sp + + ldr q30, .Lxts_mul_x + ld1 {v25.16b}, [x5] + +99: adr x7, \do8 + bl __xts_crypt8 + + ldp q16, q17, [sp, #16] + ldp q18, q19, [sp, #48] + + eor \o0\().16b, \o0\().16b, v25.16b + eor \o1\().16b, \o1\().16b, v26.16b + eor \o2\().16b, \o2\().16b, v27.16b + eor \o3\().16b, \o3\().16b, v28.16b + + st1 {\o0\().16b}, [x0], #16 + mov v25.16b, v26.16b + tbnz x6, #1, 1f + st1 {\o1\().16b}, [x0], #16 + mov v25.16b, v27.16b + tbnz x6, #2, 1f + st1 {\o2\().16b}, [x0], #16 + mov v25.16b, v28.16b + tbnz x6, #3, 1f + st1 {\o3\().16b}, [x0], #16 + mov v25.16b, v29.16b + tbnz x6, #4, 1f + + eor \o4\().16b, \o4\().16b, v16.16b + eor \o5\().16b, \o5\().16b, v17.16b + eor \o6\().16b, \o6\().16b, v18.16b + eor \o7\().16b, \o7\().16b, v19.16b + + st1 {\o4\().16b}, [x0], #16 + tbnz x6, #5, 1f + st1 {\o5\().16b}, [x0], #16 + tbnz x6, #6, 1f + st1 {\o6\().16b}, [x0], #16 + tbnz x6, #7, 1f + st1 {\o7\().16b}, [x0], #16 + + cbnz x4, 99b + +1: st1 {v25.16b}, [x5] + ldp x29, x30, [sp], #80 + ret + .endm + +ENTRY(aesbs_xts_encrypt) + __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 +ENDPROC(aesbs_xts_encrypt) + +ENTRY(aesbs_xts_decrypt) + __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 +ENDPROC(aesbs_xts_decrypt) + + .macro next_ctr, v + mov \v\().d[1], x8 + adds x8, x8, #1 + mov \v\().d[0], x7 + adc x7, x7, xzr + rev64 \v\().16b, \v\().16b + .endm + + /* + * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + * int rounds, int blocks, u8 iv[], bool final) + */ +ENTRY(aesbs_ctr_encrypt) + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + + add x4, x4, x6 // do one extra block if final + + ldp x7, x8, [x5] + ld1 {v0.16b}, [x5] +CPU_LE( rev x7, x7 ) +CPU_LE( rev x8, x8 ) + adds x8, x8, #1 + adc x7, x7, xzr + +99: mov x9, #1 + lsl x9, x9, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x9, x9, xzr, le + + next_ctr v1 + next_ctr v2 + next_ctr v3 + next_ctr v4 + next_ctr v5 + next_ctr v6 + next_ctr v7 + +0: mov bskey, x2 + mov rounds, x3 + bl aesbs_encrypt8 + + lsr x9, x9, x6 // disregard the extra block + tbnz x9, #0, 0f + + ld1 {v8.16b}, [x1], #16 + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x0], #16 + tbnz x9, #1, 1f + + ld1 {v9.16b}, [x1], #16 + eor v1.16b, v1.16b, v9.16b + st1 {v1.16b}, [x0], #16 + tbnz x9, #2, 2f + + ld1 {v10.16b}, [x1], #16 + eor v4.16b, v4.16b, v10.16b + st1 {v4.16b}, [x0], #16 + tbnz x9, #3, 3f + + ld1 {v11.16b}, [x1], #16 + eor v6.16b, v6.16b, v11.16b + st1 {v6.16b}, [x0], #16 + tbnz x9, #4, 4f + + ld1 {v12.16b}, [x1], #16 + eor v3.16b, v3.16b, v12.16b + st1 {v3.16b}, [x0], #16 + tbnz x9, #5, 5f + + ld1 {v13.16b}, [x1], #16 + eor v7.16b, v7.16b, v13.16b + st1 {v7.16b}, [x0], #16 + tbnz x9, #6, 6f + + ld1 {v14.16b}, [x1], #16 + eor v2.16b, v2.16b, v14.16b + st1 {v2.16b}, [x0], #16 + tbnz x9, #7, 7f + + ld1 {v15.16b}, [x1], #16 + eor v5.16b, v5.16b, v15.16b + st1 {v5.16b}, [x0], #16 + + next_ctr v0 + cbnz x4, 99b + +0: st1 {v0.16b}, [x5] +8: ldp x29, x30, [sp], #16 + ret + + /* + * If we are handling the tail of the input (x6 == 1), return the + * final keystream block back to the caller via the IV buffer. 
+ */ +1: cbz x6, 8b + st1 {v1.16b}, [x5] + b 8b +2: cbz x6, 8b + st1 {v4.16b}, [x5] + b 8b +3: cbz x6, 8b + st1 {v6.16b}, [x5] + b 8b +4: cbz x6, 8b + st1 {v3.16b}, [x5] + b 8b +5: cbz x6, 8b + st1 {v7.16b}, [x5] + b 8b +6: cbz x6, 8b + st1 {v2.16b}, [x5] + b 8b +7: cbz x6, 8b + st1 {v5.16b}, [x5] + b 8b +ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c new file mode 100644 index 000000000000..323dd76ae5f0 --- /dev/null +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -0,0 +1,420 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2016 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +MODULE_ALIAS_CRYPTO("ecb(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("ctr(aes)"); +MODULE_ALIAS_CRYPTO("xts(aes)"); + +asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); + +asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], bool final); + +asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); + +struct aesbs_ctx { + u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; + 
int rounds; +} __aligned(AES_BLOCK_SIZE); + +struct aesbs_cbc_ctx { + struct aesbs_ctx key; + u32 enc[AES_MAX_KEYLENGTH_U32]; +}; + +struct aesbs_xts_ctx { + struct aesbs_ctx key; + u32 twkey[AES_MAX_KEYLENGTH_U32]; +}; + +static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->rounds = 6 + key_len / 4; + + kernel_neon_begin(); + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); + kernel_neon_end(); + + return 0; +} + +static int __ecb_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_encrypt); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_decrypt); +} + +static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->key.rounds = 6 + key_len / 4; + + memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); + + 
kernel_neon_begin(); + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); + kernel_neon_end(); + + return 0; +} + +static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + __aes_arm64_encrypt(ctx->enc, dst, src, ctx->key.rounds); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + return crypto_cbc_encrypt_walk(req, cbc_encrypt_one); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ctr_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + bool final = (walk.total % AES_BLOCK_SIZE) != 0; + + if (walk.nbytes < walk.total) { + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + final = false; + } + + aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->rk, ctx->rounds, blocks, walk.iv, final); + + if (final) { + u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + + if (dst != src) + memcpy(dst, src, walk.total % 
AES_BLOCK_SIZE); + crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, 0); + break; + } + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = xts_verify_key(tfm, in_key, key_len); + if (err) + return err; + + key_len /= 2; + err = crypto_aes_expand_key(&rk, in_key + key_len, key_len); + if (err) + return err; + + memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey)); + + return aesbs_setkey(tfm, in_key, key_len); +} + +static int __xts_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[])) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + __aes_arm64_encrypt(ctx->twkey, walk.iv, walk.iv, ctx->key.rounds); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, + ctx->key.rounds, blocks, walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_encrypt); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_decrypt); +} + +static struct skcipher_alg aes_algs[] = { { + .base.cra_name = "__ecb(aes)", + .base.cra_driver_name = "__ecb-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = 
AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, +}, { + .base.cra_name = "__cbc(aes)", + .base.cra_driver_name = "__cbc-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, +}, { + .base.cra_name = "__ctr(aes)", + .base.cra_driver_name = "__ctr-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-neonbs", + .base.cra_priority = 250 - 1, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "__xts(aes)", + .base.cra_driver_name = "__xts-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), + 
.base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, +} }; + +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static int __init aes_init(void) +{ + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + return 0; + +unregister_simds: + aes_exit(); + return err; +} + +module_init(aes_init); +module_exit(aes_exit); From cc477bf645736739e69d31fdf715281ef0dd5f9b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jan 2017 16:41:54 +0000 Subject: [PATCH 043/142] crypto: arm/aes - replace bit-sliced OpenSSL NEON code This replaces the unwieldy generated implementation of bit-sliced AES in CBC/CTR/XTS modes that originated in the OpenSSL project with a new version that is heavily based on the OpenSSL implementation, but has a number of advantages over the old version: - it does not rely on the scalar AES cipher that also originated in the 
OpenSSL project and contains redundant lookup tables and key schedule generation routines (which we already have in crypto/aes_generic.) - it uses the same expanded key schedule for encryption and decryption, reducing the size of the per-key data structure by 1696 bytes - it adds an implementation of AES in ECB mode, which can be wrapped by other generic chaining mode implementations - it moves the handling of corner cases that are non critical to performance to the glue layer written in C - it was written directly in assembler rather than generated from a Perl script Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 1 + arch/arm/crypto/Makefile | 7 +- arch/arm/crypto/aes-armv4.S | 1089 ----------- arch/arm/crypto/aes-neonbs-core.S | 1021 +++++++++++ arch/arm/crypto/aes-neonbs-glue.c | 405 ++++ arch/arm/crypto/aes_glue.h | 19 - arch/arm/crypto/aesbs-core.S_shipped | 2548 -------------------------- arch/arm/crypto/aesbs-glue.c | 367 ---- arch/arm/crypto/bsaes-armv7.pl | 2471 ------------------------- 9 files changed, 1429 insertions(+), 6499 deletions(-) delete mode 100644 arch/arm/crypto/aes-armv4.S create mode 100644 arch/arm/crypto/aes-neonbs-core.S create mode 100644 arch/arm/crypto/aes-neonbs-glue.c delete mode 100644 arch/arm/crypto/aes_glue.h delete mode 100644 arch/arm/crypto/aesbs-core.S_shipped delete mode 100644 arch/arm/crypto/aesbs-glue.c delete mode 100644 arch/arm/crypto/bsaes-armv7.pl diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index f1de658c3c8f..a8fce93137fb 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -73,6 +73,7 @@ config CRYPTO_AES_ARM_BS depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_SIMD + select CRYPTO_AES_ARM help Use a faster and more secure NEON based implementation of AES in CBC, CTR and XTS modes diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 8f5de2db701c..1822c4697278 100644 --- a/arch/arm/crypto/Makefile +++ 
b/arch/arm/crypto/Makefile @@ -28,7 +28,7 @@ endif endif aes-arm-y := aes-cipher-core.o aes-cipher-glue.o -aes-arm-bs-y := aes-armv4.o aesbs-core.o aesbs-glue.o +aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o @@ -46,13 +46,10 @@ chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) -$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl - $(call cmd,perl) - $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S +.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S deleted file mode 100644 index ebb9761fb572..000000000000 --- a/arch/arm/crypto/aes-armv4.S +++ /dev/null @@ -1,1089 +0,0 @@ -#define __ARM_ARCH__ __LINUX_ARM_ARCH__ -@ ==================================================================== -@ Written by Andy Polyakov for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ AES for ARMv4 - -@ January 2007. -@ -@ Code uses single 1K S-box and is >2 times faster than code generated -@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which -@ allows to merge logical or arithmetic operation with shift or rotate -@ in one instruction and emit combined result every cycle. The module -@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit -@ key [on single-issue Xscale PXA250 core]. - -@ May 2007. -@ -@ AES_set_[en|de]crypt_key is added. - -@ July 2010. 
-@ -@ Rescheduling for dual-issue pipeline resulted in 12% improvement on -@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. - -@ February 2011. -@ -@ Profiler-assisted and platform-specific optimization resulted in 16% -@ improvement on Cortex A8 core and ~21.5 cycles per byte. - -@ A little glue here to select the correct code below for the ARM CPU -@ that is being targetted. - -#include -#include - -.text - -.type AES_Te,%object -.align 5 -AES_Te: -.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d -.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 -.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d -.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a -.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 -.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b -.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea -.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b -.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a -.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f -.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 -.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f -.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e -.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 -.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d -.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f -.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e -.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb -.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce -.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 -.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c -.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed -.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b -.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a -.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 -.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 -.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 -.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 -.word 
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a -.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 -.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 -.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d -.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f -.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 -.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 -.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 -.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f -.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 -.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c -.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 -.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e -.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 -.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 -.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b -.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 -.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 -.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 -.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 -.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 -.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 -.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 -.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 -.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa -.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 -.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 -.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 -.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 -.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 -.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 -.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a -.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 -.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 -.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 -.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a -@ Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 
0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -@ rcon[] -.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.size AES_Te,.-AES_Te - -@ void AES_encrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.align 5 -ENTRY(AES_encrypt) - adr 
r3,AES_encrypt - stmdb sp!,{r1,r4-r12,lr} - mov r12,r0 @ inp - mov r11,r2 - sub r10,r3,#AES_encrypt-AES_Te @ Te -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... - ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - orr r3,r3,r5,lsl#16 - orr r3,r3,r6,lsl#24 -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif -#endif - bl _armv4_AES_encrypt - - ldr r12,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r12,#0] - str r1,[r12,#4] - str r2,[r12,#8] - str r3,[r12,#12] -#else - mov r4,r0,lsr#24 @ write output in endian-neutral - mov r5,r0,lsr#16 @ manner... - mov r6,r0,lsr#8 - strb r4,[r12,#0] - strb r5,[r12,#1] - mov r4,r1,lsr#24 - strb r6,[r12,#2] - mov r5,r1,lsr#16 - strb r0,[r12,#3] - mov r6,r1,lsr#8 - strb r4,[r12,#4] - strb r5,[r12,#5] - mov r4,r2,lsr#24 - strb r6,[r12,#6] - mov r5,r2,lsr#16 - strb r1,[r12,#7] - mov r6,r2,lsr#8 - strb r4,[r12,#8] - strb r5,[r12,#9] - mov r4,r3,lsr#24 - strb r6,[r12,#10] - mov r5,r3,lsr#16 - strb r2,[r12,#11] - mov r6,r3,lsr#8 - strb r4,[r12,#12] - strb r5,[r12,#13] - strb r6,[r12,#14] - strb r3,[r12,#15] -#endif - ldmia sp!,{r4-r12,pc} -ENDPROC(AES_encrypt) - -.type _armv4_AES_encrypt,%function -.align 2 -_armv4_AES_encrypt: - str lr,[sp,#-4]! 
@ push lr - ldmia r11!,{r4-r7} - eor r0,r0,r4 - ldr r12,[r11,#240-16] - eor r1,r1,r5 - eor r2,r2,r6 - eor r3,r3,r7 - sub r12,r12,#1 - mov lr,#255 - - and r7,lr,r0 - and r8,lr,r0,lsr#8 - and r9,lr,r0,lsr#16 - mov r0,r0,lsr#24 -.Lenc_loop: - ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] - and r7,lr,r1,lsr#16 @ i0 - ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] - and r8,lr,r1 - ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] - and r9,lr,r1,lsr#8 - ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] - mov r1,r1,lsr#24 - - ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] - ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] - ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] - eor r0,r0,r7,ror#8 - ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r5,r8,ror#8 - and r8,lr,r2,lsr#16 @ i1 - eor r6,r6,r9,ror#8 - and r9,lr,r2 - ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] - eor r1,r1,r4,ror#24 - ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] - mov r2,r2,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] - eor r0,r0,r7,ror#16 - ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] - and r7,lr,r3 @ i0 - eor r1,r1,r8,ror#8 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r6,r9,ror#16 - and r9,lr,r3,lsr#16 @ i2 - ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] - eor r2,r2,r5,ror#16 - ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] - mov r3,r3,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] - eor r0,r0,r7,ror#24 - ldr r7,[r11],#16 - eor r1,r1,r8,ror#16 - ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] - eor r2,r2,r9,ror#8 - ldr r4,[r11,#-12] - eor r3,r3,r6,ror#8 - - ldr r5,[r11,#-8] - eor r0,r0,r7 - ldr r6,[r11,#-4] - and r7,lr,r0 - eor r1,r1,r4 - and r8,lr,r0,lsr#8 - eor r2,r2,r5 - and r9,lr,r0,lsr#16 - eor r3,r3,r6 - mov r0,r0,lsr#24 - - subs r12,r12,#1 - bne .Lenc_loop - - add r10,r10,#2 - - ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] - and r7,lr,r1,lsr#16 @ i0 - ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] - and r8,lr,r1 - ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] - and r9,lr,r1,lsr#8 - ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] - mov r1,r1,lsr#24 - - ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] - ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] - ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] - eor 
r0,r7,r0,lsl#8 - ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r8,r5,lsl#8 - and r8,lr,r2,lsr#16 @ i1 - eor r6,r9,r6,lsl#8 - and r9,lr,r2 - ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] - eor r1,r4,r1,lsl#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] - mov r2,r2,lsr#24 - - ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] - eor r0,r7,r0,lsl#8 - ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] - and r7,lr,r3 @ i0 - eor r1,r1,r8,lsl#16 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r9,r6,lsl#8 - and r9,lr,r3,lsr#16 @ i2 - ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] - eor r2,r5,r2,lsl#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] - mov r3,r3,lsr#24 - - ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] - eor r0,r7,r0,lsl#8 - ldr r7,[r11,#0] - ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] - eor r1,r1,r8,lsl#8 - ldr r4,[r11,#4] - eor r2,r2,r9,lsl#16 - ldr r5,[r11,#8] - eor r3,r6,r3,lsl#24 - ldr r6,[r11,#12] - - eor r0,r0,r7 - eor r1,r1,r4 - eor r2,r2,r5 - eor r3,r3,r6 - - sub r10,r10,#2 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_encrypt,.-_armv4_AES_encrypt - -.align 5 -ENTRY(private_AES_set_encrypt_key) -_armv4_AES_set_encrypt_key: - adr r3,_armv4_AES_set_encrypt_key - teq r0,#0 - moveq r0,#-1 - beq .Labrt - teq r2,#0 - moveq r0,#-1 - beq .Labrt - - teq r1,#128 - beq .Lok - teq r1,#192 - beq .Lok - teq r1,#256 - movne r0,#-1 - bne .Labrt - -.Lok: stmdb sp!,{r4-r12,lr} - sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 - - mov r12,r0 @ inp - mov lr,r1 @ bits - mov r11,r2 @ key - -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... 
- ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - str r0,[r11],#16 - orr r3,r3,r5,lsl#16 - str r1,[r11,#-12] - orr r3,r3,r6,lsl#24 - str r2,[r11,#-8] - str r3,[r11,#-4] -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r11],#16 - str r1,[r11,#-12] - str r2,[r11,#-8] - str r3,[r11,#-4] -#endif - - teq lr,#128 - bne .Lnot128 - mov r12,#10 - str r12,[r11,#240-16] - add r6,r10,#256 @ rcon - mov lr,#255 - -.L128_loop: - and r5,lr,r3,lsr#24 - and r7,lr,r3,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r3,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r3 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r5,r5,r4 - eor r0,r0,r5 @ rk[4]=rk[0]^... 
- eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] - str r0,[r11],#16 - eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] - str r1,[r11,#-12] - eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] - str r2,[r11,#-8] - subs r12,r12,#1 - str r3,[r11,#-4] - bne .L128_loop - sub r2,r11,#176 - b .Ldone - -.Lnot128: -#if __ARM_ARCH__<7 - ldrb r8,[r12,#19] - ldrb r4,[r12,#18] - ldrb r5,[r12,#17] - ldrb r6,[r12,#16] - orr r8,r8,r4,lsl#8 - ldrb r9,[r12,#23] - orr r8,r8,r5,lsl#16 - ldrb r4,[r12,#22] - orr r8,r8,r6,lsl#24 - ldrb r5,[r12,#21] - ldrb r6,[r12,#20] - orr r9,r9,r4,lsl#8 - orr r9,r9,r5,lsl#16 - str r8,[r11],#8 - orr r9,r9,r6,lsl#24 - str r9,[r11,#-4] -#else - ldr r8,[r12,#16] - ldr r9,[r12,#20] -#ifdef __ARMEL__ - rev r8,r8 - rev r9,r9 -#endif - str r8,[r11],#8 - str r9,[r11,#-4] -#endif - - teq lr,#192 - bne .Lnot192 - mov r12,#12 - str r12,[r11,#240-24] - add r6,r10,#256 @ rcon - mov lr,#255 - mov r12,#8 - -.L192_loop: - and r5,lr,r9,lsr#24 - and r7,lr,r9,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r9,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r9 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r9,r5,r4 - eor r0,r0,r9 @ rk[6]=rk[0]^... 
- eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] - str r0,[r11],#24 - eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] - str r1,[r11,#-20] - eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] - str r2,[r11,#-16] - subs r12,r12,#1 - str r3,[r11,#-12] - subeq r2,r11,#216 - beq .Ldone - - ldr r7,[r11,#-32] - ldr r8,[r11,#-28] - eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] - eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] - str r7,[r11,#-8] - str r9,[r11,#-4] - b .L192_loop - -.Lnot192: -#if __ARM_ARCH__<7 - ldrb r8,[r12,#27] - ldrb r4,[r12,#26] - ldrb r5,[r12,#25] - ldrb r6,[r12,#24] - orr r8,r8,r4,lsl#8 - ldrb r9,[r12,#31] - orr r8,r8,r5,lsl#16 - ldrb r4,[r12,#30] - orr r8,r8,r6,lsl#24 - ldrb r5,[r12,#29] - ldrb r6,[r12,#28] - orr r9,r9,r4,lsl#8 - orr r9,r9,r5,lsl#16 - str r8,[r11],#8 - orr r9,r9,r6,lsl#24 - str r9,[r11,#-4] -#else - ldr r8,[r12,#24] - ldr r9,[r12,#28] -#ifdef __ARMEL__ - rev r8,r8 - rev r9,r9 -#endif - str r8,[r11],#8 - str r9,[r11,#-4] -#endif - - mov r12,#14 - str r12,[r11,#240-32] - add r6,r10,#256 @ rcon - mov lr,#255 - mov r12,#7 - -.L256_loop: - and r5,lr,r9,lsr#24 - and r7,lr,r9,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r9,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r9 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r9,r5,r4 - eor r0,r0,r9 @ rk[8]=rk[0]^... - eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] - str r0,[r11],#32 - eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] - str r1,[r11,#-28] - eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] - str r2,[r11,#-24] - subs r12,r12,#1 - str r3,[r11,#-20] - subeq r2,r11,#256 - beq .Ldone - - and r5,lr,r3 - and r7,lr,r3,lsr#8 - ldrb r5,[r10,r5] - and r8,lr,r3,lsr#16 - ldrb r7,[r10,r7] - and r9,lr,r3,lsr#24 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#8 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r11,#-48] - orr r5,r5,r9,lsl#24 - - ldr r7,[r11,#-44] - ldr r8,[r11,#-40] - eor r4,r4,r5 @ rk[12]=rk[4]^... 
- ldr r9,[r11,#-36] - eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] - str r4,[r11,#-16] - eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] - str r7,[r11,#-12] - eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] - str r8,[r11,#-8] - str r9,[r11,#-4] - b .L256_loop - -.Ldone: mov r0,#0 - ldmia sp!,{r4-r12,lr} -.Labrt: ret lr -ENDPROC(private_AES_set_encrypt_key) - -.align 5 -ENTRY(private_AES_set_decrypt_key) - str lr,[sp,#-4]! @ push lr -#if 0 - @ kernel does both of these in setkey so optimise this bit out by - @ expecting the key to already have the enc_key work done (see aes_glue.c) - bl _armv4_AES_set_encrypt_key -#else - mov r0,#0 -#endif - teq r0,#0 - ldrne lr,[sp],#4 @ pop lr - bne .Labrt - - stmdb sp!,{r4-r12} - - ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov r11,r2 @ which is AES_KEY *key - mov r7,r2 - add r8,r2,r12,lsl#4 - -.Linv: ldr r0,[r7] - ldr r1,[r7,#4] - ldr r2,[r7,#8] - ldr r3,[r7,#12] - ldr r4,[r8] - ldr r5,[r8,#4] - ldr r6,[r8,#8] - ldr r9,[r8,#12] - str r0,[r8],#-16 - str r1,[r8,#16+4] - str r2,[r8,#16+8] - str r3,[r8,#16+12] - str r4,[r7],#16 - str r5,[r7,#-12] - str r6,[r7,#-8] - str r9,[r7,#-4] - teq r7,r8 - bne .Linv - ldr r0,[r11,#16]! 
@ prefetch tp1 - mov r7,#0x80 - mov r8,#0x1b - orr r7,r7,#0x8000 - orr r8,r8,#0x1b00 - orr r7,r7,r7,lsl#16 - orr r8,r8,r8,lsl#16 - sub r12,r12,#1 - mvn r9,r7 - mov r12,r12,lsl#2 @ (rounds-1)*4 - -.Lmix: and r4,r0,r7 - and r1,r0,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r1,r4,r1,lsl#1 @ tp2 - - and r4,r1,r7 - and r2,r1,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r2,r4,r2,lsl#1 @ tp4 - - and r4,r2,r7 - and r3,r2,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r3,r4,r3,lsl#1 @ tp8 - - eor r4,r1,r2 - eor r5,r0,r3 @ tp9 - eor r4,r4,r3 @ tpe - eor r4,r4,r1,ror#24 - eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) - eor r4,r4,r2,ror#16 - eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) - eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) - - ldr r0,[r11,#4] @ prefetch tp1 - str r4,[r11],#4 - subs r12,r12,#1 - bne .Lmix - - mov r0,#0 - ldmia sp!,{r4-r12,pc} -ENDPROC(private_AES_set_decrypt_key) - -.type AES_Td,%object -.align 5 -AES_Td: -.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 -.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 -.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 -.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f -.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 -.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 -.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da -.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 -.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd -.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 -.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 -.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 -.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 -.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a -.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 -.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c -.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 -.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a -.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 -.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 
0xbd6e1051 -.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 -.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff -.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 -.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb -.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 -.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e -.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 -.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a -.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e -.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 -.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d -.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 -.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd -.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 -.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 -.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 -.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d -.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 -.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 -.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef -.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 -.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 -.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 -.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 -.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 -.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b -.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 -.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 -.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 -.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 -.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 -.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f -.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df -.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f -.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e -.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 -.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 
0xeb133c89 -.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c -.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf -.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 -.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f -.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 -.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 -.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 -@ Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 
0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -@ void AES_decrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.align 5 -ENTRY(AES_decrypt) - adr r3,AES_decrypt - stmdb sp!,{r1,r4-r12,lr} - mov r12,r0 @ inp - mov r11,r2 - sub r10,r3,#AES_decrypt-AES_Td @ Td -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... - ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - orr r3,r3,r5,lsl#16 - orr r3,r3,r6,lsl#24 -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif -#endif - bl _armv4_AES_decrypt - - ldr r12,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r12,#0] - str r1,[r12,#4] - str r2,[r12,#8] - str r3,[r12,#12] -#else - mov r4,r0,lsr#24 @ write output in endian-neutral - mov r5,r0,lsr#16 @ manner... 
- mov r6,r0,lsr#8 - strb r4,[r12,#0] - strb r5,[r12,#1] - mov r4,r1,lsr#24 - strb r6,[r12,#2] - mov r5,r1,lsr#16 - strb r0,[r12,#3] - mov r6,r1,lsr#8 - strb r4,[r12,#4] - strb r5,[r12,#5] - mov r4,r2,lsr#24 - strb r6,[r12,#6] - mov r5,r2,lsr#16 - strb r1,[r12,#7] - mov r6,r2,lsr#8 - strb r4,[r12,#8] - strb r5,[r12,#9] - mov r4,r3,lsr#24 - strb r6,[r12,#10] - mov r5,r3,lsr#16 - strb r2,[r12,#11] - mov r6,r3,lsr#8 - strb r4,[r12,#12] - strb r5,[r12,#13] - strb r6,[r12,#14] - strb r3,[r12,#15] -#endif - ldmia sp!,{r4-r12,pc} -ENDPROC(AES_decrypt) - -.type _armv4_AES_decrypt,%function -.align 2 -_armv4_AES_decrypt: - str lr,[sp,#-4]! @ push lr - ldmia r11!,{r4-r7} - eor r0,r0,r4 - ldr r12,[r11,#240-16] - eor r1,r1,r5 - eor r2,r2,r6 - eor r3,r3,r7 - sub r12,r12,#1 - mov lr,#255 - - and r7,lr,r0,lsr#16 - and r8,lr,r0,lsr#8 - and r9,lr,r0 - mov r0,r0,lsr#24 -.Ldec_loop: - ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] - and r7,lr,r1 @ i0 - ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] - and r8,lr,r1,lsr#16 - ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] - and r9,lr,r1,lsr#8 - ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] - mov r1,r1,lsr#24 - - ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] - ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] - ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] - eor r0,r0,r7,ror#24 - ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r8,r5,ror#8 - and r8,lr,r2 @ i1 - eor r6,r9,r6,ror#8 - and r9,lr,r2,lsr#16 - ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] - eor r1,r1,r4,ror#8 - ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] - mov r2,r2,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] - eor r0,r0,r7,ror#16 - ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] - and r7,lr,r3,lsr#16 @ i0 - eor r1,r1,r8,ror#24 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r9,r6,ror#8 - and r9,lr,r3 @ i2 - ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] - eor r2,r2,r5,ror#8 - ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] - mov r3,r3,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] - eor r0,r0,r7,ror#8 - ldr r7,[r11],#16 - eor r1,r1,r8,ror#16 - ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] - eor r2,r2,r9,ror#24 - 
- ldr r4,[r11,#-12] - eor r0,r0,r7 - ldr r5,[r11,#-8] - eor r3,r3,r6,ror#8 - ldr r6,[r11,#-4] - and r7,lr,r0,lsr#16 - eor r1,r1,r4 - and r8,lr,r0,lsr#8 - eor r2,r2,r5 - and r9,lr,r0 - eor r3,r3,r6 - mov r0,r0,lsr#24 - - subs r12,r12,#1 - bne .Ldec_loop - - add r10,r10,#1024 - - ldr r5,[r10,#0] @ prefetch Td4 - ldr r6,[r10,#32] - ldr r4,[r10,#64] - ldr r5,[r10,#96] - ldr r6,[r10,#128] - ldr r4,[r10,#160] - ldr r5,[r10,#192] - ldr r6,[r10,#224] - - ldrb r0,[r10,r0] @ Td4[s0>>24] - ldrb r4,[r10,r7] @ Td4[s0>>16] - and r7,lr,r1 @ i0 - ldrb r5,[r10,r8] @ Td4[s0>>8] - and r8,lr,r1,lsr#16 - ldrb r6,[r10,r9] @ Td4[s0>>0] - and r9,lr,r1,lsr#8 - - ldrb r7,[r10,r7] @ Td4[s1>>0] - ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24] - THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24] - THUMB( ldrb r1,[r1] ) - ldrb r8,[r10,r8] @ Td4[s1>>16] - eor r0,r7,r0,lsl#24 - ldrb r9,[r10,r9] @ Td4[s1>>8] - eor r1,r4,r1,lsl#8 - and r7,lr,r2,lsr#8 @ i0 - eor r5,r5,r8,lsl#8 - and r8,lr,r2 @ i1 - ldrb r7,[r10,r7] @ Td4[s2>>8] - eor r6,r6,r9,lsl#8 - ldrb r8,[r10,r8] @ Td4[s2>>0] - and r9,lr,r2,lsr#16 - - ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24] - THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24] - THUMB( ldrb r2,[r2] ) - eor r0,r0,r7,lsl#8 - ldrb r9,[r10,r9] @ Td4[s2>>16] - eor r1,r8,r1,lsl#16 - and r7,lr,r3,lsr#16 @ i0 - eor r2,r5,r2,lsl#16 - and r8,lr,r3,lsr#8 @ i1 - ldrb r7,[r10,r7] @ Td4[s3>>16] - eor r6,r6,r9,lsl#16 - ldrb r8,[r10,r8] @ Td4[s3>>8] - and r9,lr,r3 @ i2 - - ldrb r9,[r10,r9] @ Td4[s3>>0] - ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24] - THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24] - THUMB( ldrb r3,[r3] ) - eor r0,r0,r7,lsl#16 - ldr r7,[r11,#0] - eor r1,r1,r8,lsl#8 - ldr r4,[r11,#4] - eor r2,r9,r2,lsl#8 - ldr r5,[r11,#8] - eor r3,r6,r3,lsl#24 - ldr r6,[r11,#12] - - eor r0,r0,r7 - eor r1,r1,r4 - eor r2,r2,r5 - eor r3,r3,r6 - - sub r10,r10,#1024 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_decrypt,.-_armv4_AES_decrypt -.asciz "AES for ARMv4, CRYPTOGAMS by " -.align 2 diff --git 
a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S new file mode 100644 index 000000000000..c9477044fbba --- /dev/null +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -0,0 +1,1021 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2017 Linaro Ltd. + * Author: Ard Biesheuvel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * The algorithm implemented here is described in detail by the paper + * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and + * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) + * + * This implementation is based primarily on the OpenSSL implementation + * for 32-bit ARM written by Andy Polyakov + */ + +#include +#include + + .text + .fpu neon + + rounds .req ip + bskey .req r4 + + q0l .req d0 + q0h .req d1 + q1l .req d2 + q1h .req d3 + q2l .req d4 + q2h .req d5 + q3l .req d6 + q3h .req d7 + q4l .req d8 + q4h .req d9 + q5l .req d10 + q5h .req d11 + q6l .req d12 + q6h .req d13 + q7l .req d14 + q7h .req d15 + q8l .req d16 + q8h .req d17 + q9l .req d18 + q9h .req d19 + q10l .req d20 + q10h .req d21 + q11l .req d22 + q11h .req d23 + q12l .req d24 + q12h .req d25 + q13l .req d26 + q13h .req d27 + q14l .req d28 + q14h .req d29 + q15l .req d30 + q15h .req d31 + + .macro __tbl, out, tbl, in, tmp + .ifc \out, \tbl + .ifb \tmp + .error __tbl needs temp register if out == tbl + .endif + vmov \tmp, \out + .endif + vtbl.8 \out\()l, {\tbl}, \in\()l + .ifc \out, \tbl + vtbl.8 \out\()h, {\tmp}, \in\()h + .else + vtbl.8 \out\()h, {\tbl}, \in\()h + .endif + .endm + + .macro __ldr, out, sym + vldr \out\()l, \sym + vldr \out\()h, \sym + 8 + .endm + + .macro __adr, reg, lbl + adr \reg, \lbl +THUMB( orr \reg, \reg, #1 ) + .endm + + .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + veor \b2, \b2, \b1 + veor \b5, \b5, \b6 + veor \b3, \b3, \b0 + veor \b6, \b6, 
\b2 + veor \b5, \b5, \b0 + veor \b6, \b6, \b3 + veor \b3, \b3, \b7 + veor \b7, \b7, \b5 + veor \b3, \b3, \b4 + veor \b4, \b4, \b5 + veor \b2, \b2, \b7 + veor \b3, \b3, \b1 + veor \b1, \b1, \b5 + .endm + + .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + veor \b0, \b0, \b6 + veor \b1, \b1, \b4 + veor \b4, \b4, \b6 + veor \b2, \b2, \b0 + veor \b6, \b6, \b1 + veor \b1, \b1, \b5 + veor \b5, \b5, \b3 + veor \b3, \b3, \b7 + veor \b7, \b7, \b5 + veor \b2, \b2, \b5 + veor \b4, \b4, \b7 + .endm + + .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 + veor \b1, \b1, \b7 + veor \b4, \b4, \b7 + veor \b7, \b7, \b5 + veor \b1, \b1, \b3 + veor \b2, \b2, \b5 + veor \b3, \b3, \b7 + veor \b6, \b6, \b1 + veor \b2, \b2, \b0 + veor \b5, \b5, \b3 + veor \b4, \b4, \b6 + veor \b0, \b0, \b6 + veor \b1, \b1, \b4 + .endm + + .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 + veor \b1, \b1, \b5 + veor \b2, \b2, \b7 + veor \b3, \b3, \b1 + veor \b4, \b4, \b5 + veor \b7, \b7, \b5 + veor \b3, \b3, \b4 + veor \b5, \b5, \b0 + veor \b3, \b3, \b7 + veor \b6, \b6, \b2 + veor \b2, \b2, \b1 + veor \b6, \b6, \b3 + veor \b3, \b3, \b0 + veor \b5, \b5, \b6 + .endm + + .macro mul_gf4, x0, x1, y0, y1, t0, t1 + veor \t0, \y0, \y1 + vand \t0, \t0, \x0 + veor \x0, \x0, \x1 + vand \t1, \x1, \y0 + vand \x0, \x0, \y1 + veor \x1, \t1, \t0 + veor \x0, \x0, \t1 + .endm + + .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 + veor \t0, \y0, \y1 + veor \t1, \y2, \y3 + vand \t0, \t0, \x0 + vand \t1, \t1, \x2 + veor \x0, \x0, \x1 + veor \x2, \x2, \x3 + vand \x1, \x1, \y0 + vand \x3, \x3, \y2 + vand \x0, \x0, \y1 + vand \x2, \x2, \y3 + veor \x1, \x1, \x0 + veor \x2, \x2, \x3 + veor \x0, \x0, \t0 + veor \x3, \x3, \t1 + .endm + + .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, t0, t1, t2, t3 + veor \t0, \x0, \x2 + veor \t1, \x1, \x3 + mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 + veor \y0, \y0, \y2 + veor \y1, \y1, \y3 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 + veor 
\x0, \x0, \t0 + veor \x2, \x2, \t0 + veor \x1, \x1, \t1 + veor \x3, \x3, \t1 + veor \t0, \x4, \x6 + veor \t1, \x5, \x7 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 + veor \y0, \y0, \y2 + veor \y1, \y1, \y3 + mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 + veor \x4, \x4, \t0 + veor \x6, \x6, \t0 + veor \x5, \x5, \t1 + veor \x7, \x7, \t1 + .endm + + .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + veor \t3, \x4, \x6 + veor \t0, \x5, \x7 + veor \t1, \x1, \x3 + veor \s1, \x7, \x6 + veor \s0, \x0, \x2 + veor \s3, \t3, \t0 + vorr \t2, \t0, \t1 + vand \s2, \t3, \s0 + vorr \t3, \t3, \s0 + veor \s0, \s0, \t1 + vand \t0, \t0, \t1 + veor \t1, \x3, \x2 + vand \s3, \s3, \s0 + vand \s1, \s1, \t1 + veor \t1, \x4, \x5 + veor \s0, \x1, \x0 + veor \t3, \t3, \s1 + veor \t2, \t2, \s1 + vand \s1, \t1, \s0 + vorr \t1, \t1, \s0 + veor \t3, \t3, \s3 + veor \t0, \t0, \s1 + veor \t2, \t2, \s2 + veor \t1, \t1, \s3 + veor \t0, \t0, \s2 + vand \s0, \x7, \x3 + veor \t1, \t1, \s2 + vand \s1, \x6, \x2 + vand \s2, \x5, \x1 + vorr \s3, \x4, \x0 + veor \t3, \t3, \s0 + veor \t1, \t1, \s2 + veor \s0, \t0, \s3 + veor \t2, \t2, \s1 + vand \s2, \t3, \t1 + veor \s1, \t2, \s2 + veor \s3, \s0, \s2 + vbsl \s1, \t1, \s0 + vmvn \t0, \s0 + vbsl \s0, \s1, \s3 + vbsl \t0, \s1, \s3 + vbsl \s3, \t3, \t2 + veor \t3, \t3, \t2 + vand \s2, \s0, \s3 + veor \t1, \t1, \t0 + veor \s2, \s2, \t3 + mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + .endm + + .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 + inv_gf256 \b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \ + \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + out_bs_ch \b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3 + .endm + + .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + inv_in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 + inv_gf256 \b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \ + 
\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + inv_out_bs_ch \b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6 + .endm + + .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, mask + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \t0, \t0, \x0 + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \t1, \t1, \x1 + __tbl \x0, \t0, \mask + veor \t2, \t2, \x2 + __tbl \x1, \t1, \mask + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \t3, \t3, \x3 + __tbl \x2, \t2, \mask + __tbl \x3, \t3, \mask + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \t0, \t0, \x4 + veor \t1, \t1, \x5 + __tbl \x4, \t0, \mask + veor \t2, \t2, \x6 + __tbl \x5, \t1, \mask + veor \t3, \t3, \x7 + __tbl \x6, \t2, \mask + __tbl \x7, \t3, \mask + .endm + + .macro inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, mask + __tbl \x0, \x0, \mask, \t0 + __tbl \x1, \x1, \mask, \t1 + __tbl \x2, \x2, \mask, \t2 + __tbl \x3, \x3, \mask, \t3 + __tbl \x4, \x4, \mask, \t0 + __tbl \x5, \x5, \mask, \t1 + __tbl \x6, \x6, \mask, \t2 + __tbl \x7, \x7, \mask, \t3 + .endm + + .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7, inv + vext.8 \t0, \x0, \x0, #12 + vext.8 \t1, \x1, \x1, #12 + veor \x0, \x0, \t0 + vext.8 \t2, \x2, \x2, #12 + veor \x1, \x1, \t1 + vext.8 \t3, \x3, \x3, #12 + veor \x2, \x2, \t2 + vext.8 \t4, \x4, \x4, #12 + veor \x3, \x3, \t3 + vext.8 \t5, \x5, \x5, #12 + veor \x4, \x4, \t4 + vext.8 \t6, \x6, \x6, #12 + veor \x5, \x5, \t5 + vext.8 \t7, \x7, \x7, #12 + veor \x6, \x6, \t6 + veor \t1, \t1, \x0 + veor.8 \x7, \x7, \t7 + vext.8 \x0, \x0, \x0, #8 + veor \t2, \t2, \x1 + veor \t0, \t0, \x7 + veor \t1, \t1, \x7 + vext.8 \x1, \x1, \x1, #8 + veor \t5, \t5, \x4 + veor \x0, \x0, \t0 + veor \t6, \t6, \x5 + veor \x1, \x1, \t1 + vext.8 \t0, \x4, \x4, #8 + veor \t4, \t4, \x3 + vext.8 \t1, \x5, \x5, #8 + veor \t7, \t7, \x6 + vext.8 \x4, \x3, \x3, #8 + veor \t3, \t3, \x2 + vext.8 \x5, \x7, \x7, #8 + veor \t4, \t4, \x7 + vext.8 \x3, \x6, \x6, #8 + veor \t3, \t3, \x7 + vext.8 \x6, \x2, \x2, #8 + veor 
\x7, \t1, \t5 + .ifb \inv + veor \x2, \t0, \t4 + veor \x4, \x4, \t3 + veor \x5, \x5, \t7 + veor \x3, \x3, \t6 + veor \x6, \x6, \t2 + .else + veor \t3, \t3, \x4 + veor \x5, \x5, \t7 + veor \x2, \x3, \t6 + veor \x3, \t0, \t4 + veor \x4, \x6, \t2 + vmov \x6, \t3 + .endif + .endm + + .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7 + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \x0, \x0, \t0 + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \x1, \x1, \t1 + vld1.8 {\t4-\t5}, [bskey, :256]! + veor \x2, \x2, \t2 + vld1.8 {\t6-\t7}, [bskey, :256] + sub bskey, bskey, #224 + veor \x3, \x3, \t3 + veor \x4, \x4, \t4 + veor \x5, \x5, \t5 + veor \x6, \x6, \t6 + veor \x7, \x7, \t7 + vext.8 \t0, \x0, \x0, #8 + vext.8 \t6, \x6, \x6, #8 + vext.8 \t7, \x7, \x7, #8 + veor \t0, \t0, \x0 + vext.8 \t1, \x1, \x1, #8 + veor \t6, \t6, \x6 + vext.8 \t2, \x2, \x2, #8 + veor \t7, \t7, \x7 + vext.8 \t3, \x3, \x3, #8 + veor \t1, \t1, \x1 + vext.8 \t4, \x4, \x4, #8 + veor \t2, \t2, \x2 + vext.8 \t5, \x5, \x5, #8 + veor \t3, \t3, \x3 + veor \t4, \t4, \x4 + veor \t5, \t5, \x5 + veor \x0, \x0, \t6 + veor \x1, \x1, \t6 + veor \x2, \x2, \t0 + veor \x4, \x4, \t2 + veor \x3, \x3, \t1 + veor \x1, \x1, \t7 + veor \x2, \x2, \t7 + veor \x4, \x4, \t6 + veor \x5, \x5, \t3 + veor \x3, \x3, \t6 + veor \x6, \x6, \t4 + veor \x4, \x4, \t7 + veor \x5, \x5, \t7 + veor \x7, \x7, \t5 + mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 + .endm + + .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 + vshr.u64 \t0, \b0, #\n + vshr.u64 \t1, \b1, #\n + veor \t0, \t0, \a0 + veor \t1, \t1, \a1 + vand \t0, \t0, \mask + vand \t1, \t1, \mask + veor \a0, \a0, \t0 + vshl.s64 \t0, \t0, #\n + veor \a1, \a1, \t1 + vshl.s64 \t1, \t1, #\n + veor \b0, \b0, \t0 + veor \b1, \b1, \t1 + .endm + + .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 + vmov.i8 \t0, #0x55 + vmov.i8 \t1, #0x33 + swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 + swapmove_2x \x4, 
\x5, \x6, \x7, 1, \t0, \t2, \t3 + vmov.i8 \t0, #0x0f + swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 + swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 + swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 + swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 + .endm + + .align 4 +M0: .quad 0x02060a0e03070b0f, 0x0004080c0105090d + + /* + * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) + */ +ENTRY(aesbs_convert_key) + vld1.32 {q7}, [r1]! // load round 0 key + vld1.32 {q15}, [r1]! // load round 1 key + + vmov.i8 q8, #0x01 // bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + __ldr q14, M0 + + sub r2, r2, #1 + vst1.8 {q7}, [r0, :128]! // save round 0 key + +.Lkey_loop: + __tbl q7, q15, q14 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.32 {q15}, [r1]! // load next round key + vmvn q0, q0 + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 + + subs r2, r2, #1 + vst1.8 {q0-q1}, [r0, :256]! + vst1.8 {q2-q3}, [r0, :256]! + vst1.8 {q4-q5}, [r0, :256]! + vst1.8 {q6-q7}, [r0, :256]! + bne .Lkey_loop + + vmov.i8 q7, #0x63 // compose .L63 + veor q15, q15, q7 + vst1.8 {q15}, [r0, :128] + bx lr +ENDPROC(aesbs_convert_key) + + .align 4 +M0SR: .quad 0x0a0e02060f03070b, 0x0004080c05090d01 + +aesbs_encrypt8: + vld1.8 {q9}, [bskey, :128]! 
// round 0 key
+	__ldr	q8, M0SR
+
+	veor	q10, q0, q9		// xor with round0 key
+	veor	q11, q1, q9
+	__tbl	q0, q10, q8
+	veor	q12, q2, q9
+	__tbl	q1, q11, q8
+	veor	q13, q3, q9
+	__tbl	q2, q12, q8
+	veor	q14, q4, q9
+	__tbl	q3, q13, q8
+	veor	q15, q5, q9
+	__tbl	q4, q14, q8
+	veor	q10, q6, q9
+	__tbl	q5, q15, q8
+	veor	q11, q7, q9
+	__tbl	q6, q10, q8
+	__tbl	q7, q11, q8
+
+	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11
+
+	sub	rounds, rounds, #1
+	b	.Lenc_sbox		// the first round enters at the S-box step
+
+	.align	5
+SR:	.quad	0x0504070600030201, 0x0f0e0d0c0a09080b
+SRM0:	.quad	0x0304090e00050a0f, 0x01060b0c0207080d
+
+.Lenc_last:
+	__ldr	q12, SRM0		// permutation used for the final round only
+.Lenc_loop:
+	shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
+.Lenc_sbox:
+	sbox	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
+			q13, q14, q15
+	subs	rounds, rounds, #1
+	bcc	.Lenc_done		// counter went below zero: all rounds done
+
+	mix_cols	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \
+			q13, q14, q15
+
+	beq	.Lenc_last		// still tests the flags of the subs above
+	__ldr	q12, SR
+	b	.Lenc_loop
+
+.Lenc_done:
+	vld1.8	{q12}, [bskey, :128]	// last round key
+
+	bitslice	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11
+
+	veor	q0, q0, q12
+	veor	q1, q1, q12
+	veor	q4, q4, q12
+	veor	q6, q6, q12
+	veor	q3, q3, q12
+	veor	q7, q7, q12
+	veor	q2, q2, q12
+	veor	q5, q5, q12
+	bx	lr
+ENDPROC(aesbs_encrypt8)
+
+	.align	4
+M0ISR:	.quad	0x0a0e0206070b0f03, 0x0004080c0d010509
+
+	/*
+	 * aesbs_decrypt8 - decrypt the eight blocks held in q0-q7.
+	 *
+	 * Takes bskey/rounds like aesbs_encrypt8 but consumes the key
+	 * schedule from its end towards its start. Results are left, in
+	 * block order, in q0, q1, q6, q4, q2, q7, q3, q5; q8-q15 are
+	 * clobbered.
+	 */
+aesbs_decrypt8:
+	add	bskey, bskey, rounds, lsl #7
+	sub	bskey, bskey, #112	// bskey + 128 * rounds - 112: final 16-byte key
+	vld1.8	{q9}, [bskey, :128]	// round 0 key
+	sub	bskey, bskey, #128	// step back to the preceding 128-byte round key
+	__ldr	q8, M0ISR
+
+	veor	q10, q0, q9		// xor with round0 key
+	veor	q11, q1, q9
+	__tbl	q0, q10, q8
+	veor	q12, q2, q9
+	__tbl	q1, q11, q8
+	veor	q13, q3, q9
+	__tbl	q2, q12, q8
+	veor	q14, q4, q9
+	__tbl	q3, q13, q8
+	veor	q15, q5, q9
+	__tbl	q4, q14, q8
+	veor	q10, q6, q9
+	__tbl	q5, q15, q8
+	veor	q11, q7, q9
+	__tbl	q6, q10, q8
+	__tbl	q7, q11, q8
+
+	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11
+
+	sub	rounds, rounds, #1
+	b	.Ldec_sbox
+
+	.align	5
+ISR:	.quad
0x0504070602010003, 0x0f0e0d0c080b0a09
+ISRM0:	.quad	0x01040b0e0205080f, 0x0306090c00070a0d
+
+.Ldec_last:
+	__ldr	q12, ISRM0		// permutation used for the final round only
+.Ldec_loop:
+	inv_shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
+.Ldec_sbox:
+	inv_sbox	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
+			q13, q14, q15
+	subs	rounds, rounds, #1
+	bcc	.Ldec_done		// counter went below zero: all rounds done
+
+	inv_mix_cols	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \
+			q13, q14, q15
+
+	beq	.Ldec_last		// still tests the flags of the subs above
+	__ldr	q12, ISR
+	b	.Ldec_loop
+
+.Ldec_done:
+	add	bskey, bskey, #112
+	vld1.8	{q12}, [bskey, :128]	// last round key
+
+	bitslice	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11
+
+	veor	q0, q0, q12
+	veor	q1, q1, q12
+	veor	q6, q6, q12
+	veor	q4, q4, q12
+	veor	q2, q2, q12
+	veor	q7, q7, q12
+	veor	q3, q3, q12
+	veor	q5, q5, q12
+	bx	lr
+ENDPROC(aesbs_decrypt8)
+
+	/*
+	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks)
+	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks)
+	 *
+	 * r0 = out, r1 = in, r2 = rk, r3 = rounds, blocks on the stack.
+	 *
+	 * __ecb_crypt processes up to eight blocks per pass through \do8;
+	 * \o0-\o7 name the registers in which \do8 leaves blocks 0-7.
+	 * When fewer than eight blocks remain, both the load and the store
+	 * sequence are entered part-way: each step back of 4 bytes from the
+	 * label following the sequence executes one more of its trailing
+	 * instructions, so only the last (blocks & 7) registers are used.
+	 */
+	.macro	__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+	push	{r4-r6, lr}
+	ldr	r5, [sp, #16]		// number of blocks
+
+99:	__adr	ip, 0f
+	and	lr, r5, #7
+	cmp	r5, #8
+	sub	ip, ip, lr, lsl #2
+	bxlt	ip			// computed goto if blocks < 8
+
+	vld1.8	{q0}, [r1]!
+	vld1.8	{q1}, [r1]!
+	vld1.8	{q2}, [r1]!
+	vld1.8	{q3}, [r1]!
+	vld1.8	{q4}, [r1]!
+	vld1.8	{q5}, [r1]!
+	vld1.8	{q6}, [r1]!
+	vld1.8	{q7}, [r1]!
+
+0:	mov	bskey, r2
+	mov	rounds, r3
+	bl	\do8
+
+	__adr	ip, 1f
+	and	lr, r5, #7
+	cmp	r5, #8
+	sub	ip, ip, lr, lsl #2
+	bxlt	ip			// computed goto if blocks < 8
+
+	vst1.8	{\o0}, [r0]!
+	vst1.8	{\o1}, [r0]!
+	vst1.8	{\o2}, [r0]!
+	vst1.8	{\o3}, [r0]!
+	vst1.8	{\o4}, [r0]!
+	vst1.8	{\o5}, [r0]!
+	vst1.8	{\o6}, [r0]!
+	vst1.8	{\o7}, [r0]!
+
+1:	subs	r5, r5, #8		// eight blocks consumed; loop while any remain
+	bgt	99b
+
+	pop	{r4-r6, pc}
+	.endm
+
+	// ECB entry points: the register lists give the order in which the
+	// respective 8-block primitive leaves blocks 0-7.
+	.align	4
+ENTRY(aesbs_ecb_encrypt)
+	__ecb_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
+ENDPROC(aesbs_ecb_encrypt)
+
+	.align	4
+ENTRY(aesbs_ecb_decrypt)
+	__ecb_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
+ENDPROC(aesbs_ecb_decrypt)
+
+	/*
+	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+	 *		     int rounds, int blocks, u8 iv[])
+	 *
+	 * r0 = out, r1 = in, r2 = rk, r3 = rounds; blocks and iv are read
+	 * from the stack into r5 and r6.
+	 *
+	 * The input is read twice per pass: once through lr to feed
+	 * aesbs_decrypt8, and once through r1 to fetch the ciphertext blocks
+	 * that are XORed into the decrypted data (q8 = iv for the first
+	 * block, q9-q15 = the preceding ciphertext blocks). The last
+	 * ciphertext block of a pass is written back to iv[].
+	 */
+	.align	4
+ENTRY(aesbs_cbc_decrypt)
+	mov	ip, sp
+	push	{r4-r6, lr}
+	ldm	ip, {r5-r6}		// load args 4-5
+
+99:	__adr	ip, 0f
+	and	lr, r5, #7
+	cmp	r5, #8
+	sub	ip, ip, lr, lsl #2
+	mov	lr, r1			// lr becomes the read cursor; flags are untouched
+	bxlt	ip			// computed goto if blocks < 8
+
+	vld1.8	{q0}, [lr]!
+	vld1.8	{q1}, [lr]!
+	vld1.8	{q2}, [lr]!
+	vld1.8	{q3}, [lr]!
+	vld1.8	{q4}, [lr]!
+	vld1.8	{q5}, [lr]!
+	vld1.8	{q6}, [lr]!
+	vld1.8	{q7}, [lr]
+
+0:	mov	bskey, r2
+	mov	rounds, r3
+	bl	aesbs_decrypt8
+
+	vld1.8	{q8}, [r6]		// current iv
+	vmov	q9, q8			// preset q9-q15 to the iv so that a partial
+	vmov	q10, q8			// pass XORs its first block with it
+	vmov	q11, q8
+	vmov	q12, q8
+	vmov	q13, q8
+	vmov	q14, q8
+	vmov	q15, q8
+
+	__adr	ip, 1f
+	and	lr, r5, #7
+	cmp	r5, #8
+	sub	ip, ip, lr, lsl #2
+	bxlt	ip			// computed goto if blocks < 8
+
+	vld1.8	{q9}, [r1]!
+	vld1.8	{q10}, [r1]!
+	vld1.8	{q11}, [r1]!
+	vld1.8	{q12}, [r1]!
+	vld1.8	{q13}, [r1]!
+	vld1.8	{q14}, [r1]!
+	vld1.8	{q15}, [r1]!
+	W(nop)				// eighth slot, so the sequence has 8 entries
+
+1:	__adr	ip, 2f
+	sub	ip, ip, lr, lsl #3	// two instructions (8 bytes) per block below
+	bxlt	ip			// computed goto if blocks < 8
+
+	veor	q0, q0, q8
+	vst1.8	{q0}, [r0]!
+	veor	q1, q1, q9
+	vst1.8	{q1}, [r0]!
+	veor	q6, q6, q10
+	vst1.8	{q6}, [r0]!
+	veor	q4, q4, q11
+	vst1.8	{q4}, [r0]!
+	veor	q2, q2, q12
+	vst1.8	{q2}, [r0]!
+	veor	q7, q7, q13
+	vst1.8	{q7}, [r0]!
+	veor	q3, q3, q14
+	vst1.8	{q3}, [r0]!
+	veor	q5, q5, q15
+	vld1.8	{q8}, [r1]!		// load next round's iv
+2:	vst1.8	{q5}, [r0]!
+
+	subs	r5, r5, #8
+	vst1.8	{q8}, [r6]		// store next round's iv
+	bgt	99b
+
+	pop	{r4-r6, pc}
+ENDPROC(aesbs_cbc_decrypt)
+
+	/*
+	 * next_ctr - write the pending counter value into \q and advance it.
+	 *
+	 * r7-r10 hold the 128-bit counter as four 32-bit words, r7 being the
+	 * most significant and r10 the least significant one. The words are
+	 * inserted into the four 32-bit lanes of \q, the carry is rippled
+	 * from r10 up to r7, and vrev32.8 byte-swaps every lane.
+	 *
+	 * The element size of the scalar moves is spelled out (vmov.32): it
+	 * is not implied by the operands, and older GNU assemblers (reported
+	 * for binutils 2.22 and earlier) reject the untyped form with
+	 * "bad type for scalar". The encoding is unchanged.
+	 *
+	 * The macro must keep expanding to exactly nine instructions:
+	 * aesbs_ctr_encrypt jumps into the run of next_ctr expansions in
+	 * steps of 36 bytes (lr << 5 plus lr << 2).
+	 */
+	.macro	next_ctr, q
+	vmov.32	\q\()h[1], r10
+	adds	r10, r10, #1
+	vmov.32	\q\()h[0], r9
+	adcs	r9, r9, #0
+	vmov.32	\q\()l[1], r8
+	adcs	r8, r8, #0
+	vmov.32	\q\()l[0], r7
+	adc	r7, r7, #0
+	vrev32.8	\q, \q
+	.endm
+
+	/*
+	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+	 *		     int rounds, int blocks, u8 ctr[], bool final)
+	 *
+	 * r0 = out, r1 = in, r2 = rk, r3 = rounds; blocks, ctr and final are
+	 * read from the stack into r5, r6 and r7.
+	 */
+ENTRY(aesbs_ctr_encrypt)
+	mov	ip, sp
+	push	{r4-r10, lr}
+
+	ldm	ip, {r5-r7}		// load args 4-6
+	add	r5, r5, r7		// one extra block if final == 1
+
+	vld1.8	{q0}, [r6]		// load counter
+	vrev32.8	q1, q0
+	vmov	r9, r10, d3
+	vmov	r7, r8, d2
+
+	adds	r10, r10, #1		// r7-r10 = counter + 1, ready for next_ctr
+	adcs	r9, r9, #0
+	adcs	r8, r8, #0
+	adc	r7, r7, #0
+
+99:	vmov	q1, q0
+	vmov	q2, q0
+	vmov	q3, q0
+	vmov	q4, q0
+	vmov	q5, q0
+	vmov	q6, q0
+	vmov	q7, q0
+
+	__adr	ip, 0f
+	sub	lr, r5, #1
+	and	lr, lr, #7
+	cmp	r5, #8
+	sub	ip, ip, lr, lsl #5	// 36 bytes per next_ctr expansion:
+	sub	ip, ip, lr, lsl #2	// 32 * lr + 4 * lr
+	bxlt	ip			// computed goto if blocks < 8
+
+	next_ctr	q1
+	next_ctr	q2
+	next_ctr	q3
+	next_ctr	q4
+	next_ctr	q5
+	next_ctr	q6
+	next_ctr	q7
+
+0:	mov	bskey, r2
+	mov	rounds, r3
+	bl	aesbs_encrypt8
+
+	__adr	ip, 1f
+	and	lr, r5, #7
+	cmp	r5, #8
+	movgt	r4, #0
+	ldrle	r4, [sp, #40]		// load final in the last round
+	sub	ip, ip, lr, lsl #2
+	bxlt	ip			// computed goto if blocks < 8
+
+	vld1.8	{q8}, [r1]!
+	vld1.8	{q9}, [r1]!
+	vld1.8	{q10}, [r1]!
+	vld1.8	{q11}, [r1]!
+	vld1.8	{q12}, [r1]!
+	vld1.8	{q13}, [r1]!
+	vld1.8	{q14}, [r1]!
+	teq	r4, #0			// skip last block if 'final'
+1:	bne	2f
+	vld1.8	{q15}, [r1]!
+
+2:	__adr	ip, 3f
+	cmp	r5, #8
+	sub	ip, ip, lr, lsl #3
+	bxlt	ip			// computed goto if blocks < 8
+
+	veor	q0, q0, q8
+	vst1.8	{q0}, [r0]!
+	veor	q1, q1, q9
+	vst1.8	{q1}, [r0]!
+	veor	q4, q4, q10
+	vst1.8	{q4}, [r0]!
+	veor	q6, q6, q11
+	vst1.8	{q6}, [r0]!
+	veor	q3, q3, q12
+	vst1.8	{q3}, [r0]!
+	veor	q7, q7, q13
+	vst1.8	{q7}, [r0]!
+	veor	q2, q2, q14
+	vst1.8	{q2}, [r0]!
+	teq	r4, #0			// skip last block if 'final'
+	W(bne)	4f			// q5 then still holds the raw keystream block
+3:	veor	q5, q5, q15
+	vst1.8	{q5}, [r0]!
+
+	next_ctr	q0
+
+	subs	r5, r5, #8
+	bgt	99b
+
+	vmov	q5, q0			// not 'final': hand back the next counter value
+
+4:	vst1.8	{q5}, [r6]		// written to ctr[]: next counter, or the
+					// keystream block when 'final' was set
+	pop	{r4-r10, pc}
+ENDPROC(aesbs_ctr_encrypt)
+
+	/*
+	 * next_tweak - derive the following XTS tweak: \out = \in * x.
+	 *
+	 * Both 64-bit lanes are doubled (vadd); the sign bit of each lane is
+	 * smeared into a mask, ANDed with \const (loaded from .Lxts_mul_x,
+	 * i.e. { 1, 0x87 }) and, after swapping the two halves, XORed in.
+	 * The carry out of the low lane thus lands in bit 0 of the high
+	 * lane and the carry out of the high lane folds back as 0x87.
+	 * \tmp is scratch.
+	 */
+	.macro	next_tweak, out, in, const, tmp
+	vshr.s64	\tmp, \in, #63
+	vand	\tmp, \tmp, \const
+	vadd.u64	\out, \in, \in
+	vext.8	\tmp, \tmp, \tmp, #8
+	veor	\out, \out, \tmp
+	.endm
+
+	.align	4
+.Lxts_mul_x:
+	.quad	1, 0x87
+
+	/*
+	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks, u8 iv[])
+	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks, u8 iv[])
+	 *
+	 * __xts_prepare8 loads up to eight input blocks into q0-q7, XORs each
+	 * with its tweak, saves the tweaks at [sp] for the second XOR after
+	 * the cipher pass, and stores the following tweak to iv[].
+	 * Expects r1 = in, r6 = blocks, r7 = iv; clobbers r4, ip, q12-q15.
+	 * Every load/tweak/xor/store group is eight instructions, hence the
+	 * 32-byte stride of the computed goto.
+	 */
+__xts_prepare8:
+	vld1.8	{q14}, [r7]		// load iv
+	__ldr	q15, .Lxts_mul_x	// load tweak mask
+	vmov	q12, q14		// groups alternate between q14 and q12 as the
+					// current tweak; both start out as the iv
+
+	__adr	ip, 0f
+	and	r4, r6, #7
+	cmp	r6, #8
+	sub	ip, ip, r4, lsl #5
+	mov	r4, sp
+	bxlt	ip			// computed goto if blocks < 8
+
+	vld1.8	{q0}, [r1]!
+	next_tweak	q12, q14, q15, q13
+	veor	q0, q0, q14
+	vst1.8	{q14}, [r4, :128]!
+
+	vld1.8	{q1}, [r1]!
+	next_tweak	q14, q12, q15, q13
+	veor	q1, q1, q12
+	vst1.8	{q12}, [r4, :128]!
+
+	vld1.8	{q2}, [r1]!
+	next_tweak	q12, q14, q15, q13
+	veor	q2, q2, q14
+	vst1.8	{q14}, [r4, :128]!
+
+	vld1.8	{q3}, [r1]!
+	next_tweak	q14, q12, q15, q13
+	veor	q3, q3, q12
+	vst1.8	{q12}, [r4, :128]!
+
+	vld1.8	{q4}, [r1]!
+	next_tweak	q12, q14, q15, q13
+	veor	q4, q4, q14
+	vst1.8	{q14}, [r4, :128]!
+
+	vld1.8	{q5}, [r1]!
+	next_tweak	q14, q12, q15, q13
+	veor	q5, q5, q12
+	vst1.8	{q12}, [r4, :128]!
+
+	vld1.8	{q6}, [r1]!
+	next_tweak	q12, q14, q15, q13
+	veor	q6, q6, q14
+	vst1.8	{q14}, [r4, :128]!
+
+	vld1.8	{q7}, [r1]!
+	next_tweak	q14, q12, q15, q13
+	veor	q7, q7, q12
+	vst1.8	{q12}, [r4, :128]
+
+0:	vst1.8	{q14}, [r7]		// store next iv
+	bx	lr
+ENDPROC(__xts_prepare8)
+
+	/*
+	 * __xts_crypt - body shared by aesbs_xts_encrypt/aesbs_xts_decrypt.
+	 *
+	 * \do8 is the 8-block primitive to call; \o0-\o7 name the registers
+	 * in which it leaves blocks 0-7. r0 = out, r1 = in, r2 = rk,
+	 * r3 = rounds; blocks and iv are read from the stack into r6/r7.
+	 *
+	 * A 128-byte, 16-byte aligned scratch area for the eight tweaks is
+	 * carved out below the saved registers; the original sp is kept in
+	 * r5 and restored before returning.
+	 */
+	.macro	__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+	push	{r4-r8, lr}
+	mov	r5, sp			// preserve sp
+	ldrd	r6, r7, [sp, #24]	// get blocks and iv args
+	sub	ip, sp, #128		// make room for 8x tweak
+	bic	ip, ip, #0xf		// align sp to 16 bytes
+	mov	sp, ip
+
+99:	bl	__xts_prepare8
+
+	mov	bskey, r2
+	mov	rounds, r3
+	bl	\do8
+
+	__adr	ip, 0f
+	and	lr, r6, #7
+	cmp	r6, #8
+	sub	ip, ip, lr, lsl #2
+	mov	r4, sp
+	bxlt	ip			// computed goto if blocks < 8
+
+	vld1.8	{q8}, [r4, :128]!	// reload the tweaks saved by __xts_prepare8
+	vld1.8	{q9}, [r4, :128]!
+	vld1.8	{q10}, [r4, :128]!
+	vld1.8	{q11}, [r4, :128]!
+	vld1.8	{q12}, [r4, :128]!
+	vld1.8	{q13}, [r4, :128]!
+	vld1.8	{q14}, [r4, :128]!
+	vld1.8	{q15}, [r4, :128]
+
+0:	__adr	ip, 1f
+	sub	ip, ip, lr, lsl #3	// two instructions (8 bytes) per block below
+	bxlt	ip			// computed goto if blocks < 8
+
+	veor	\o0, \o0, q8
+	vst1.8	{\o0}, [r0]!
+	veor	\o1, \o1, q9
+	vst1.8	{\o1}, [r0]!
+	veor	\o2, \o2, q10
+	vst1.8	{\o2}, [r0]!
+	veor	\o3, \o3, q11
+	vst1.8	{\o3}, [r0]!
+	veor	\o4, \o4, q12
+	vst1.8	{\o4}, [r0]!
+	veor	\o5, \o5, q13
+	vst1.8	{\o5}, [r0]!
+	veor	\o6, \o6, q14
+	vst1.8	{\o6}, [r0]!
+	veor	\o7, \o7, q15
+	vst1.8	{\o7}, [r0]!
+
+1:	subs	r6, r6, #8		// eight blocks consumed; loop while any remain
+	bgt	99b
+
+	mov	sp, r5			// drop the tweak scratch area
+	pop	{r4-r8, pc}
+	.endm
+
+	// XTS entry points: the register lists give the order in which the
+	// respective 8-block primitive leaves blocks 0-7.
+ENTRY(aesbs_xts_encrypt)
+	__xts_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
+ENDPROC(aesbs_xts_encrypt)
+
+ENTRY(aesbs_xts_decrypt)
+	__xts_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
+ENDPROC(aesbs_xts_decrypt)
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
new file mode 100644
index 000000000000..e262f99a44d3
--- /dev/null
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -0,0 +1,405 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2017 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+MODULE_AUTHOR("Ard Biesheuvel ");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+
+/*
+ * Routines implemented in assembly. They process 'blocks' AES blocks from
+ * in[] to out[] using the key schedule rk[] produced by aesbs_convert_key().
+ */
+asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
+
+asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks);
+asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks);
+
+asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[]);
+
+asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 ctr[], bool final);
+
+asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[]);
+asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[]);
+
+/* Single-block AES encryption; defined outside this file. */
+asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds, const u8 in[],
+				  u8 out[]);
+
+/*
+ * Key material shared by all modes: the round count and the key schedule in
+ * the format written by aesbs_convert_key().
+ */
+struct aesbs_ctx {
+	int	rounds;
+	u8	rk[13 * (8 * AES_BLOCK_SIZE) + 32]
__aligned(AES_BLOCK_SIZE);
+};
+
+/* CBC: bit-sliced schedule for decryption plus a plain one for encryption. */
+struct aesbs_cbc_ctx {
+	struct aesbs_ctx	key;
+	u32			enc[AES_MAX_KEYLENGTH_U32];
+};
+
+/* XTS: bit-sliced data key plus a plain schedule for the tweak key. */
+struct aesbs_xts_ctx {
+	struct aesbs_ctx	key;
+	u32			twkey[AES_MAX_KEYLENGTH_U32];
+};
+
+/*
+ * Expand in_key and convert the encryption schedule into the bit-sliced
+ * layout stored in the transform context.
+ *
+ * Returns 0 on success or the error reported by crypto_aes_expand_key().
+ *
+ * NOTE(review): the expanded key in 'rk' stays on the stack when the
+ * function returns; consider wiping it before returning.
+ */
+static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			unsigned int key_len)
+{
+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_aes_ctx rk;
+	int err;
+
+	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	if (err)
+		return err;
+
+	/* 16/24/32 byte keys give 10/12/14 rounds */
+	ctx->rounds = 6 + key_len / 4;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
+/*
+ * Walk the request and hand each contiguous run of whole blocks to fn
+ * (aesbs_ecb_encrypt or aesbs_ecb_decrypt).
+ *
+ * Unless a step covers the remainder of the request, its block count is
+ * rounded down to a multiple of the walk stride; the bytes not consumed
+ * are handed back through skcipher_walk_done().
+ *
+ * Returns the status of the last walk operation.
+ */
+static int __ecb_crypt(struct skcipher_request *req,
+		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	int err;
+
+	/*
+	 * NOTE(review): 'true' is presumably the walk's atomic flag, needed
+	 * because the loop runs inside kernel_neon_begin()/end() - confirm.
+	 */
+	err = skcipher_walk_virt(&walk, req, true);
+
+	kernel_neon_begin();
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+		if (walk.nbytes < walk.total)
+			blocks = round_down(blocks,
+					    walk.stride / AES_BLOCK_SIZE);
+
+		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
+		   ctx->rounds, blocks);
+		err = skcipher_walk_done(&walk,
+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+/* skcipher .encrypt hook for ECB. */
+static int ecb_encrypt(struct skcipher_request *req)
+{
+	return __ecb_crypt(req, aesbs_ecb_encrypt);
+}
+
+/* skcipher .decrypt hook for ECB. */
+static int ecb_decrypt(struct skcipher_request *req)
+{
+	return __ecb_crypt(req, aesbs_ecb_decrypt);
+}
+
+/*
+ * CBC key setup: keeps a copy of the plain encryption schedule in ctx->enc
+ * (used by cbc_encrypt_one) and converts it into the bit-sliced layout for
+ * decryption.
+ *
+ * NOTE(review): as in aesbs_setkey(), 'rk' is not wiped before returning.
+ */
+static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_aes_ctx rk;
+	int err;
+
+	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	if (err)
+		return err;
+
+	ctx->key.rounds = 6 + key_len / 4;
+
+	memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
+
+
kernel_neon_begin();
+	aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
+/*
+ * Encrypt one block with the plain key schedule; passed to
+ * crypto_cbc_encrypt_walk() as its per-block callback.
+ */
+static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
+{
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	__aes_arm_encrypt(ctx->enc, ctx->key.rounds, src, dst);
+}
+
+/* skcipher .encrypt hook for CBC; does not use the bit-sliced NEON code. */
+static int cbc_encrypt(struct skcipher_request *req)
+{
+	return crypto_cbc_encrypt_walk(req, cbc_encrypt_one);
+}
+
+/*
+ * skcipher .decrypt hook for CBC: feeds each run of whole blocks to
+ * aesbs_cbc_decrypt(), which also updates walk.iv for the next run.
+ *
+ * Returns the status of the last walk operation.
+ */
+static int cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	kernel_neon_begin();
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+		/* only the last step may use a block count off the stride */
+		if (walk.nbytes < walk.total)
+			blocks = round_down(blocks,
+					    walk.stride / AES_BLOCK_SIZE);
+
+		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				  ctx->key.rk, ctx->key.rounds, blocks,
+				  walk.iv);
+		err = skcipher_walk_done(&walk,
+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+/*
+ * skcipher .encrypt/.decrypt hook for CTR.
+ *
+ * Whole blocks are handled by aesbs_ctr_encrypt(). When the request length
+ * is not a multiple of the block size, the last step passes final = true:
+ * the assembly then leaves one extra keystream block in walk.iv, which is
+ * XORed into the trailing partial block here.
+ *
+ * Returns the status of the last walk operation.
+ */
+static int ctr_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	kernel_neon_begin();
+	while (walk.nbytes > 0) {
+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+		bool final = (walk.total % AES_BLOCK_SIZE) != 0;
+
+		/* more data follows: stay on the stride, no tail handling yet */
+		if (walk.nbytes < walk.total) {
+			blocks = round_down(blocks,
+					    walk.stride / AES_BLOCK_SIZE);
+			final = false;
+		}
+
+		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
+
+		if (final) {
+			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+
+			if (dst != src)
+				memcpy(dst, src, walk.total %
AES_BLOCK_SIZE);
+			/* walk.iv holds the keystream block for the tail */
+			crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE);
+
+			err = skcipher_walk_done(&walk, 0);
+			break;
+		}
+		err = skcipher_walk_done(&walk,
+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+/*
+ * XTS key setup. The second half of in_key is the tweak key and is kept as
+ * a plain encryption schedule in ctx->twkey; the first half is the data key
+ * and goes through aesbs_setkey(), which works on this context because
+ * struct aesbs_ctx is the first member of struct aesbs_xts_ctx.
+ *
+ * Returns 0 on success, or the error from xts_verify_key() or key expansion.
+ */
+static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_aes_ctx rk;
+	int err;
+
+	err = xts_verify_key(tfm, in_key, key_len);
+	if (err)
+		return err;
+
+	key_len /= 2;
+	err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
+	if (err)
+		return err;
+
+	memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));
+
+	return aesbs_setkey(tfm, in_key, key_len);
+}
+
+/*
+ * Common XTS walk: encrypts the IV with the tweak key to obtain the first
+ * tweak, then hands each run of whole blocks to fn (aesbs_xts_encrypt or
+ * aesbs_xts_decrypt), which advances the tweak kept in walk.iv.
+ *
+ * Returns 0 on success or the error reported by the skcipher walk.
+ */
+static int __xts_crypt(struct skcipher_request *req,
+		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[]))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+	/*
+	 * Bail out before touching walk.iv: after a failed walk setup the
+	 * request must not be modified, and walk.iv cannot be relied upon.
+	 */
+	if (err)
+		return err;
+
+	__aes_arm_encrypt(ctx->twkey, ctx->key.rounds, walk.iv, walk.iv);
+
+	kernel_neon_begin();
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+		/* only the last step may use a block count off the stride */
+		if (walk.nbytes < walk.total)
+			blocks = round_down(blocks,
+					    walk.stride / AES_BLOCK_SIZE);
+
+		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
+		   ctx->key.rounds, blocks, walk.iv);
+		err = skcipher_walk_done(&walk,
+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+/* skcipher .encrypt hook for XTS. */
+static int xts_encrypt(struct skcipher_request *req)
+{
+	return __xts_crypt(req, aesbs_xts_encrypt);
+}
+
+/* skcipher .decrypt hook for XTS. */
+static int xts_decrypt(struct skcipher_request *req)
+{
+	return __xts_crypt(req, aesbs_xts_decrypt);
+}
+
+static struct skcipher_alg aes_algs[] = { {
+	.base.cra_name		= "__ecb(aes)",
+	.base.cra_driver_name	= "__ecb-aes-neonbs",
+	.base.cra_priority	= 250,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.walksize		= 8 * AES_BLOCK_SIZE,	/* the asm works on 8 blocks per pass */
+	.setkey			= aesbs_setkey,
+	.encrypt		= ecb_encrypt,
+	.decrypt		= ecb_decrypt,
+}, {
+	.base.cra_name		= "__cbc(aes)",
+	.base.cra_driver_name	= "__cbc-aes-neonbs",
+	.base.cra_priority	= 250,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctx),
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.walksize		= 8 * AES_BLOCK_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= aesbs_cbc_setkey,
+	.encrypt		= cbc_encrypt,
+	.decrypt		= cbc_decrypt,
+}, {
+	.base.cra_name		= "__ctr(aes)",
+	.base.cra_driver_name	= "__ctr-aes-neonbs",
+	.base.cra_priority	= 250,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.walksize		= 8 * AES_BLOCK_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= aesbs_setkey,
+	.encrypt		= ctr_encrypt,
+	.decrypt		= ctr_encrypt,	/* same routine in both directions */
+}, {
+	.base.cra_name		= "__xts(aes)",
+	.base.cra_driver_name	= "__xts-aes-neonbs",
+	.base.cra_priority	= 250,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct aesbs_xts_ctx),
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+
+	.min_keysize		= 2 * AES_MIN_KEY_SIZE,	/* data key + tweak key */
+	.max_keysize		= 2 * AES_MAX_KEY_SIZE,
+	.walksize		= 8 * AES_BLOCK_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= aesbs_xts_setkey,
+	.encrypt		= xts_encrypt,
+	.decrypt		= xts_decrypt,
+} };
+
+/* SIMD wrappers created by aes_init(), one slot per entry of aes_algs[]. */
+static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+/*
+ * Free every SIMD wrapper that was created and unregister the internal
+ * algorithms. Also serves as the error path of aes_init(), hence the check
+ * for slots that are still NULL.
+ */
+static void aes_exit(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
+		if
(aes_simd_algs[i])
+			simd_skcipher_free(aes_simd_algs[i]);
+
+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+/*
+ * Module init: registers the internal "__" algorithms and creates a SIMD
+ * wrapper for each of them under the same names minus the "__" prefix.
+ *
+ * Returns 0 on success, -ENODEV when the CPU lacks NEON, or the error from
+ * registration / wrapper creation (after undoing everything via aes_exit()).
+ */
+static int __init aes_init(void)
+{
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
+	int err;
+	int i;
+
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
+			continue;
+
+		/* "+ 2" skips the leading "__" of the internal names */
+		algname = aes_algs[i].base.cra_name + 2;
+		drvname = aes_algs[i].base.cra_driver_name + 2;
+		basename = aes_algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aes_simd_algs[i] = simd;
+	}
+	return 0;
+
+unregister_simds:
+	aes_exit();
+	return err;
+}
+
+module_init(aes_init);
+module_exit(aes_exit);
diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h
deleted file mode 100644
index cca3e51eb606..000000000000
--- a/arch/arm/crypto/aes_glue.h
+++ /dev/null
@@ -1,19 +0,0 @@
-
-#define AES_MAXNR 14
-
-struct AES_KEY {
-	unsigned int rd_key[4 * (AES_MAXNR + 1)];
-	int rounds;
-};
-
-struct AES_CTX {
-	struct AES_KEY enc_key;
-	struct AES_KEY dec_key;
-};
-
-asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
-asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
-asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey,
-					   const int bits, struct AES_KEY *key);
-asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey,
-					   const int bits, struct AES_KEY *key);
diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped
deleted file mode 100644
index 1d1800f71c5b..000000000000
--- a/arch/arm/crypto/aesbs-core.S_shipped
+++ /dev/null
@@ -1,2548 +0,0 @@
-
-@
==================================================================== -@ Written by Andy Polyakov for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ -@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel -@ . Permission to use under GPL terms is -@ granted. -@ ==================================================================== - -@ Bit-sliced AES for ARM NEON -@ -@ February 2012. -@ -@ This implementation is direct adaptation of bsaes-x86_64 module for -@ ARM NEON. Except that this module is endian-neutral [in sense that -@ it can be compiled for either endianness] by courtesy of vld1.8's -@ neutrality. Initial version doesn't implement interface to OpenSSL, -@ only low-level primitives and unsupported entry points, just enough -@ to collect performance results, which for Cortex-A8 core are: -@ -@ encrypt 19.5 cycles per byte processed with 128-bit key -@ decrypt 22.1 cycles per byte processed with 128-bit key -@ key conv. 440 cycles per 128-bit key/0.18 of 8x block -@ -@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, -@ which is [much] worse than anticipated (for further details see -@ http://www.openssl.org/~appro/Snapdragon-S4.html). -@ -@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code -@ manages in 20.0 cycles]. -@ -@ When comparing to x86_64 results keep in mind that NEON unit is -@ [mostly] single-issue and thus can't [fully] benefit from -@ instruction-level parallelism. And when comparing to aes-armv4 -@ results keep in mind key schedule conversion overhead (see -@ bsaes-x86_64.pl for further details)... -@ -@ - -@ April-August 2013 -@ -@ Add CBC, CTR and XTS subroutines, adapt for kernel use. 
-@ -@ - -#ifndef __KERNEL__ -# include "arm_arch.h" - -# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} -# define VFP_ABI_POP vldmia sp!,{d8-d15} -# define VFP_ABI_FRAME 0x40 -#else -# define VFP_ABI_PUSH -# define VFP_ABI_POP -# define VFP_ABI_FRAME 0 -# define BSAES_ASM_EXTENDED_KEY -# define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ 7 -#endif - -#ifdef __thumb__ -# define adrl adr -#endif - -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.text -.syntax unified @ ARMv7-capable assembler is expected to handle this -#ifdef __thumb2__ -.thumb -#else -.code 32 -#endif - -.type _bsaes_decrypt8,%function -.align 4 -_bsaes_decrypt8: - adr r6,_bsaes_decrypt8 - vldmia r4!, {q9} @ round 0 key - add r6,r6,#.LM0ISR-_bsaes_decrypt8 - - vldmia r6!, {q8} @ .LM0ISR - veor q10, q0, q9 @ xor with round0 key - veor q11, q1, q9 - vtbl.8 d0, {q10}, d16 - vtbl.8 d1, {q10}, d17 - veor q12, q2, q9 - vtbl.8 d2, {q11}, d16 - vtbl.8 d3, {q11}, d17 - veor q13, q3, q9 - vtbl.8 d4, {q12}, d16 - vtbl.8 d5, {q12}, d17 - veor q14, q4, q9 - vtbl.8 d6, {q13}, d16 - vtbl.8 d7, {q13}, d17 - veor q15, q5, q9 - vtbl.8 d8, {q14}, d16 - vtbl.8 d9, {q14}, d17 - veor q10, q6, q9 - vtbl.8 d10, {q15}, d16 - vtbl.8 d11, {q15}, d17 - veor q11, q7, q9 - vtbl.8 d12, {q10}, d16 - vtbl.8 d13, {q10}, d17 - vtbl.8 d14, {q11}, d16 - vtbl.8 d15, {q11}, d17 - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q4, #1 - veor q10, q10, q7 - veor q11, q11, q5 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #1 - veor q5, q5, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q3 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q3, q3, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q5, #2 - 
vshr.u64 q11, q4, #2 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q7, q7, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q5, q5, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q3 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q3, q3, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q3, #4 - vshr.u64 q11, q2, #4 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q6, q6, q11 - vshl.u64 q11, q11, #4 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q5 - veor q11, q11, q4 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q4, q4, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - sub r5,r5,#1 - b .Ldec_sbox -.align 4 -.Ldec_loop: - vldmia r4!, {q8-q11} - veor q8, q8, q0 - veor q9, q9, q1 - vtbl.8 d0, {q8}, d24 - vtbl.8 d1, {q8}, d25 - vldmia r4!, {q8} - veor q10, q10, q2 - vtbl.8 d2, {q9}, d24 - vtbl.8 d3, {q9}, d25 - vldmia r4!, {q9} - veor q11, q11, q3 - vtbl.8 d4, {q10}, d24 - vtbl.8 d5, {q10}, d25 - vldmia r4!, {q10} - vtbl.8 d6, {q11}, d24 - vtbl.8 d7, {q11}, d25 - vldmia r4!, {q11} - veor q8, q8, q4 - veor q9, q9, q5 - vtbl.8 d8, {q8}, d24 - vtbl.8 d9, {q8}, d25 - veor q10, q10, q6 - vtbl.8 d10, {q9}, d24 - vtbl.8 d11, {q9}, d25 - veor q11, q11, q7 - vtbl.8 d12, {q10}, d24 - vtbl.8 d13, {q10}, d25 - vtbl.8 d14, {q11}, d24 - vtbl.8 d15, {q11}, d25 -.Ldec_sbox: - veor q1, q1, q4 - veor q3, q3, q4 - - veor q4, q4, q7 - veor q1, q1, q6 - veor q2, q2, q7 - veor q6, q6, q4 - - veor q0, q0, q1 - veor q2, q2, q5 - veor q7, q7, q6 - veor q3, q3, q0 - veor q5, q5, q0 - veor q1, q1, q3 - veor q11, q3, q0 - veor q10, q7, q4 - veor q9, q1, q6 - veor q13, q4, q0 - vmov q8, q10 - veor q12, 
q5, q2 - - vorr q10, q10, q9 - veor q15, q11, q8 - vand q14, q11, q12 - vorr q11, q11, q12 - veor q12, q12, q9 - vand q8, q8, q9 - veor q9, q6, q2 - vand q15, q15, q12 - vand q13, q13, q9 - veor q9, q3, q7 - veor q12, q1, q5 - veor q11, q11, q13 - veor q10, q10, q13 - vand q13, q9, q12 - vorr q9, q9, q12 - veor q11, q11, q15 - veor q8, q8, q13 - veor q10, q10, q14 - veor q9, q9, q15 - veor q8, q8, q14 - vand q12, q4, q6 - veor q9, q9, q14 - vand q13, q0, q2 - vand q14, q7, q1 - vorr q15, q3, q5 - veor q11, q11, q12 - veor q9, q9, q14 - veor q8, q8, q15 - veor q10, q10, q13 - - @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 - - @ new smaller inversion - - vand q14, q11, q9 - vmov q12, q8 - - veor q13, q10, q14 - veor q15, q8, q14 - veor q14, q8, q14 @ q14=q15 - - vbsl q13, q9, q8 - vbsl q15, q11, q10 - veor q11, q11, q10 - - vbsl q12, q13, q14 - vbsl q8, q14, q13 - - vand q14, q12, q15 - veor q9, q9, q8 - - veor q14, q14, q11 - veor q12, q5, q2 - veor q8, q1, q6 - veor q10, q15, q14 - vand q10, q10, q5 - veor q5, q5, q1 - vand q11, q1, q15 - vand q5, q5, q14 - veor q1, q11, q10 - veor q5, q5, q11 - veor q15, q15, q13 - veor q14, q14, q9 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q2 - veor q12, q12, q8 - veor q2, q2, q6 - vand q8, q8, q15 - vand q6, q6, q13 - vand q12, q12, q14 - vand q2, q2, q9 - veor q8, q8, q12 - veor q2, q2, q6 - veor q12, q12, q11 - veor q6, q6, q10 - veor q5, q5, q12 - veor q2, q2, q12 - veor q1, q1, q8 - veor q6, q6, q8 - - veor q12, q3, q0 - veor q8, q7, q4 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q0 - veor q12, q12, q8 - veor q0, q0, q4 - vand q8, q8, q15 - vand q4, q4, q13 - vand q12, q12, q14 - vand q0, q0, q9 - veor q8, q8, q12 - veor q0, q0, q4 - veor q12, q12, q11 - veor q4, q4, q10 - veor q15, q15, q13 - veor q14, q14, q9 - veor q10, q15, q14 - vand q10, q10, q3 - veor q3, q3, q7 - vand q11, q7, q15 - vand q3, q3, q14 - veor q7, q11, q10 - veor q3, q3, q11 - veor q3, q3, 
q12 - veor q0, q0, q12 - veor q7, q7, q8 - veor q4, q4, q8 - veor q1, q1, q7 - veor q6, q6, q5 - - veor q4, q4, q1 - veor q2, q2, q7 - veor q5, q5, q7 - veor q4, q4, q2 - veor q7, q7, q0 - veor q4, q4, q5 - veor q3, q3, q6 - veor q6, q6, q1 - veor q3, q3, q4 - - veor q4, q4, q0 - veor q7, q7, q3 - subs r5,r5,#1 - bcc .Ldec_done - @ multiplication by 0x05-0x00-0x04-0x00 - vext.8 q8, q0, q0, #8 - vext.8 q14, q3, q3, #8 - vext.8 q15, q5, q5, #8 - veor q8, q8, q0 - vext.8 q9, q1, q1, #8 - veor q14, q14, q3 - vext.8 q10, q6, q6, #8 - veor q15, q15, q5 - vext.8 q11, q4, q4, #8 - veor q9, q9, q1 - vext.8 q12, q2, q2, #8 - veor q10, q10, q6 - vext.8 q13, q7, q7, #8 - veor q11, q11, q4 - veor q12, q12, q2 - veor q13, q13, q7 - - veor q0, q0, q14 - veor q1, q1, q14 - veor q6, q6, q8 - veor q2, q2, q10 - veor q4, q4, q9 - veor q1, q1, q15 - veor q6, q6, q15 - veor q2, q2, q14 - veor q7, q7, q11 - veor q4, q4, q14 - veor q3, q3, q12 - veor q2, q2, q15 - veor q7, q7, q15 - veor q5, q5, q13 - vext.8 q8, q0, q0, #12 @ x0 <<< 32 - vext.8 q9, q1, q1, #12 - veor q0, q0, q8 @ x0 ^ (x0 <<< 32) - vext.8 q10, q6, q6, #12 - veor q1, q1, q9 - vext.8 q11, q4, q4, #12 - veor q6, q6, q10 - vext.8 q12, q2, q2, #12 - veor q4, q4, q11 - vext.8 q13, q7, q7, #12 - veor q2, q2, q12 - vext.8 q14, q3, q3, #12 - veor q7, q7, q13 - vext.8 q15, q5, q5, #12 - veor q3, q3, q14 - - veor q9, q9, q0 - veor q5, q5, q15 - vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor q10, q10, q1 - veor q8, q8, q5 - veor q9, q9, q5 - vext.8 q1, q1, q1, #8 - veor q13, q13, q2 - veor q0, q0, q8 - veor q14, q14, q7 - veor q1, q1, q9 - vext.8 q8, q2, q2, #8 - veor q12, q12, q4 - vext.8 q9, q7, q7, #8 - veor q15, q15, q3 - vext.8 q2, q4, q4, #8 - veor q11, q11, q6 - vext.8 q7, q5, q5, #8 - veor q12, q12, q5 - vext.8 q4, q3, q3, #8 - veor q11, q11, q5 - vext.8 q3, q6, q6, #8 - veor q5, q9, q13 - veor q11, q11, q2 - veor q7, q7, q15 - veor q6, q4, q14 - veor q4, q8, q12 - veor q2, q3, q10 - vmov q3, q11 - @ vmov q5, q9 - 
vldmia r6, {q12} @ .LISR - ite eq @ Thumb2 thing, sanity check in ARM - addeq r6,r6,#0x10 - bne .Ldec_loop - vldmia r6, {q12} @ .LISRM0 - b .Ldec_loop -.align 4 -.Ldec_done: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q3, #1 - vshr.u64 q11, q2, #1 - veor q10, q10, q5 - veor q11, q11, q7 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #1 - veor q7, q7, q11 - vshl.u64 q11, q11, #1 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q4 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q4, q4, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q7, #2 - vshr.u64 q11, q2, #2 - veor q10, q10, q5 - veor q11, q11, q3 - vand q10, q10, q9 - vand q11, q11, q9 - veor q5, q5, q10 - vshl.u64 q10, q10, #2 - veor q3, q3, q11 - vshl.u64 q11, q11, #2 - veor q7, q7, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q4 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q4, q4, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q4, #4 - vshr.u64 q11, q6, #4 - veor q10, q10, q5 - veor q11, q11, q3 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q3, q3, q11 - vshl.u64 q11, q11, #4 - veor q4, q4, q10 - veor q6, q6, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q7 - veor q11, q11, q2 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q2, q2, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - vldmia r4, {q8} @ last round key - veor q6, q6, q8 - veor q4, q4, q8 - veor q2, q2, q8 - veor q7, q7, q8 - veor q3, q3, q8 - veor q5, q5, q8 - veor q0, q0, q8 - veor q1, q1, q8 - bx lr -.size 
_bsaes_decrypt8,.-_bsaes_decrypt8 - -.type _bsaes_const,%object -.align 6 -_bsaes_const: -.LM0ISR: @ InvShiftRows constants - .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISR: - .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LISRM0: - .quad 0x01040b0e0205080f, 0x0306090c00070a0d -.LM0SR: @ ShiftRows constants - .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSR: - .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -.LSRM0: - .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d -.LREVM0SR: - .quad 0x090d01050c000408, 0x03070b0f060a0e02 -.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " -.align 6 -.size _bsaes_const,.-_bsaes_const - -.type _bsaes_encrypt8,%function -.align 4 -_bsaes_encrypt8: - adr r6,_bsaes_encrypt8 - vldmia r4!, {q9} @ round 0 key - sub r6,r6,#_bsaes_encrypt8-.LM0SR - - vldmia r6!, {q8} @ .LM0SR -_bsaes_encrypt8_alt: - veor q10, q0, q9 @ xor with round0 key - veor q11, q1, q9 - vtbl.8 d0, {q10}, d16 - vtbl.8 d1, {q10}, d17 - veor q12, q2, q9 - vtbl.8 d2, {q11}, d16 - vtbl.8 d3, {q11}, d17 - veor q13, q3, q9 - vtbl.8 d4, {q12}, d16 - vtbl.8 d5, {q12}, d17 - veor q14, q4, q9 - vtbl.8 d6, {q13}, d16 - vtbl.8 d7, {q13}, d17 - veor q15, q5, q9 - vtbl.8 d8, {q14}, d16 - vtbl.8 d9, {q14}, d17 - veor q10, q6, q9 - vtbl.8 d10, {q15}, d16 - vtbl.8 d11, {q15}, d17 - veor q11, q7, q9 - vtbl.8 d12, {q10}, d16 - vtbl.8 d13, {q10}, d17 - vtbl.8 d14, {q11}, d16 - vtbl.8 d15, {q11}, d17 -_bsaes_encrypt8_bitslice: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q4, #1 - veor q10, q10, q7 - veor q11, q11, q5 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #1 - veor q5, q5, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q3 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q3, q3, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 
q11, q11, #1 - veor q2, q2, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q5, #2 - vshr.u64 q11, q4, #2 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q7, q7, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q5, q5, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q3 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q3, q3, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q3, #4 - vshr.u64 q11, q2, #4 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q6, q6, q11 - vshl.u64 q11, q11, #4 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q5 - veor q11, q11, q4 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q4, q4, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - sub r5,r5,#1 - b .Lenc_sbox -.align 4 -.Lenc_loop: - vldmia r4!, {q8-q11} - veor q8, q8, q0 - veor q9, q9, q1 - vtbl.8 d0, {q8}, d24 - vtbl.8 d1, {q8}, d25 - vldmia r4!, {q8} - veor q10, q10, q2 - vtbl.8 d2, {q9}, d24 - vtbl.8 d3, {q9}, d25 - vldmia r4!, {q9} - veor q11, q11, q3 - vtbl.8 d4, {q10}, d24 - vtbl.8 d5, {q10}, d25 - vldmia r4!, {q10} - vtbl.8 d6, {q11}, d24 - vtbl.8 d7, {q11}, d25 - vldmia r4!, {q11} - veor q8, q8, q4 - veor q9, q9, q5 - vtbl.8 d8, {q8}, d24 - vtbl.8 d9, {q8}, d25 - veor q10, q10, q6 - vtbl.8 d10, {q9}, d24 - vtbl.8 d11, {q9}, d25 - veor q11, q11, q7 - vtbl.8 d12, {q10}, d24 - vtbl.8 d13, {q10}, d25 - vtbl.8 d14, {q11}, d24 - vtbl.8 d15, {q11}, d25 -.Lenc_sbox: - veor q2, q2, q1 - veor q5, q5, q6 - veor q3, q3, q0 - veor q6, q6, q2 - veor q5, q5, q0 - - veor q6, q6, q3 - veor q3, q3, q7 - veor q7, q7, q5 - veor q3, q3, q4 - veor q4, q4, q5 - - veor q2, q2, q7 - veor q3, 
q3, q1 - veor q1, q1, q5 - veor q11, q7, q4 - veor q10, q1, q2 - veor q9, q5, q3 - veor q13, q2, q4 - vmov q8, q10 - veor q12, q6, q0 - - vorr q10, q10, q9 - veor q15, q11, q8 - vand q14, q11, q12 - vorr q11, q11, q12 - veor q12, q12, q9 - vand q8, q8, q9 - veor q9, q3, q0 - vand q15, q15, q12 - vand q13, q13, q9 - veor q9, q7, q1 - veor q12, q5, q6 - veor q11, q11, q13 - veor q10, q10, q13 - vand q13, q9, q12 - vorr q9, q9, q12 - veor q11, q11, q15 - veor q8, q8, q13 - veor q10, q10, q14 - veor q9, q9, q15 - veor q8, q8, q14 - vand q12, q2, q3 - veor q9, q9, q14 - vand q13, q4, q0 - vand q14, q1, q5 - vorr q15, q7, q6 - veor q11, q11, q12 - veor q9, q9, q14 - veor q8, q8, q15 - veor q10, q10, q13 - - @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 - - @ new smaller inversion - - vand q14, q11, q9 - vmov q12, q8 - - veor q13, q10, q14 - veor q15, q8, q14 - veor q14, q8, q14 @ q14=q15 - - vbsl q13, q9, q8 - vbsl q15, q11, q10 - veor q11, q11, q10 - - vbsl q12, q13, q14 - vbsl q8, q14, q13 - - vand q14, q12, q15 - veor q9, q9, q8 - - veor q14, q14, q11 - veor q12, q6, q0 - veor q8, q5, q3 - veor q10, q15, q14 - vand q10, q10, q6 - veor q6, q6, q5 - vand q11, q5, q15 - vand q6, q6, q14 - veor q5, q11, q10 - veor q6, q6, q11 - veor q15, q15, q13 - veor q14, q14, q9 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q0 - veor q12, q12, q8 - veor q0, q0, q3 - vand q8, q8, q15 - vand q3, q3, q13 - vand q12, q12, q14 - vand q0, q0, q9 - veor q8, q8, q12 - veor q0, q0, q3 - veor q12, q12, q11 - veor q3, q3, q10 - veor q6, q6, q12 - veor q0, q0, q12 - veor q5, q5, q8 - veor q3, q3, q8 - - veor q12, q7, q4 - veor q8, q1, q2 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q4 - veor q12, q12, q8 - veor q4, q4, q2 - vand q8, q8, q15 - vand q2, q2, q13 - vand q12, q12, q14 - vand q4, q4, q9 - veor q8, q8, q12 - veor q4, q4, q2 - veor q12, q12, q11 - veor q2, q2, q10 - veor q15, q15, q13 - veor q14, q14, q9 - veor q10, q15, q14 - 
vand q10, q10, q7 - veor q7, q7, q1 - vand q11, q1, q15 - vand q7, q7, q14 - veor q1, q11, q10 - veor q7, q7, q11 - veor q7, q7, q12 - veor q4, q4, q12 - veor q1, q1, q8 - veor q2, q2, q8 - veor q7, q7, q0 - veor q1, q1, q6 - veor q6, q6, q0 - veor q4, q4, q7 - veor q0, q0, q1 - - veor q1, q1, q5 - veor q5, q5, q2 - veor q2, q2, q3 - veor q3, q3, q5 - veor q4, q4, q5 - - veor q6, q6, q3 - subs r5,r5,#1 - bcc .Lenc_done - vext.8 q8, q0, q0, #12 @ x0 <<< 32 - vext.8 q9, q1, q1, #12 - veor q0, q0, q8 @ x0 ^ (x0 <<< 32) - vext.8 q10, q4, q4, #12 - veor q1, q1, q9 - vext.8 q11, q6, q6, #12 - veor q4, q4, q10 - vext.8 q12, q3, q3, #12 - veor q6, q6, q11 - vext.8 q13, q7, q7, #12 - veor q3, q3, q12 - vext.8 q14, q2, q2, #12 - veor q7, q7, q13 - vext.8 q15, q5, q5, #12 - veor q2, q2, q14 - - veor q9, q9, q0 - veor q5, q5, q15 - vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor q10, q10, q1 - veor q8, q8, q5 - veor q9, q9, q5 - vext.8 q1, q1, q1, #8 - veor q13, q13, q3 - veor q0, q0, q8 - veor q14, q14, q7 - veor q1, q1, q9 - vext.8 q8, q3, q3, #8 - veor q12, q12, q6 - vext.8 q9, q7, q7, #8 - veor q15, q15, q2 - vext.8 q3, q6, q6, #8 - veor q11, q11, q4 - vext.8 q7, q5, q5, #8 - veor q12, q12, q5 - vext.8 q6, q2, q2, #8 - veor q11, q11, q5 - vext.8 q2, q4, q4, #8 - veor q5, q9, q13 - veor q4, q8, q12 - veor q3, q3, q11 - veor q7, q7, q15 - veor q6, q6, q14 - @ vmov q4, q8 - veor q2, q2, q10 - @ vmov q5, q9 - vldmia r6, {q12} @ .LSR - ite eq @ Thumb2 thing, samity check in ARM - addeq r6,r6,#0x10 - bne .Lenc_loop - vldmia r6, {q12} @ .LSRM0 - b .Lenc_loop -.align 4 -.Lenc_done: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q3, #1 - veor q10, q10, q5 - veor q11, q11, q7 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #1 - veor q7, q7, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q3, q3, q11 - vshr.u64 q10, q4, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q6 - veor 
q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q6, q6, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q4, q4, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q7, #2 - vshr.u64 q11, q3, #2 - veor q10, q10, q5 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q5, q5, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q7, q7, q10 - veor q3, q3, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q6 - veor q11, q11, q4 - vand q10, q10, q9 - vand q11, q11, q9 - veor q6, q6, q10 - vshl.u64 q10, q10, #2 - veor q4, q4, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q6, #4 - vshr.u64 q11, q4, #4 - veor q10, q10, q5 - veor q11, q11, q2 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q2, q2, q11 - vshl.u64 q11, q11, #4 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q7 - veor q11, q11, q3 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q3, q3, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - vldmia r4, {q8} @ last round key - veor q4, q4, q8 - veor q6, q6, q8 - veor q3, q3, q8 - veor q7, q7, q8 - veor q2, q2, q8 - veor q5, q5, q8 - veor q0, q0, q8 - veor q1, q1, q8 - bx lr -.size _bsaes_encrypt8,.-_bsaes_encrypt8 -.type _bsaes_key_convert,%function -.align 4 -_bsaes_key_convert: - adr r6,_bsaes_key_convert - vld1.8 {q7}, [r4]! @ load round 0 key - sub r6,r6,#_bsaes_key_convert-.LM0 - vld1.8 {q15}, [r4]! 
@ load round 1 key - - vmov.i8 q8, #0x01 @ bit masks - vmov.i8 q9, #0x02 - vmov.i8 q10, #0x04 - vmov.i8 q11, #0x08 - vmov.i8 q12, #0x10 - vmov.i8 q13, #0x20 - vldmia r6, {q14} @ .LM0 - -#ifdef __ARMEL__ - vrev32.8 q7, q7 - vrev32.8 q15, q15 -#endif - sub r5,r5,#1 - vstmia r12!, {q7} @ save round 0 key - b .Lkey_loop - -.align 4 -.Lkey_loop: - vtbl.8 d14,{q15},d28 - vtbl.8 d15,{q15},d29 - vmov.i8 q6, #0x40 - vmov.i8 q15, #0x80 - - vtst.8 q0, q7, q8 - vtst.8 q1, q7, q9 - vtst.8 q2, q7, q10 - vtst.8 q3, q7, q11 - vtst.8 q4, q7, q12 - vtst.8 q5, q7, q13 - vtst.8 q6, q7, q6 - vtst.8 q7, q7, q15 - vld1.8 {q15}, [r4]! @ load next round key - vmvn q0, q0 @ "pnot" - vmvn q1, q1 - vmvn q5, q5 - vmvn q6, q6 -#ifdef __ARMEL__ - vrev32.8 q15, q15 -#endif - subs r5,r5,#1 - vstmia r12!,{q0-q7} @ write bit-sliced round key - bne .Lkey_loop - - vmov.i8 q7,#0x63 @ compose .L63 - @ don't save last round key - bx lr -.size _bsaes_key_convert,.-_bsaes_key_convert -.extern AES_cbc_encrypt -.extern AES_decrypt - -.global bsaes_cbc_encrypt -.type bsaes_cbc_encrypt,%function -.align 5 -bsaes_cbc_encrypt: -#ifndef __KERNEL__ - cmp r2, #128 -#ifndef __thumb__ - blo AES_cbc_encrypt -#else - bhs 1f - b AES_cbc_encrypt -1: -#endif -#endif - - @ it is up to the caller to make sure we are called with enc == 0 - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr r8, [ip] @ IV is 1st arg on the stack - mov r2, r2, lsr#4 @ len in 16 byte blocks - sub sp, #0x10 @ scratch space to carry over the IV - mov r9, sp @ save sp - - ldr r10, [r3, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key - add r12, #96 @ sifze of bit-slices key schedule - - @ populate the key schedule - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - mov sp, r12 @ sp is sp - bl _bsaes_key_convert - vldmia sp, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia sp, {q7} 
-#else - ldr r12, [r3, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [r3, #244] - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - add r12, r3, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, r3, #248 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} - -.align 2 -0: -#endif - - vld1.8 {q15}, [r8] @ load IV - b .Lcbc_dec_loop - -.align 4 -.Lcbc_dec_loop: - subs r2, r2, #0x8 - bmi .Lcbc_dec_loop_finish - - vld1.8 {q0-q1}, [r0]! @ load input - vld1.8 {q2-q3}, [r0]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, sp @ pass the key -#else - add r4, r3, #248 -#endif - vld1.8 {q4-q5}, [r0]! - mov r5, r10 - vld1.8 {q6-q7}, [r0] - sub r0, r0, #0x60 - vstmia r9, {q15} @ put aside IV - - bl _bsaes_decrypt8 - - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q14-q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q3, q3, q13 - vst1.8 {q6}, [r1]! - veor q5, q5, q14 - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - vst1.8 {q3}, [r1]! - vst1.8 {q5}, [r1]! - - b .Lcbc_dec_loop - -.Lcbc_dec_loop_finish: - adds r2, r2, #8 - beq .Lcbc_dec_done - - vld1.8 {q0}, [r0]! @ load input - cmp r2, #2 - blo .Lcbc_dec_one - vld1.8 {q1}, [r0]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, sp @ pass the key -#else - add r4, r3, #248 -#endif - mov r5, r10 - vstmia r9, {q15} @ put aside IV - beq .Lcbc_dec_two - vld1.8 {q2}, [r0]! - cmp r2, #4 - blo .Lcbc_dec_three - vld1.8 {q3}, [r0]! - beq .Lcbc_dec_four - vld1.8 {q4}, [r0]! - cmp r2, #6 - blo .Lcbc_dec_five - vld1.8 {q5}, [r0]! - beq .Lcbc_dec_six - vld1.8 {q6}, [r0]! - sub r0, r0, #0x70 - - bl _bsaes_decrypt8 - - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! 
@ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q3, q3, q13 - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - vst1.8 {q3}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_six: - sub r0, r0, #0x60 - bl _bsaes_decrypt8 - vldmia r9,{q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_five: - sub r0, r0, #0x50 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q15}, [r0]! - veor q4, q4, q10 - vst1.8 {q0-q1}, [r1]! @ write output - veor q2, q2, q11 - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_four: - sub r0, r0, #0x40 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q15}, [r0]! - veor q4, q4, q10 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_three: - sub r0, r0, #0x30 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q15}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! 
- b .Lcbc_dec_done -.align 4 -.Lcbc_dec_two: - sub r0, r0, #0x20 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q15}, [r0]! @ reload input - veor q1, q1, q8 - vst1.8 {q0-q1}, [r1]! @ write output - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_one: - sub r0, r0, #0x10 - mov r10, r1 @ save original out pointer - mov r1, r9 @ use the iv scratch space as out buffer - mov r2, r3 - vmov q4,q15 @ just in case ensure that IV - vmov q5,q0 @ and input are preserved - bl AES_decrypt - vld1.8 {q0}, [r9,:64] @ load result - veor q0, q0, q4 @ ^= IV - vmov q15, q5 @ q5 holds input - vst1.8 {q0}, [r10] @ write output - -.Lcbc_dec_done: -#ifndef BSAES_ASM_EXTENDED_KEY - vmov.i32 q0, #0 - vmov.i32 q1, #0 -.Lcbc_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r9 - bne .Lcbc_dec_bzero -#endif - - mov sp, r9 - add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb - vst1.8 {q15}, [r8] @ return IV - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} -.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt -.extern AES_encrypt -.global bsaes_ctr32_encrypt_blocks -.type bsaes_ctr32_encrypt_blocks,%function -.align 5 -bsaes_ctr32_encrypt_blocks: - cmp r2, #8 @ use plain AES for - blo .Lctr_enc_short @ small sizes - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr r8, [ip] @ ctr is 1st arg on the stack - sub sp, sp, #0x10 @ scratch space to carry over the ctr - mov r9, sp @ save sp - - ldr r10, [r3, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key - add r12, #96 @ size of bit-sliced key schedule - - @ populate the key schedule - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - mov sp, r12 @ sp is sp - bl _bsaes_key_convert - veor q7,q7,q15 @ fix up last round key - vstmia r12, {q7} @ save last round key - - vld1.8 {q0}, [r8] @ load counter - add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 - vldmia sp, {q4} 
@ load round0 key -#else - ldr r12, [r3, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [r3, #244] - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - add r12, r3, #248 @ pass key schedule - bl _bsaes_key_convert - veor q7,q7,q15 @ fix up last round key - vstmia r12, {q7} @ save last round key - -.align 2 -0: add r12, r3, #248 - vld1.8 {q0}, [r8] @ load counter - adrl r8, .LREVM0SR @ borrow r8 - vldmia r12, {q4} @ load round0 key - sub sp, #0x10 @ place for adjusted round0 key -#endif - - vmov.i32 q8,#1 @ compose 1<<96 - veor q9,q9,q9 - vrev32.8 q0,q0 - vext.8 q8,q9,q8,#4 - vrev32.8 q4,q4 - vadd.u32 q9,q8,q8 @ compose 2<<96 - vstmia sp, {q4} @ save adjusted round0 key - b .Lctr_enc_loop - -.align 4 -.Lctr_enc_loop: - vadd.u32 q10, q8, q9 @ compose 3<<96 - vadd.u32 q1, q0, q8 @ +1 - vadd.u32 q2, q0, q9 @ +2 - vadd.u32 q3, q0, q10 @ +3 - vadd.u32 q4, q1, q10 - vadd.u32 q5, q2, q10 - vadd.u32 q6, q3, q10 - vadd.u32 q7, q4, q10 - vadd.u32 q10, q5, q10 @ next counter - - @ Borrow prologue from _bsaes_encrypt8 to use the opportunity - @ to flip byte order in 32-bit counter - - vldmia sp, {q9} @ load round0 key -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x10 @ pass next round key -#else - add r4, r3, #264 -#endif - vldmia r8, {q8} @ .LREVM0SR - mov r5, r10 @ pass rounds - vstmia r9, {q10} @ save next counter - sub r6, r8, #.LREVM0SR-.LSR @ pass constants - - bl _bsaes_encrypt8_alt - - subs r2, r2, #8 - blo .Lctr_enc_loop_done - - vld1.8 {q8-q9}, [r0]! @ load input - vld1.8 {q10-q11}, [r0]! - veor q0, q8 - veor q1, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q10 - veor q6, q11 - vld1.8 {q14-q15}, [r0]! - veor q3, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q7, q13 - veor q2, q14 - vst1.8 {q4}, [r1]! - veor q5, q15 - vst1.8 {q6}, [r1]! - vmov.i32 q8, #1 @ compose 1<<96 - vst1.8 {q3}, [r1]! - veor q9, q9, q9 - vst1.8 {q7}, [r1]! - vext.8 q8, q9, q8, #4 - vst1.8 {q2}, [r1]! - vadd.u32 q9,q8,q8 @ compose 2<<96 - vst1.8 {q5}, [r1]! 
- vldmia r9, {q0} @ load counter - - bne .Lctr_enc_loop - b .Lctr_enc_done - -.align 4 -.Lctr_enc_loop_done: - add r2, r2, #8 - vld1.8 {q8}, [r0]! @ load input - veor q0, q8 - vst1.8 {q0}, [r1]! @ write output - cmp r2, #2 - blo .Lctr_enc_done - vld1.8 {q9}, [r0]! - veor q1, q9 - vst1.8 {q1}, [r1]! - beq .Lctr_enc_done - vld1.8 {q10}, [r0]! - veor q4, q10 - vst1.8 {q4}, [r1]! - cmp r2, #4 - blo .Lctr_enc_done - vld1.8 {q11}, [r0]! - veor q6, q11 - vst1.8 {q6}, [r1]! - beq .Lctr_enc_done - vld1.8 {q12}, [r0]! - veor q3, q12 - vst1.8 {q3}, [r1]! - cmp r2, #6 - blo .Lctr_enc_done - vld1.8 {q13}, [r0]! - veor q7, q13 - vst1.8 {q7}, [r1]! - beq .Lctr_enc_done - vld1.8 {q14}, [r0] - veor q2, q14 - vst1.8 {q2}, [r1]! - -.Lctr_enc_done: - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifndef BSAES_ASM_EXTENDED_KEY -.Lctr_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r9 - bne .Lctr_enc_bzero -#else - vstmia sp, {q0-q1} -#endif - - mov sp, r9 - add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.align 4 -.Lctr_enc_short: - ldr ip, [sp] @ ctr pointer is passed on stack - stmdb sp!, {r4-r8, lr} - - mov r4, r0 @ copy arguments - mov r5, r1 - mov r6, r2 - mov r7, r3 - ldr r8, [ip, #12] @ load counter LSW - vld1.8 {q1}, [ip] @ load whole counter value -#ifdef __ARMEL__ - rev r8, r8 -#endif - sub sp, sp, #0x10 - vst1.8 {q1}, [sp,:64] @ copy counter value - sub sp, sp, #0x10 - -.Lctr_enc_short_loop: - add r0, sp, #0x10 @ input counter value - mov r1, sp @ output on the stack - mov r2, r7 @ key - - bl AES_encrypt - - vld1.8 {q0}, [r4]! @ load input - vld1.8 {q1}, [sp,:64] @ load encrypted counter - add r8, r8, #1 -#ifdef __ARMEL__ - rev r0, r8 - str r0, [sp, #0x1c] @ next counter value -#else - str r8, [sp, #0x1c] @ next counter value -#endif - veor q0,q0,q1 - vst1.8 {q0}, [r5]! 
@ store output - subs r6, r6, #1 - bne .Lctr_enc_short_loop - - vmov.i32 q0, #0 - vmov.i32 q1, #0 - vstmia sp!, {q0-q1} - - ldmia sp!, {r4-r8, pc} -.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -.globl bsaes_xts_encrypt -.type bsaes_xts_encrypt,%function -.align 4 -bsaes_xts_encrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future r3 - - mov r7, r0 - mov r8, r1 - mov r9, r2 - mov r10, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0,sp @ pointer to initial tweak -#endif - - ldr r1, [r10, #240] @ get # of rounds - mov r3, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key - @ add r12, #96 @ size of bit-sliced key schedule - sub r12, #48 @ place for tweak[9] - - @ populate the key schedule - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - veor q7, q7, q15 @ fix up last round key - vstmia r12, {q7} @ save last round key -#else - ldr r12, [r10, #244] - eors r12, #1 - beq 0f - - str r12, [r10, #244] - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - add r12, r10, #248 @ pass key schedule - bl _bsaes_key_convert - veor q7, q7, q15 @ fix up last round key - vstmia r12, {q7} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - - vld1.8 {q8}, [r0] @ initial tweak - adr r2, .Lxts_magic - - subs r9, #0x80 - blo .Lxts_enc_short - b .Lxts_enc_loop - -.align 4 -.Lxts_enc_loop: - vldmia r2, {q5} @ load XTS magic - vshr.s64 q6, q8, #63 - mov r0, sp - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! 
- vswp d13,d12 - vshr.s64 q7, q9, #63 - veor q9, q9, q6 - vand q7, q7, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q10, #63 - veor q10, q10, q7 - vand q6, q6, q5 - vld1.8 {q0}, [r7]! - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q11, #63 - veor q11, q11, q6 - vand q7, q7, q5 - vld1.8 {q1}, [r7]! - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q12, #63 - veor q12, q12, q7 - vand q6, q6, q5 - vld1.8 {q2}, [r7]! - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q13, #63 - veor q13, q13, q6 - vand q7, q7, q5 - vld1.8 {q3}, [r7]! - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q14, #63 - veor q14, q14, q7 - vand q6, q6, q5 - vld1.8 {q4}, [r7]! - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q15, #63 - veor q15, q15, q6 - vand q7, q7, q5 - vld1.8 {q5}, [r7]! - veor q4, q4, q12 - vadd.u64 q8, q15, q15 - vst1.64 {q15}, [r0,:128]! - vswp d15,d14 - veor q8, q8, q7 - vst1.64 {q8}, [r0,:128] @ next round tweak - - vld1.8 {q6-q7}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - veor q7, q7, q15 - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vld1.64 {q14-q15}, [r0,:128]! - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q2, q14 - vst1.8 {q10-q11}, [r8]! - veor q13, q5, q15 - vst1.8 {q12-q13}, [r8]! 
- - vld1.64 {q8}, [r0,:128] @ next round tweak - - subs r9, #0x80 - bpl .Lxts_enc_loop - -.Lxts_enc_short: - adds r9, #0x70 - bmi .Lxts_enc_done - - vldmia r2, {q5} @ load XTS magic - vshr.s64 q7, q8, #63 - mov r0, sp - vand q7, q7, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q9, #63 - veor q9, q9, q7 - vand q6, q6, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q10, #63 - veor q10, q10, q6 - vand q7, q7, q5 - vld1.8 {q0}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_1 - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q11, #63 - veor q11, q11, q7 - vand q6, q6, q5 - vld1.8 {q1}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_2 - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q12, #63 - veor q12, q12, q6 - vand q7, q7, q5 - vld1.8 {q2}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_3 - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q13, #63 - veor q13, q13, q7 - vand q6, q6, q5 - vld1.8 {q3}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_4 - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q14, #63 - veor q14, q14, q6 - vand q7, q7, q5 - vld1.8 {q4}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_5 - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q15, #63 - veor q15, q15, q7 - vand q6, q6, q5 - vld1.8 {q5}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_6 - veor q4, q4, q12 - sub r9, #0x10 - vst1.64 {q15}, [r0,:128] @ next round tweak - - vld1.8 {q6}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! 
- veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vld1.64 {q14}, [r0,:128]! - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q2, q14 - vst1.8 {q10-q11}, [r8]! - vst1.8 {q12}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - - veor q4, q4, q12 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q5, q5, q13 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - vst1.8 {q10-q11}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done - -@ put this in range for both ARM and Thumb mode adr instructions -.align 5 -.Lxts_magic: - .quad 1, 0x87 - -.align 5 -.Lxts_enc_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - - veor q3, q3, q11 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q4, q4, q12 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - vst1.8 {q10}, [r8]! 
- - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - - veor q2, q2, q10 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q3, q3, q11 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vst1.8 {q8-q9}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - - veor q1, q1, q9 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q2, q2, q10 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - vst1.8 {q8}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - - veor q0, q0, q8 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q1, q1, q9 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - vst1.8 {q0-q1}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_1: - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_encrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r8]! 
- mov r3, r4 - - vmov q8, q9 @ next round tweak - -.Lxts_enc_done: -#ifndef XTS_CHAIN_TWEAK - adds r9, #0x10 - beq .Lxts_enc_ret - sub r6, r8, #0x10 - -.Lxts_enc_steal: - ldrb r0, [r7], #1 - ldrb r1, [r8, #-0x10] - strb r0, [r8, #-0x10] - strb r1, [r8], #1 - - subs r9, #1 - bhi .Lxts_enc_steal - - vld1.8 {q0}, [r6] - mov r0, sp - veor q0, q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_encrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r6] - mov r3, r4 -#endif - -.Lxts_enc_ret: - bic r0, r3, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_enc_bzero - - mov sp, r3 -#ifdef XTS_CHAIN_TWEAK - vst1.8 {q8}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_encrypt,.-bsaes_xts_encrypt - -.globl bsaes_xts_decrypt -.type bsaes_xts_decrypt,%function -.align 4 -bsaes_xts_decrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future r3 - - mov r7, r0 - mov r8, r1 - mov r9, r2 - mov r10, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0, sp @ pointer to initial tweak -#endif - - ldr r1, [r10, #240] @ get # of rounds - mov r3, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key - @ add r12, #96 @ size of bit-sliced key schedule - sub r12, #48 @ place for tweak[9] - - @ populate the key schedule - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - add r4, sp, #0x90 - vldmia r4, {q6} - vstmia r12, {q15} @ save 
last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} -#else - ldr r12, [r10, #244] - eors r12, #1 - beq 0f - - str r12, [r10, #244] - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - add r12, r10, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, r10, #248 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - vld1.8 {q8}, [r0] @ initial tweak - adr r2, .Lxts_magic - -#ifndef XTS_CHAIN_TWEAK - tst r9, #0xf @ if not multiple of 16 - it ne @ Thumb2 thing, sanity check in ARM - subne r9, #0x10 @ subtract another 16 bytes -#endif - subs r9, #0x80 - - blo .Lxts_dec_short - b .Lxts_dec_loop - -.align 4 -.Lxts_dec_loop: - vldmia r2, {q5} @ load XTS magic - vshr.s64 q6, q8, #63 - mov r0, sp - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q9, #63 - veor q9, q9, q6 - vand q7, q7, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q10, #63 - veor q10, q10, q7 - vand q6, q6, q5 - vld1.8 {q0}, [r7]! - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q11, #63 - veor q11, q11, q6 - vand q7, q7, q5 - vld1.8 {q1}, [r7]! - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q12, #63 - veor q12, q12, q7 - vand q6, q6, q5 - vld1.8 {q2}, [r7]! - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q13, #63 - veor q13, q13, q6 - vand q7, q7, q5 - vld1.8 {q3}, [r7]! - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q14, #63 - veor q14, q14, q7 - vand q6, q6, q5 - vld1.8 {q4}, [r7]! - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q15, #63 - veor q15, q15, q6 - vand q7, q7, q5 - vld1.8 {q5}, [r7]! 
- veor q4, q4, q12 - vadd.u64 q8, q15, q15 - vst1.64 {q15}, [r0,:128]! - vswp d15,d14 - veor q8, q8, q7 - vst1.64 {q8}, [r0,:128] @ next round tweak - - vld1.8 {q6-q7}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - veor q7, q7, q15 - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vld1.64 {q14-q15}, [r0,:128]! - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q3, q14 - vst1.8 {q10-q11}, [r8]! - veor q13, q5, q15 - vst1.8 {q12-q13}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - - subs r9, #0x80 - bpl .Lxts_dec_loop - -.Lxts_dec_short: - adds r9, #0x70 - bmi .Lxts_dec_done - - vldmia r2, {q5} @ load XTS magic - vshr.s64 q7, q8, #63 - mov r0, sp - vand q7, q7, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q9, #63 - veor q9, q9, q7 - vand q6, q6, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q10, #63 - veor q10, q10, q6 - vand q7, q7, q5 - vld1.8 {q0}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_1 - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q11, #63 - veor q11, q11, q7 - vand q6, q6, q5 - vld1.8 {q1}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_2 - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q12, #63 - veor q12, q12, q6 - vand q7, q7, q5 - vld1.8 {q2}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_3 - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q13, #63 - veor q13, q13, q7 - vand q6, q6, q5 - vld1.8 {q3}, [r7]! 
- subs r9, #0x10 - bmi .Lxts_dec_4 - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q14, #63 - veor q14, q14, q6 - vand q7, q7, q5 - vld1.8 {q4}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_5 - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q15, #63 - veor q15, q15, q7 - vand q6, q6, q5 - vld1.8 {q5}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_6 - veor q4, q4, q12 - sub r9, #0x10 - vst1.64 {q15}, [r0,:128] @ next round tweak - - vld1.8 {q6}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vld1.64 {q14}, [r0,:128]! - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q3, q14 - vst1.8 {q10-q11}, [r8]! - vst1.8 {q12}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - - veor q4, q4, q12 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q5, q5, q13 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - vst1.8 {q10-q11}, [r8]! 
- - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - - veor q3, q3, q11 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q4, q4, q12 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - vst1.8 {q10}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - - veor q2, q2, q10 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q3, q3, q11 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vst1.8 {q8-q9}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - - veor q1, q1, q9 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q2, q2, q10 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - vst1.8 {q8}, [r8]! 
- - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - - veor q0, q0, q8 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q1, q1, q9 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - vst1.8 {q0-q1}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_1: - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - mov r5, r2 @ preserve magic - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r8]! - mov r3, r4 - mov r2, r5 - - vmov q8, q9 @ next round tweak - -.Lxts_dec_done: -#ifndef XTS_CHAIN_TWEAK - adds r9, #0x10 - beq .Lxts_dec_ret - - @ calculate one round of extra tweak for the stolen ciphertext - vldmia r2, {q5} - vshr.s64 q6, q8, #63 - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vswp d13,d12 - veor q9, q9, q6 - - @ perform the final decryption with the last tweak value - vld1.8 {q0}, [r7]! 
- mov r0, sp - veor q0, q0, q9 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q9 - vst1.8 {q0}, [r8] - - mov r6, r8 -.Lxts_dec_steal: - ldrb r1, [r8] - ldrb r0, [r7], #1 - strb r1, [r8, #0x10] - strb r0, [r8], #1 - - subs r9, #1 - bhi .Lxts_dec_steal - - vld1.8 {q0}, [r6] - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r6] - mov r3, r4 -#endif - -.Lxts_dec_ret: - bic r0, r3, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_dec_bzero - - mov sp, r3 -#ifdef XTS_CHAIN_TWEAK - vst1.8 {q8}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_decrypt,.-bsaes_xts_decrypt -#endif diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c deleted file mode 100644 index d8e06de72ef3..000000000000 --- a/arch/arm/crypto/aesbs-glue.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES - * - * Copyright (C) 2013 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "aes_glue.h" - -#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE) - -struct BS_KEY { - struct AES_KEY rk; - int converted; - u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE]; -} __aligned(8); - -asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in); -asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in); - -asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 iv[]); - -asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks, - struct BS_KEY *key, u8 const iv[]); - -asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 tweak[]); - -asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 tweak[]); - -struct aesbs_cbc_ctx { - struct AES_KEY enc; - struct BS_KEY dec; -}; - -struct aesbs_ctr_ctx { - struct BS_KEY enc; -}; - -struct aesbs_xts_ctx { - struct BS_KEY enc; - struct BS_KEY dec; - struct AES_KEY twkey; -}; - -static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 8; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->dec.rk = ctx->enc; - private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); - ctx->dec.converted = 0; - return 0; -} - -static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 8; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->enc.converted = 0; - return 0; -} - -static int aesbs_xts_set_key(struct 
crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 4; - int err; - - err = xts_verify_key(tfm, in_key, key_len); - if (err) - return err; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->dec.rk = ctx->enc.rk; - private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); - private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey); - ctx->enc.converted = ctx->dec.converted = 0; - return 0; -} - -static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm, - const u8 *src, u8 *dst) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - AES_encrypt(src, dst, &ctx->enc); -} - -static int aesbs_cbc_encrypt(struct skcipher_request *req) -{ - return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one); -} - -static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm, - const u8 *src, u8 *dst) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - AES_decrypt(src, dst, &ctx->dec.rk); -} - -static int aesbs_cbc_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - for (err = skcipher_walk_virt(&walk, req, false); - (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) { - u32 blocks = nbytes / AES_BLOCK_SIZE; - u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; - u8 *iv = walk.iv; - - if (blocks >= 8) { - kernel_neon_begin(); - bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv); - kernel_neon_end(); - nbytes %= AES_BLOCK_SIZE; - continue; - } - - nbytes = crypto_cbc_decrypt_blocks(&walk, tfm, - aesbs_decrypt_one); - } - return err; -} - -static void inc_be128_ctr(__be32 ctr[], u32 addend) -{ - int i; - - for (i = 3; i >= 0; i--, addend = 1) { - u32 n = 
be32_to_cpu(ctr[i]) + addend; - - ctr[i] = cpu_to_be32(n); - if (n >= addend) - break; - } -} - -static int aesbs_ctr_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 blocks; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - __be32 *ctr = (__be32 *)walk.iv; - u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]); - - /* avoid 32 bit counter overflow in the NEON code */ - if (unlikely(headroom < blocks)) { - blocks = headroom + 1; - tail = walk.nbytes - blocks * AES_BLOCK_SIZE; - } - kernel_neon_begin(); - bsaes_ctr32_encrypt_blocks(walk.src.virt.addr, - walk.dst.virt.addr, blocks, - &ctx->enc, walk.iv); - kernel_neon_end(); - inc_be128_ctr(ctr, blocks); - - err = skcipher_walk_done(&walk, tail); - } - if (walk.nbytes) { - u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; - u8 ks[AES_BLOCK_SIZE]; - - AES_encrypt(walk.iv, ks, &ctx->enc.rk); - if (tdst != tsrc) - memcpy(tdst, tsrc, walk.nbytes); - crypto_xor(tdst, ks, walk.nbytes); - err = skcipher_walk_done(&walk, 0); - } - return err; -} - -static int aesbs_xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - /* generate the initial tweak */ - AES_encrypt(walk.iv, walk.iv, &ctx->twkey); - - while (walk.nbytes) { - kernel_neon_begin(); - bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, &ctx->enc, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - } - return err; -} - -static int aesbs_xts_decrypt(struct skcipher_request *req) -{ - struct 
crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - /* generate the initial tweak */ - AES_encrypt(walk.iv, walk.iv, &ctx->twkey); - - while (walk.nbytes) { - kernel_neon_begin(); - bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, &ctx->dec, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - } - return err; -} - -static struct skcipher_alg aesbs_algs[] = { { - .base = { - .cra_name = "__cbc(aes)", - .cra_driver_name = "__cbc-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_cbc_set_key, - .encrypt = aesbs_cbc_encrypt, - .decrypt = aesbs_cbc_decrypt, -}, { - .base = { - .cra_name = "__ctr(aes)", - .cra_driver_name = "__ctr-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = aesbs_ctr_set_key, - .encrypt = aesbs_ctr_encrypt, - .decrypt = aesbs_ctr_encrypt, -}, { - .base = { - .cra_name = "__xts(aes)", - .cra_driver_name = "__xts-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_xts_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_xts_set_key, - .encrypt = aesbs_xts_encrypt, 
- .decrypt = aesbs_xts_decrypt, -} }; - -struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)]; - -static void aesbs_mod_exit(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++) - simd_skcipher_free(aesbs_simd_algs[i]); - - crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); -} - -static int __init aesbs_mod_init(void) -{ - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; - int err; - int i; - - if (!cpu_has_neon()) - return -ENODEV; - - err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) { - algname = aesbs_algs[i].base.cra_name + 2; - drvname = aesbs_algs[i].base.cra_driver_name + 2; - basename = aesbs_algs[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aesbs_simd_algs[i] = simd; - } - - return 0; - -unregister_simds: - aesbs_mod_exit(); - return err; -} - -module_init(aesbs_mod_init); -module_exit(aesbs_mod_exit); - -MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL"); diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl deleted file mode 100644 index a4d3856e7d24..000000000000 --- a/arch/arm/crypto/bsaes-armv7.pl +++ /dev/null @@ -1,2471 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# -# Specific modes and adaptation for Linux kernel by Ard Biesheuvel -# . Permission to use under GPL terms is -# granted. 
-# ==================================================================== - -# Bit-sliced AES for ARM NEON -# -# February 2012. -# -# This implementation is direct adaptation of bsaes-x86_64 module for -# ARM NEON. Except that this module is endian-neutral [in sense that -# it can be compiled for either endianness] by courtesy of vld1.8's -# neutrality. Initial version doesn't implement interface to OpenSSL, -# only low-level primitives and unsupported entry points, just enough -# to collect performance results, which for Cortex-A8 core are: -# -# encrypt 19.5 cycles per byte processed with 128-bit key -# decrypt 22.1 cycles per byte processed with 128-bit key -# key conv. 440 cycles per 128-bit key/0.18 of 8x block -# -# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, -# which is [much] worse than anticipated (for further details see -# http://www.openssl.org/~appro/Snapdragon-S4.html). -# -# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code -# manages in 20.0 cycles]. -# -# When comparing to x86_64 results keep in mind that NEON unit is -# [mostly] single-issue and thus can't [fully] benefit from -# instruction-level parallelism. And when comparing to aes-armv4 -# results keep in mind key schedule conversion overhead (see -# bsaes-x86_64.pl for further details)... -# -# - -# April-August 2013 -# -# Add CBC, CTR and XTS subroutines, adapt for kernel use. 
-# -# - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); -my @XMM=map("q$_",(0..15)); - -{ -my ($key,$rounds,$const)=("r4","r5","r6"); - -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } - -sub Sbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InBasisChange (@b); - &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); - &OutBasisChange (@b[7,1,4,2,6,5,0,3]); -} - -sub InBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb -my @b=@_[0..7]; -$code.=<<___; - veor @b[2], @b[2], @b[1] - veor @b[5], @b[5], @b[6] - veor @b[3], @b[3], @b[0] - veor @b[6], @b[6], @b[2] - veor @b[5], @b[5], @b[0] - - veor @b[6], @b[6], @b[3] - veor @b[3], @b[3], @b[7] - veor @b[7], @b[7], @b[5] - veor @b[3], @b[3], @b[4] - veor @b[4], @b[4], @b[5] - - veor @b[2], @b[2], @b[7] - veor @b[3], @b[3], @b[1] - veor @b[1], @b[1], @b[5] -___ -} - -sub OutBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb -my @b=@_[0..7]; -$code.=<<___; - veor @b[0], @b[0], @b[6] - veor @b[1], @b[1], @b[4] - veor @b[4], @b[4], @b[6] - veor @b[2], @b[2], @b[0] - veor @b[6], @b[6], @b[1] - - veor @b[1], @b[1], @b[5] - veor @b[5], @b[5], @b[3] - veor @b[3], @b[3], @b[7] - veor @b[7], @b[7], @b[5] - veor @b[2], @b[2], @b[5] - - veor @b[4], @b[4], @b[7] -___ -} - -sub InvSbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InvInBasisChange (@b); - &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); - &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); -} - -sub InvInBasisChange { # OutBasisChange in reverse (with 
twist) -my @b=@_[5,1,2,6,3,7,0,4]; -$code.=<<___ - veor @b[1], @b[1], @b[7] - veor @b[4], @b[4], @b[7] - - veor @b[7], @b[7], @b[5] - veor @b[1], @b[1], @b[3] - veor @b[2], @b[2], @b[5] - veor @b[3], @b[3], @b[7] - - veor @b[6], @b[6], @b[1] - veor @b[2], @b[2], @b[0] - veor @b[5], @b[5], @b[3] - veor @b[4], @b[4], @b[6] - veor @b[0], @b[0], @b[6] - veor @b[1], @b[1], @b[4] -___ -} - -sub InvOutBasisChange { # InBasisChange in reverse -my @b=@_[2,5,7,3,6,1,0,4]; -$code.=<<___; - veor @b[1], @b[1], @b[5] - veor @b[2], @b[2], @b[7] - - veor @b[3], @b[3], @b[1] - veor @b[4], @b[4], @b[5] - veor @b[7], @b[7], @b[5] - veor @b[3], @b[3], @b[4] - veor @b[5], @b[5], @b[0] - veor @b[3], @b[3], @b[7] - veor @b[6], @b[6], @b[2] - veor @b[2], @b[2], @b[1] - veor @b[6], @b[6], @b[3] - - veor @b[3], @b[3], @b[0] - veor @b[5], @b[5], @b[6] -___ -} - -sub Mul_GF4 { -#;************************************************************* -#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * -#;************************************************************* -my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - vand $t0, $t0, $x0 - veor $x0, $x0, $x1 - vand $t1, $x1, $y0 - vand $x0, $x0, $y1 - veor $x1, $t1, $t0 - veor $x0, $x0, $t1 -___ -} - -sub Mul_GF4_N { # not used, see next subroutine -# multiply and scale by N -my ($x0,$x1,$y0,$y1,$t0)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - vand $t0, $t0, $x0 - veor $x0, $x0, $x1 - vand $x1, $x1, $y0 - vand $x0, $x0, $y1 - veor $x1, $x1, $x0 - veor $x0, $x0, $t0 -___ -} - -sub Mul_GF4_N_GF4 { -# interleaved Mul_GF4_N and Mul_GF4 -my ($x0,$x1,$y0,$y1,$t0, - $x2,$x3,$y2,$y3,$t1)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - veor $t1, $y2, $y3 - vand $t0, $t0, $x0 - vand $t1, $t1, $x2 - veor $x0, $x0, $x1 - veor $x2, $x2, $x3 - vand $x1, $x1, $y0 - vand $x3, $x3, $y2 - vand $x0, $x0, $y1 - vand $x2, $x2, $y3 - veor $x1, $x1, $x0 - veor $x2, $x2, $x3 - veor $x0, $x0, $t0 - veor $x3, $x3, $t1 -___ -} -sub Mul_GF16_2 { -my @x=@_[0..7]; 
-my @y=@_[8..11]; -my @t=@_[12..15]; -$code.=<<___; - veor @t[0], @x[0], @x[2] - veor @t[1], @x[1], @x[3] -___ - &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); -$code.=<<___; - veor @y[0], @y[0], @y[2] - veor @y[1], @y[1], @y[3] -___ - Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[2], @x[3], @y[2], @y[3], @t[2]); -$code.=<<___; - veor @x[0], @x[0], @t[0] - veor @x[2], @x[2], @t[0] - veor @x[1], @x[1], @t[1] - veor @x[3], @x[3], @t[1] - - veor @t[0], @x[4], @x[6] - veor @t[1], @x[5], @x[7] -___ - &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[6], @x[7], @y[2], @y[3], @t[2]); -$code.=<<___; - veor @y[0], @y[0], @y[2] - veor @y[1], @y[1], @y[3] -___ - &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); -$code.=<<___; - veor @x[4], @x[4], @t[0] - veor @x[6], @x[6], @t[0] - veor @x[5], @x[5], @t[1] - veor @x[7], @x[7], @t[1] -___ -} -sub Inv_GF256 { -#;******************************************************************** -#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * -#;******************************************************************** -my @x=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; -# direct optimizations from hardware -$code.=<<___; - veor @t[3], @x[4], @x[6] - veor @t[2], @x[5], @x[7] - veor @t[1], @x[1], @x[3] - veor @s[1], @x[7], @x[6] - vmov @t[0], @t[2] - veor @s[0], @x[0], @x[2] - - vorr @t[2], @t[2], @t[1] - veor @s[3], @t[3], @t[0] - vand @s[2], @t[3], @s[0] - vorr @t[3], @t[3], @s[0] - veor @s[0], @s[0], @t[1] - vand @t[0], @t[0], @t[1] - veor @t[1], @x[3], @x[2] - vand @s[3], @s[3], @s[0] - vand @s[1], @s[1], @t[1] - veor @t[1], @x[4], @x[5] - veor @s[0], @x[1], @x[0] - veor @t[3], @t[3], @s[1] - veor @t[2], @t[2], @s[1] - vand @s[1], @t[1], @s[0] - vorr @t[1], @t[1], @s[0] - veor @t[3], @t[3], @s[3] - veor @t[0], @t[0], @s[1] - veor @t[2], @t[2], @s[2] - veor @t[1], @t[1], @s[3] - veor @t[0], @t[0], @s[2] - vand @s[0], @x[7], @x[3] - veor @t[1], @t[1], @s[2] - vand @s[1], @x[6], @x[2] - vand @s[2], @x[5], @x[1] - 
vorr @s[3], @x[4], @x[0] - veor @t[3], @t[3], @s[0] - veor @t[1], @t[1], @s[2] - veor @t[0], @t[0], @s[3] - veor @t[2], @t[2], @s[1] - - @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - - @ new smaller inversion - - vand @s[2], @t[3], @t[1] - vmov @s[0], @t[0] - - veor @s[1], @t[2], @s[2] - veor @s[3], @t[0], @s[2] - veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] - - vbsl @s[1], @t[1], @t[0] - vbsl @s[3], @t[3], @t[2] - veor @t[3], @t[3], @t[2] - - vbsl @s[0], @s[1], @s[2] - vbsl @t[0], @s[2], @s[1] - - vand @s[2], @s[0], @s[3] - veor @t[1], @t[1], @t[0] - - veor @s[2], @s[2], @t[3] -___ -# output in s3, s2, s1, t1 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); - -### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb -} - -# AES linear components - -sub ShiftRows { -my @x=@_[0..7]; -my @t=@_[8..11]; -my $mask=pop; -$code.=<<___; - vldmia $key!, {@t[0]-@t[3]} - veor @t[0], @t[0], @x[0] - veor @t[1], @t[1], @x[1] - vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` - vldmia $key!, {@t[0]} - veor @t[2], @t[2], @x[2] - vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` - vldmia $key!, {@t[1]} - veor @t[3], @t[3], @x[3] - vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` - vldmia $key!, {@t[2]} - vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` - vldmia $key!, {@t[3]} - veor @t[0], @t[0], @x[4] - veor @t[1], @t[1], @x[5] - vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` - veor @t[2], @t[2], @x[6] - vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` - veor @t[3], @t[3], @x[7] - vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` - vtbl.8 
`&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` - vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` -___ -} - -sub MixColumns { -# modified to emit output in order suitable for feeding back to aesenc[last] -my @x=@_[0..7]; -my @t=@_[8..15]; -my $inv=@_[16]; # optional -$code.=<<___; - vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 - vext.8 @t[1], @x[1], @x[1], #12 - veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) - vext.8 @t[2], @x[2], @x[2], #12 - veor @x[1], @x[1], @t[1] - vext.8 @t[3], @x[3], @x[3], #12 - veor @x[2], @x[2], @t[2] - vext.8 @t[4], @x[4], @x[4], #12 - veor @x[3], @x[3], @t[3] - vext.8 @t[5], @x[5], @x[5], #12 - veor @x[4], @x[4], @t[4] - vext.8 @t[6], @x[6], @x[6], #12 - veor @x[5], @x[5], @t[5] - vext.8 @t[7], @x[7], @x[7], #12 - veor @x[6], @x[6], @t[6] - - veor @t[1], @t[1], @x[0] - veor @x[7], @x[7], @t[7] - vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor @t[2], @t[2], @x[1] - veor @t[0], @t[0], @x[7] - veor @t[1], @t[1], @x[7] - vext.8 @x[1], @x[1], @x[1], #8 - veor @t[5], @t[5], @x[4] - veor @x[0], @x[0], @t[0] - veor @t[6], @t[6], @x[5] - veor @x[1], @x[1], @t[1] - vext.8 @t[0], @x[4], @x[4], #8 - veor @t[4], @t[4], @x[3] - vext.8 @t[1], @x[5], @x[5], #8 - veor @t[7], @t[7], @x[6] - vext.8 @x[4], @x[3], @x[3], #8 - veor @t[3], @t[3], @x[2] - vext.8 @x[5], @x[7], @x[7], #8 - veor @t[4], @t[4], @x[7] - vext.8 @x[3], @x[6], @x[6], #8 - veor @t[3], @t[3], @x[7] - vext.8 @x[6], @x[2], @x[2], #8 - veor @x[7], @t[1], @t[5] -___ -$code.=<<___ if (!$inv); - veor @x[2], @t[0], @t[4] - veor @x[4], @x[4], @t[3] - veor @x[5], @x[5], @t[7] - veor @x[3], @x[3], @t[6] - @ vmov @x[2], @t[0] - veor @x[6], @x[6], @t[2] - @ vmov @x[7], @t[1] -___ -$code.=<<___ if ($inv); - veor @t[3], @t[3], @x[4] - veor @x[5], @x[5], @t[7] - veor @x[2], @x[3], @t[6] - veor @x[3], @t[0], @t[4] - veor @x[4], @x[6], @t[2] - vmov @x[6], @t[3] - @ vmov @x[7], @t[1] -___ -} - -sub InvMixColumns_orig { -my @x=@_[0..7]; -my @t=@_[8..15]; - 
-$code.=<<___; - @ multiplication by 0x0e - vext.8 @t[7], @x[7], @x[7], #12 - vmov @t[2], @x[2] - veor @x[2], @x[2], @x[5] @ 2 5 - veor @x[7], @x[7], @x[5] @ 7 5 - vext.8 @t[0], @x[0], @x[0], #12 - vmov @t[5], @x[5] - veor @x[5], @x[5], @x[0] @ 5 0 [1] - veor @x[0], @x[0], @x[1] @ 0 1 - vext.8 @t[1], @x[1], @x[1], #12 - veor @x[1], @x[1], @x[2] @ 1 25 - veor @x[0], @x[0], @x[6] @ 01 6 [2] - vext.8 @t[3], @x[3], @x[3], #12 - veor @x[1], @x[1], @x[3] @ 125 3 [4] - veor @x[2], @x[2], @x[0] @ 25 016 [3] - veor @x[3], @x[3], @x[7] @ 3 75 - veor @x[7], @x[7], @x[6] @ 75 6 [0] - vext.8 @t[6], @x[6], @x[6], #12 - vmov @t[4], @x[4] - veor @x[6], @x[6], @x[4] @ 6 4 - veor @x[4], @x[4], @x[3] @ 4 375 [6] - veor @x[3], @x[3], @x[7] @ 375 756=36 - veor @x[6], @x[6], @t[5] @ 64 5 [7] - veor @x[3], @x[3], @t[2] @ 36 2 - vext.8 @t[5], @t[5], @t[5], #12 - veor @x[3], @x[3], @t[4] @ 362 4 [5] -___ - my @y = @x[7,5,0,2,1,3,4,6]; -$code.=<<___; - @ multiplication by 0x0b - veor @y[1], @y[1], @y[0] - veor @y[0], @y[0], @t[0] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[1], @y[1], @t[1] - veor @y[0], @y[0], @t[5] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[1], @y[1], @t[6] - veor @y[0], @y[0], @t[7] - veor @t[7], @t[7], @t[6] @ clobber t[7] - - veor @y[3], @y[3], @t[0] - veor @y[1], @y[1], @y[0] - vext.8 @t[0], @t[0], @t[0], #12 - veor @y[2], @y[2], @t[1] - veor @y[4], @y[4], @t[1] - vext.8 @t[1], @t[1], @t[1], #12 - veor @y[2], @y[2], @t[2] - veor @y[3], @y[3], @t[2] - veor @y[5], @y[5], @t[2] - veor @y[2], @y[2], @t[7] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[3], @y[3], @t[3] - veor @y[6], @y[6], @t[3] - veor @y[4], @y[4], @t[3] - veor @y[7], @y[7], @t[4] - vext.8 @t[3], @t[3], @t[3], #12 - veor @y[5], @y[5], @t[4] - veor @y[7], @y[7], @t[7] - veor @t[7], @t[7], @t[5] @ clobber t[7] even more - veor @y[3], @y[3], @t[5] - veor @y[4], @y[4], @t[4] - - veor @y[5], @y[5], @t[7] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[6], @y[6], @t[7] - veor @y[4], @y[4], @t[7] - - veor @t[7], @t[7], 
@t[5] - vext.8 @t[5], @t[5], @t[5], #12 - - @ multiplication by 0x0d - veor @y[4], @y[4], @y[7] - veor @t[7], @t[7], @t[6] @ restore t[7] - veor @y[7], @y[7], @t[4] - vext.8 @t[6], @t[6], @t[6], #12 - veor @y[2], @y[2], @t[0] - veor @y[7], @y[7], @t[5] - vext.8 @t[7], @t[7], @t[7], #12 - veor @y[2], @y[2], @t[2] - - veor @y[3], @y[3], @y[1] - veor @y[1], @y[1], @t[1] - veor @y[0], @y[0], @t[0] - veor @y[3], @y[3], @t[0] - veor @y[1], @y[1], @t[5] - veor @y[0], @y[0], @t[5] - vext.8 @t[0], @t[0], @t[0], #12 - veor @y[1], @y[1], @t[7] - veor @y[0], @y[0], @t[6] - veor @y[3], @y[3], @y[1] - veor @y[4], @y[4], @t[1] - vext.8 @t[1], @t[1], @t[1], #12 - - veor @y[7], @y[7], @t[7] - veor @y[4], @y[4], @t[2] - veor @y[5], @y[5], @t[2] - veor @y[2], @y[2], @t[6] - veor @t[6], @t[6], @t[3] @ clobber t[6] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[4], @y[4], @y[7] - veor @y[3], @y[3], @t[6] - - veor @y[6], @y[6], @t[6] - veor @y[5], @y[5], @t[5] - vext.8 @t[5], @t[5], @t[5], #12 - veor @y[6], @y[6], @t[4] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[5], @y[5], @t[6] - veor @y[6], @y[6], @t[7] - vext.8 @t[7], @t[7], @t[7], #12 - veor @t[6], @t[6], @t[3] @ restore t[6] - vext.8 @t[3], @t[3], @t[3], #12 - - @ multiplication by 0x09 - veor @y[4], @y[4], @y[1] - veor @t[1], @t[1], @y[1] @ t[1]=y[1] - veor @t[0], @t[0], @t[5] @ clobber t[0] - vext.8 @t[6], @t[6], @t[6], #12 - veor @t[1], @t[1], @t[5] - veor @y[3], @y[3], @t[0] - veor @t[0], @t[0], @y[0] @ t[0]=y[0] - veor @t[1], @t[1], @t[6] - veor @t[6], @t[6], @t[7] @ clobber t[6] - veor @y[4], @y[4], @t[1] - veor @y[7], @y[7], @t[4] - veor @y[6], @y[6], @t[3] - veor @y[5], @y[5], @t[2] - veor @t[4], @t[4], @y[4] @ t[4]=y[4] - veor @t[3], @t[3], @y[3] @ t[3]=y[3] - veor @t[5], @t[5], @y[5] @ t[5]=y[5] - veor @t[2], @t[2], @y[2] @ t[2]=y[2] - veor @t[3], @t[3], @t[7] - veor @XMM[5], @t[5], @t[6] - veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] - veor @XMM[2], @t[2], @t[6] - veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] - - vmov @XMM[0], @t[0] - 
vmov @XMM[1], @t[1] - @ vmov @XMM[2], @t[2] - vmov @XMM[3], @t[3] - vmov @XMM[4], @t[4] - @ vmov @XMM[5], @t[5] - @ vmov @XMM[6], @t[6] - @ vmov @XMM[7], @t[7] -___ -} - -sub InvMixColumns { -my @x=@_[0..7]; -my @t=@_[8..15]; - -# Thanks to Jussi Kivilinna for providing pointer to -# -# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | -# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | -# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | -# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | - -$code.=<<___; - @ multiplication by 0x05-0x00-0x04-0x00 - vext.8 @t[0], @x[0], @x[0], #8 - vext.8 @t[6], @x[6], @x[6], #8 - vext.8 @t[7], @x[7], @x[7], #8 - veor @t[0], @t[0], @x[0] - vext.8 @t[1], @x[1], @x[1], #8 - veor @t[6], @t[6], @x[6] - vext.8 @t[2], @x[2], @x[2], #8 - veor @t[7], @t[7], @x[7] - vext.8 @t[3], @x[3], @x[3], #8 - veor @t[1], @t[1], @x[1] - vext.8 @t[4], @x[4], @x[4], #8 - veor @t[2], @t[2], @x[2] - vext.8 @t[5], @x[5], @x[5], #8 - veor @t[3], @t[3], @x[3] - veor @t[4], @t[4], @x[4] - veor @t[5], @t[5], @x[5] - - veor @x[0], @x[0], @t[6] - veor @x[1], @x[1], @t[6] - veor @x[2], @x[2], @t[0] - veor @x[4], @x[4], @t[2] - veor @x[3], @x[3], @t[1] - veor @x[1], @x[1], @t[7] - veor @x[2], @x[2], @t[7] - veor @x[4], @x[4], @t[6] - veor @x[5], @x[5], @t[3] - veor @x[3], @x[3], @t[6] - veor @x[6], @x[6], @t[4] - veor @x[4], @x[4], @t[7] - veor @x[5], @x[5], @t[7] - veor @x[7], @x[7], @t[5] -___ - &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 -} - -sub swapmove { -my ($a,$b,$n,$mask,$t)=@_; -$code.=<<___; - vshr.u64 $t, $b, #$n - veor $t, $t, $a - vand $t, $t, $mask - veor $a, $a, $t - vshl.u64 $t, $t, #$n - veor $b, $b, $t -___ -} -sub swapmove2x { -my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; -$code.=<<___; - vshr.u64 $t0, $b0, #$n - vshr.u64 $t1, $b1, #$n - veor $t0, $t0, $a0 - veor $t1, $t1, $a1 - vand $t0, $t0, $mask - vand $t1, $t1, $mask - veor $a0, $a0, $t0 - vshl.u64 $t0, $t0, #$n - veor $a1, $a1, $t1 - vshl.u64 $t1, $t1, #$n - veor $b0, $b0, $t0 - veor $b1, 
$b1, $t1 -___ -} - -sub bitslice { -my @x=reverse(@_[0..7]); -my ($t0,$t1,$t2,$t3)=@_[8..11]; -$code.=<<___; - vmov.i8 $t0,#0x55 @ compose .LBS0 - vmov.i8 $t1,#0x33 @ compose .LBS1 -___ - &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); - &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); -$code.=<<___; - vmov.i8 $t0,#0x0f @ compose .LBS2 -___ - &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); - &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - - &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); - &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); -} - -$code.=<<___; -#ifndef __KERNEL__ -# include "arm_arch.h" - -# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} -# define VFP_ABI_POP vldmia sp!,{d8-d15} -# define VFP_ABI_FRAME 0x40 -#else -# define VFP_ABI_PUSH -# define VFP_ABI_POP -# define VFP_ABI_FRAME 0 -# define BSAES_ASM_EXTENDED_KEY -# define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ 7 -#endif - -#ifdef __thumb__ -# define adrl adr -#endif - -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.text -.syntax unified @ ARMv7-capable assembler is expected to handle this -#ifdef __thumb2__ -.thumb -#else -.code 32 -#endif - -.type _bsaes_decrypt8,%function -.align 4 -_bsaes_decrypt8: - adr $const,_bsaes_decrypt8 - vldmia $key!, {@XMM[9]} @ round 0 key - add $const,$const,#.LM0ISR-_bsaes_decrypt8 - - vldmia $const!, {@XMM[8]} @ .LM0ISR - veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key - veor @XMM[11], @XMM[1], @XMM[9] - vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` - veor @XMM[12], @XMM[2], @XMM[9] - vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` - veor @XMM[13], @XMM[3], @XMM[9] - vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` - veor @XMM[14], @XMM[4], @XMM[9] - vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` - veor @XMM[15], @XMM[5], @XMM[9] - 
vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` - veor @XMM[10], @XMM[6], @XMM[9] - vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` - veor @XMM[11], @XMM[7], @XMM[9] - vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` - vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - sub $rounds,$rounds,#1 - b .Ldec_sbox -.align 4 -.Ldec_loop: -___ - &ShiftRows (@XMM[0..7, 8..12]); -$code.=".Ldec_sbox:\n"; - &InvSbox (@XMM[0..7, 8..15]); -$code.=<<___; - subs $rounds,$rounds,#1 - bcc .Ldec_done -___ - &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); -$code.=<<___; - vldmia $const, {@XMM[12]} @ .LISR - ite eq @ Thumb2 thing, sanity check in ARM - addeq $const,$const,#0x10 - bne .Ldec_loop - vldmia $const, {@XMM[12]} @ .LISRM0 - b .Ldec_loop -.align 4 -.Ldec_done: -___ - &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); -$code.=<<___; - vldmia $key, {@XMM[8]} @ last round key - veor @XMM[6], @XMM[6], @XMM[8] - veor @XMM[4], @XMM[4], @XMM[8] - veor @XMM[2], @XMM[2], @XMM[8] - veor @XMM[7], @XMM[7], @XMM[8] - veor @XMM[3], @XMM[3], @XMM[8] - veor @XMM[5], @XMM[5], @XMM[8] - veor @XMM[0], @XMM[0], @XMM[8] - veor @XMM[1], @XMM[1], @XMM[8] - bx lr -.size _bsaes_decrypt8,.-_bsaes_decrypt8 - -.type _bsaes_const,%object -.align 6 -_bsaes_const: -.LM0ISR: @ InvShiftRows constants - .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISR: - .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LISRM0: - .quad 0x01040b0e0205080f, 0x0306090c00070a0d -.LM0SR: @ ShiftRows constants - .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSR: - .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -.LSRM0: - .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d -.LREVM0SR: - .quad 0x090d01050c000408, 
0x03070b0f060a0e02 -.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " -.align 6 -.size _bsaes_const,.-_bsaes_const - -.type _bsaes_encrypt8,%function -.align 4 -_bsaes_encrypt8: - adr $const,_bsaes_encrypt8 - vldmia $key!, {@XMM[9]} @ round 0 key - sub $const,$const,#_bsaes_encrypt8-.LM0SR - - vldmia $const!, {@XMM[8]} @ .LM0SR -_bsaes_encrypt8_alt: - veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key - veor @XMM[11], @XMM[1], @XMM[9] - vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` - veor @XMM[12], @XMM[2], @XMM[9] - vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` - veor @XMM[13], @XMM[3], @XMM[9] - vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` - veor @XMM[14], @XMM[4], @XMM[9] - vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` - veor @XMM[15], @XMM[5], @XMM[9] - vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` - veor @XMM[10], @XMM[6], @XMM[9] - vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` - veor @XMM[11], @XMM[7], @XMM[9] - vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` - vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -_bsaes_encrypt8_bitslice: -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - sub $rounds,$rounds,#1 - b .Lenc_sbox -.align 4 -.Lenc_loop: -___ - &ShiftRows (@XMM[0..7, 8..12]); -$code.=".Lenc_sbox:\n"; - &Sbox (@XMM[0..7, 8..15]); -$code.=<<___; - subs $rounds,$rounds,#1 - bcc .Lenc_done -___ - &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); -$code.=<<___; - vldmia $const, {@XMM[12]} @ .LSR - ite eq @ Thumb2 thing, samity check in ARM - addeq $const,$const,#0x10 - bne .Lenc_loop - 
vldmia $const, {@XMM[12]} @ .LSRM0 - b .Lenc_loop -.align 4 -.Lenc_done: -___ - # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb - &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); -$code.=<<___; - vldmia $key, {@XMM[8]} @ last round key - veor @XMM[4], @XMM[4], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[8] - veor @XMM[3], @XMM[3], @XMM[8] - veor @XMM[7], @XMM[7], @XMM[8] - veor @XMM[2], @XMM[2], @XMM[8] - veor @XMM[5], @XMM[5], @XMM[8] - veor @XMM[0], @XMM[0], @XMM[8] - veor @XMM[1], @XMM[1], @XMM[8] - bx lr -.size _bsaes_encrypt8,.-_bsaes_encrypt8 -___ -} -{ -my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); - -sub bitslice_key { -my @x=reverse(@_[0..7]); -my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; - - &swapmove (@x[0,1],1,$bs0,$t2,$t3); -$code.=<<___; - @ &swapmove(@x[2,3],1,$t0,$t2,$t3); - vmov @x[2], @x[0] - vmov @x[3], @x[1] -___ - #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); - - &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); -$code.=<<___; - @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - vmov @x[4], @x[0] - vmov @x[6], @x[2] - vmov @x[5], @x[1] - vmov @x[7], @x[3] -___ - &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); - &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); -} - -$code.=<<___; -.type _bsaes_key_convert,%function -.align 4 -_bsaes_key_convert: - adr $const,_bsaes_key_convert - vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key - sub $const,$const,#_bsaes_key_convert-.LM0 - vld1.8 {@XMM[15]}, [$inp]! 
@ load round 1 key - - vmov.i8 @XMM[8], #0x01 @ bit masks - vmov.i8 @XMM[9], #0x02 - vmov.i8 @XMM[10], #0x04 - vmov.i8 @XMM[11], #0x08 - vmov.i8 @XMM[12], #0x10 - vmov.i8 @XMM[13], #0x20 - vldmia $const, {@XMM[14]} @ .LM0 - -#ifdef __ARMEL__ - vrev32.8 @XMM[7], @XMM[7] - vrev32.8 @XMM[15], @XMM[15] -#endif - sub $rounds,$rounds,#1 - vstmia $out!, {@XMM[7]} @ save round 0 key - b .Lkey_loop - -.align 4 -.Lkey_loop: - vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` - vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` - vmov.i8 @XMM[6], #0x40 - vmov.i8 @XMM[15], #0x80 - - vtst.8 @XMM[0], @XMM[7], @XMM[8] - vtst.8 @XMM[1], @XMM[7], @XMM[9] - vtst.8 @XMM[2], @XMM[7], @XMM[10] - vtst.8 @XMM[3], @XMM[7], @XMM[11] - vtst.8 @XMM[4], @XMM[7], @XMM[12] - vtst.8 @XMM[5], @XMM[7], @XMM[13] - vtst.8 @XMM[6], @XMM[7], @XMM[6] - vtst.8 @XMM[7], @XMM[7], @XMM[15] - vld1.8 {@XMM[15]}, [$inp]! @ load next round key - vmvn @XMM[0], @XMM[0] @ "pnot" - vmvn @XMM[1], @XMM[1] - vmvn @XMM[5], @XMM[5] - vmvn @XMM[6], @XMM[6] -#ifdef __ARMEL__ - vrev32.8 @XMM[15], @XMM[15] -#endif - subs $rounds,$rounds,#1 - vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key - bne .Lkey_loop - - vmov.i8 @XMM[7],#0x63 @ compose .L63 - @ don't save last round key - bx lr -.size _bsaes_key_convert,.-_bsaes_key_convert -___ -} - -if (0) { # following four functions are unsupported interface - # used for benchmarking... 
-$code.=<<___; -.globl bsaes_enc_key_convert -.type bsaes_enc_key_convert,%function -.align 4 -bsaes_enc_key_convert: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - - ldr r5,[$inp,#240] @ pass rounds - mov r4,$inp @ pass key - mov r12,$out @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_enc_key_convert,.-bsaes_enc_key_convert - -.globl bsaes_encrypt_128 -.type bsaes_encrypt_128,%function -.align 4 -bsaes_encrypt_128: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so -.Lenc128_loop: - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! - mov r4,$key @ pass the key - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5,#10 @ pass rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - - bl _bsaes_encrypt8 - - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - subs $len,$len,#0x80 - vst1.8 {@XMM[5]}, [$out]! 
- bhi .Lenc128_loop - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_encrypt_128,.-bsaes_encrypt_128 - -.globl bsaes_dec_key_convert -.type bsaes_dec_key_convert,%function -.align 4 -bsaes_dec_key_convert: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - - ldr r5,[$inp,#240] @ pass rounds - mov r4,$inp @ pass key - mov r12,$out @ pass key schedule - bl _bsaes_key_convert - vldmia $out, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia $out, {@XMM[7]} - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_dec_key_convert,.-bsaes_dec_key_convert - -.globl bsaes_decrypt_128 -.type bsaes_decrypt_128,%function -.align 4 -bsaes_decrypt_128: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so -.Ldec128_loop: - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! - mov r4,$key @ pass the key - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5,#10 @ pass rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - - bl _bsaes_decrypt8 - - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - subs $len,$len,#0x80 - vst1.8 {@XMM[5]}, [$out]! 
- bhi .Ldec128_loop - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_decrypt_128,.-bsaes_decrypt_128 -___ -} -{ -my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); -my ($keysched)=("sp"); - -$code.=<<___; -.extern AES_cbc_encrypt -.extern AES_decrypt - -.global bsaes_cbc_encrypt -.type bsaes_cbc_encrypt,%function -.align 5 -bsaes_cbc_encrypt: -#ifndef __KERNEL__ - cmp $len, #128 -#ifndef __thumb__ - blo AES_cbc_encrypt -#else - bhs 1f - b AES_cbc_encrypt -1: -#endif -#endif - - @ it is up to the caller to make sure we are called with enc == 0 - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr $ivp, [ip] @ IV is 1st arg on the stack - mov $len, $len, lsr#4 @ len in 16 byte blocks - sub sp, #0x10 @ scratch space to carry over the IV - mov $fp, sp @ save sp - - ldr $rounds, [$key, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - add r12, #`128-32` @ sifze of bit-slices key schedule - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 @ sp is $keysched - bl _bsaes_key_convert - vldmia $keysched, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia $keysched, {@XMM[7]} -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, $key, #248 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} - -.align 2 -0: -#endif - - vld1.8 {@XMM[15]}, [$ivp] @ load IV - b .Lcbc_dec_loop - -.align 4 -.Lcbc_dec_loop: - subs $len, $len, #0x8 - bmi .Lcbc_dec_loop_finish - - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! 
@ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, $keysched @ pass the key -#else - add r4, $key, #248 -#endif - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5, $rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp] - sub $inp, $inp, #0x60 - vstmia $fp, {@XMM[15]} @ put aside IV - - bl _bsaes_decrypt8 - - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[3], @XMM[3], @XMM[13] - vst1.8 {@XMM[6]}, [$out]! - veor @XMM[5], @XMM[5], @XMM[14] - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - vst1.8 {@XMM[5]}, [$out]! - - b .Lcbc_dec_loop - -.Lcbc_dec_loop_finish: - adds $len, $len, #8 - beq .Lcbc_dec_done - - vld1.8 {@XMM[0]}, [$inp]! @ load input - cmp $len, #2 - blo .Lcbc_dec_one - vld1.8 {@XMM[1]}, [$inp]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, $keysched @ pass the key -#else - add r4, $key, #248 -#endif - mov r5, $rounds - vstmia $fp, {@XMM[15]} @ put aside IV - beq .Lcbc_dec_two - vld1.8 {@XMM[2]}, [$inp]! - cmp $len, #4 - blo .Lcbc_dec_three - vld1.8 {@XMM[3]}, [$inp]! - beq .Lcbc_dec_four - vld1.8 {@XMM[4]}, [$inp]! - cmp $len, #6 - blo .Lcbc_dec_five - vld1.8 {@XMM[5]}, [$inp]! - beq .Lcbc_dec_six - vld1.8 {@XMM[6]}, [$inp]! - sub $inp, $inp, #0x70 - - bl _bsaes_decrypt8 - - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! 
- veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[3], @XMM[3], @XMM[13] - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_six: - sub $inp, $inp, #0x60 - bl _bsaes_decrypt8 - vldmia $fp,{@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_five: - sub $inp, $inp, #0x50 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[2], @XMM[2], @XMM[11] - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_four: - sub $inp, $inp, #0x40 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! 
@ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_three: - sub $inp, $inp, #0x30 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_two: - sub $inp, $inp, #0x20 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[15]}, [$inp]! @ reload input - veor @XMM[1], @XMM[1], @XMM[8] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_one: - sub $inp, $inp, #0x10 - mov $rounds, $out @ save original out pointer - mov $out, $fp @ use the iv scratch space as out buffer - mov r2, $key - vmov @XMM[4],@XMM[15] @ just in case ensure that IV - vmov @XMM[5],@XMM[0] @ and input are preserved - bl AES_decrypt - vld1.8 {@XMM[0]}, [$fp,:64] @ load result - veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV - vmov @XMM[15], @XMM[5] @ @XMM[5] holds input - vst1.8 {@XMM[0]}, [$rounds] @ write output - -.Lcbc_dec_done: -#ifndef BSAES_ASM_EXTENDED_KEY - vmov.i32 q0, #0 - vmov.i32 q1, #0 -.Lcbc_dec_bzero: @ wipe key schedule [if any] - vstmia $keysched!, {q0-q1} - cmp $keysched, $fp - bne .Lcbc_dec_bzero -#endif - - mov sp, $fp - add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb - vst1.8 {@XMM[15]}, [$ivp] @ return IV - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} -.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt -___ -} -{ -my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); -my $const = "r6"; # shared with _bsaes_encrypt8_alt -my $keysched = "sp"; - -$code.=<<___; -.extern AES_encrypt -.global bsaes_ctr32_encrypt_blocks -.type bsaes_ctr32_encrypt_blocks,%function -.align 5 
-bsaes_ctr32_encrypt_blocks: - cmp $len, #8 @ use plain AES for - blo .Lctr_enc_short @ small sizes - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr $ctr, [ip] @ ctr is 1st arg on the stack - sub sp, sp, #0x10 @ scratch space to carry over the ctr - mov $fp, sp @ save sp - - ldr $rounds, [$key, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - add r12, #`128-32` @ size of bit-sliced key schedule - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 @ sp is $keysched - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - - vld1.8 {@XMM[0]}, [$ctr] @ load counter - add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr - vldmia $keysched, {@XMM[4]} @ load round0 key -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - -.align 2 -0: add r12, $key, #248 - vld1.8 {@XMM[0]}, [$ctr] @ load counter - adrl $ctr, .LREVM0SR @ borrow $ctr - vldmia r12, {@XMM[4]} @ load round0 key - sub sp, #0x10 @ place for adjusted round0 key -#endif - - vmov.i32 @XMM[8],#1 @ compose 1<<96 - veor @XMM[9],@XMM[9],@XMM[9] - vrev32.8 @XMM[0],@XMM[0] - vext.8 @XMM[8],@XMM[9],@XMM[8],#4 - vrev32.8 @XMM[4],@XMM[4] - vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 - vstmia $keysched, {@XMM[4]} @ save adjusted round0 key - b .Lctr_enc_loop - -.align 4 -.Lctr_enc_loop: - vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 - vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 - vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 - vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 - vadd.u32 @XMM[4], 
@XMM[1], @XMM[10] - vadd.u32 @XMM[5], @XMM[2], @XMM[10] - vadd.u32 @XMM[6], @XMM[3], @XMM[10] - vadd.u32 @XMM[7], @XMM[4], @XMM[10] - vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter - - @ Borrow prologue from _bsaes_encrypt8 to use the opportunity - @ to flip byte order in 32-bit counter - - vldmia $keysched, {@XMM[9]} @ load round0 key -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, $keysched, #0x10 @ pass next round key -#else - add r4, $key, #`248+16` -#endif - vldmia $ctr, {@XMM[8]} @ .LREVM0SR - mov r5, $rounds @ pass rounds - vstmia $fp, {@XMM[10]} @ save next counter - sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants - - bl _bsaes_encrypt8_alt - - subs $len, $len, #8 - blo .Lctr_enc_loop_done - - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[0], @XMM[8] - veor @XMM[1], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[10] - veor @XMM[6], @XMM[11] - vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! - veor @XMM[3], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[7], @XMM[13] - veor @XMM[2], @XMM[14] - vst1.8 {@XMM[4]}, [$out]! - veor @XMM[5], @XMM[15] - vst1.8 {@XMM[6]}, [$out]! - vmov.i32 @XMM[8], #1 @ compose 1<<96 - vst1.8 {@XMM[3]}, [$out]! - veor @XMM[9], @XMM[9], @XMM[9] - vst1.8 {@XMM[7]}, [$out]! - vext.8 @XMM[8], @XMM[9], @XMM[8], #4 - vst1.8 {@XMM[2]}, [$out]! - vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 - vst1.8 {@XMM[5]}, [$out]! - vldmia $fp, {@XMM[0]} @ load counter - - bne .Lctr_enc_loop - b .Lctr_enc_done - -.align 4 -.Lctr_enc_loop_done: - add $len, $len, #8 - vld1.8 {@XMM[8]}, [$inp]! @ load input - veor @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! @ write output - cmp $len, #2 - blo .Lctr_enc_done - vld1.8 {@XMM[9]}, [$inp]! - veor @XMM[1], @XMM[9] - vst1.8 {@XMM[1]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[10]}, [$inp]! - veor @XMM[4], @XMM[10] - vst1.8 {@XMM[4]}, [$out]! - cmp $len, #4 - blo .Lctr_enc_done - vld1.8 {@XMM[11]}, [$inp]! 
- veor @XMM[6], @XMM[11] - vst1.8 {@XMM[6]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[12]}, [$inp]! - veor @XMM[3], @XMM[12] - vst1.8 {@XMM[3]}, [$out]! - cmp $len, #6 - blo .Lctr_enc_done - vld1.8 {@XMM[13]}, [$inp]! - veor @XMM[7], @XMM[13] - vst1.8 {@XMM[7]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[14]}, [$inp] - veor @XMM[2], @XMM[14] - vst1.8 {@XMM[2]}, [$out]! - -.Lctr_enc_done: - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifndef BSAES_ASM_EXTENDED_KEY -.Lctr_enc_bzero: @ wipe key schedule [if any] - vstmia $keysched!, {q0-q1} - cmp $keysched, $fp - bne .Lctr_enc_bzero -#else - vstmia $keysched, {q0-q1} -#endif - - mov sp, $fp - add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.align 4 -.Lctr_enc_short: - ldr ip, [sp] @ ctr pointer is passed on stack - stmdb sp!, {r4-r8, lr} - - mov r4, $inp @ copy arguments - mov r5, $out - mov r6, $len - mov r7, $key - ldr r8, [ip, #12] @ load counter LSW - vld1.8 {@XMM[1]}, [ip] @ load whole counter value -#ifdef __ARMEL__ - rev r8, r8 -#endif - sub sp, sp, #0x10 - vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value - sub sp, sp, #0x10 - -.Lctr_enc_short_loop: - add r0, sp, #0x10 @ input counter value - mov r1, sp @ output on the stack - mov r2, r7 @ key - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [r4]! @ load input - vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter - add r8, r8, #1 -#ifdef __ARMEL__ - rev r0, r8 - str r0, [sp, #0x1c] @ next counter value -#else - str r8, [sp, #0x1c] @ next counter value -#endif - veor @XMM[0],@XMM[0],@XMM[1] - vst1.8 {@XMM[0]}, [r5]! 
@ store output - subs r6, r6, #1 - bne .Lctr_enc_short_loop - - vmov.i32 q0, #0 - vmov.i32 q1, #0 - vstmia sp!, {q0-q1} - - ldmia sp!, {r4-r8, pc} -.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -___ -} -{ -###################################################################### -# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, -# const unsigned char iv[16]); -# -my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); -my $const="r6"; # returned by _bsaes_key_convert -my $twmask=@XMM[5]; -my @T=@XMM[6..7]; - -$code.=<<___; -.globl bsaes_xts_encrypt -.type bsaes_xts_encrypt,%function -.align 4 -bsaes_xts_encrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future $fp - - mov $inp, r0 - mov $out, r1 - mov $len, r2 - mov $key, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0,sp @ pointer to initial tweak -#endif - - ldr $rounds, [$key, #240] @ get # of rounds - mov $fp, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - @ add r12, #`128-32` @ size of bit-sliced key schedule - sub r12, #`32+16` @ place for tweak[9] - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7], @XMM[7], @XMM[15] @ fix up last 
round key - vstmia r12, {@XMM[7]} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - - vld1.8 {@XMM[8]}, [r0] @ initial tweak - adr $magic, .Lxts_magic - - subs $len, #0x80 - blo .Lxts_enc_short - b .Lxts_enc_loop - -.align 4 -.Lxts_enc_loop: - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - vadd.u64 @XMM[8], @XMM[15], @XMM[15] - vst1.64 {@XMM[15]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - veor @XMM[8], @XMM[8], @T[0] - vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - veor @XMM[7], @XMM[7], @XMM[15] - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[2], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - veor @XMM[13], @XMM[5], @XMM[15] - vst1.8 {@XMM[12]-@XMM[13]}, [$out]! 
- - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - subs $len, #0x80 - bpl .Lxts_enc_loop - -.Lxts_enc_short: - adds $len, #0x70 - bmi .Lxts_enc_done - - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! - subs $len, #0x10 - bmi .Lxts_enc_`$i-9` -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - sub $len, #0x10 - vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vld1.64 {@XMM[14]}, [r0,:128]! - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[2], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - vst1.8 {@XMM[12]}, [$out]! 
- - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_6: - vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak - - veor @XMM[4], @XMM[4], @XMM[12] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[5], @XMM[5], @XMM[13] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done - -@ put this in range for both ARM and Thumb mode adr instructions -.align 5 -.Lxts_magic: - .quad 1, 0x87 - -.align 5 -.Lxts_enc_5: - vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak - - veor @XMM[3], @XMM[3], @XMM[11] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[4], @XMM[4], @XMM[12] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - vst1.8 {@XMM[10]}, [$out]! 
- - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_4: - vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak - - veor @XMM[2], @XMM[2], @XMM[10] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[3], @XMM[3], @XMM[11] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_3: - vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak - - veor @XMM[1], @XMM[1], @XMM[9] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[2], @XMM[2], @XMM[10] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - vld1.64 {@XMM[10]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - vst1.8 {@XMM[8]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_2: - vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak - - veor @XMM[0], @XMM[0], @XMM[8] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[1], @XMM[1], @XMM[9] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! 
- - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_1: - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! - mov $fp, r4 - - vmov @XMM[8], @XMM[9] @ next round tweak - -.Lxts_enc_done: -#ifndef XTS_CHAIN_TWEAK - adds $len, #0x10 - beq .Lxts_enc_ret - sub r6, $out, #0x10 - -.Lxts_enc_steal: - ldrb r0, [$inp], #1 - ldrb r1, [$out, #-0x10] - strb r0, [$out, #-0x10] - strb r1, [$out], #1 - - subs $len, #1 - bhi .Lxts_enc_steal - - vld1.8 {@XMM[0]}, [r6] - mov r0, sp - veor @XMM[0], @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [r6] - mov $fp, r4 -#endif - -.Lxts_enc_ret: - bic r0, $fp, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_enc_bzero - - mov sp, $fp -#ifdef XTS_CHAIN_TWEAK - vst1.8 {@XMM[8]}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_encrypt,.-bsaes_xts_encrypt - -.globl bsaes_xts_decrypt -.type bsaes_xts_decrypt,%function -.align 4 -bsaes_xts_decrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future $fp - - mov $inp, r0 - mov $out, r1 - mov $len, r2 - mov $key, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0, sp @ pointer to initial tweak -#endif - - ldr $rounds, [$key, #240] @ get # of rounds - mov $fp, r6 -#ifndef 
BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - @ add r12, #`128-32` @ size of bit-sliced key schedule - sub r12, #`32+16` @ place for tweak[9] - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - add r4, sp, #0x90 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, $key, #248 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - vld1.8 {@XMM[8]}, [r0] @ initial tweak - adr $magic, .Lxts_magic - -#ifndef XTS_CHAIN_TWEAK - tst $len, #0xf @ if not multiple of 16 - it ne @ Thumb2 thing, sanity check in ARM - subne $len, #0x10 @ subtract another 16 bytes -#endif - subs $len, #0x80 - - blo .Lxts_dec_short - b .Lxts_dec_loop - -.align 4 -.Lxts_dec_loop: - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - vadd.u64 @XMM[8], @XMM[15], @XMM[15] - vst1.64 {@XMM[15]}, [r0,:128]! 
- vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - veor @XMM[8], @XMM[8], @T[0] - vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - veor @XMM[7], @XMM[7], @XMM[15] - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[3], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - veor @XMM[13], @XMM[5], @XMM[15] - vst1.8 {@XMM[12]-@XMM[13]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - subs $len, #0x80 - bpl .Lxts_dec_loop - -.Lxts_dec_short: - adds $len, #0x70 - bmi .Lxts_dec_done - - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! - subs $len, #0x10 - bmi .Lxts_dec_`$i-9` -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - sub $len, #0x10 - vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]}, [$inp]! 
- veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vld1.64 {@XMM[14]}, [r0,:128]! - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[3], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - vst1.8 {@XMM[12]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_6: - vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak - - veor @XMM[4], @XMM[4], @XMM[12] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[5], @XMM[5], @XMM[13] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! 
- - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_5: - vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak - - veor @XMM[3], @XMM[3], @XMM[11] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[4], @XMM[4], @XMM[12] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - vst1.8 {@XMM[10]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_4: - vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak - - veor @XMM[2], @XMM[2], @XMM[10] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[3], @XMM[3], @XMM[11] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_3: - vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak - - veor @XMM[1], @XMM[1], @XMM[9] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[2], @XMM[2], @XMM[10] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - vld1.64 {@XMM[10]}, [r0,:128]! 
- veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - vst1.8 {@XMM[8]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_2: - vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak - - veor @XMM[0], @XMM[0], @XMM[8] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[1], @XMM[1], @XMM[9] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_1: - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - mov r5, $magic @ preserve magic - - bl AES_decrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! - mov $fp, r4 - mov $magic, r5 - - vmov @XMM[8], @XMM[9] @ next round tweak - -.Lxts_dec_done: -#ifndef XTS_CHAIN_TWEAK - adds $len, #0x10 - beq .Lxts_dec_ret - - @ calculate one round of extra tweak for the stolen ciphertext - vldmia $magic, {$twmask} - vshr.s64 @XMM[6], @XMM[8], #63 - vand @XMM[6], @XMM[6], $twmask - vadd.u64 @XMM[9], @XMM[8], @XMM[8] - vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` - veor @XMM[9], @XMM[9], @XMM[6] - - @ perform the final decryption with the last tweak value - vld1.8 {@XMM[0]}, [$inp]! 
- mov r0, sp - veor @XMM[0], @XMM[0], @XMM[9] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_decrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[9] - vst1.8 {@XMM[0]}, [$out] - - mov r6, $out -.Lxts_dec_steal: - ldrb r1, [$out] - ldrb r0, [$inp], #1 - strb r1, [$out, #0x10] - strb r0, [$out], #1 - - subs $len, #1 - bhi .Lxts_dec_steal - - vld1.8 {@XMM[0]}, [r6] - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - - bl AES_decrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [r6] - mov $fp, r4 -#endif - -.Lxts_dec_ret: - bic r0, $fp, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_dec_bzero - - mov sp, $fp -#ifdef XTS_CHAIN_TWEAK - vst1.8 {@XMM[8]}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_decrypt,.-bsaes_xts_decrypt -___ -} -$code.=<<___; -#endif -___ - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -open SELF,$0; -while() { - next if (/^#!/); - last if (!s/^#/@/ and !/^$/); - print; -} -close SELF; - -print $code; - -close STDOUT; From 21c8e72037fb163b87b101724438259bc7651975 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 12 Jan 2017 13:40:39 +0000 Subject: [PATCH 044/142] crypto: testmgr - use calculated count for number of test vectors When working on AES in CCM mode for ARM, my code passed the internal tcrypt test before I had even bothered to implement the AES-192 and AES-256 code paths, which is strange because the tcrypt does contain AES-192 and AES-256 test vectors for CCM. As it turned out, the define AES_CCM_ENC_TEST_VECTORS was out of sync with the actual number of test vectors, causing only the AES-128 ones to be executed. 
So get rid of the defines, and wrap the test vector references in a macro that calculates the number of vectors automatically. The following test vector counts were out of sync with the respective defines: BF_CTR_ENC_TEST_VECTORS 2 -> 3 BF_CTR_DEC_TEST_VECTORS 2 -> 3 TF_CTR_ENC_TEST_VECTORS 2 -> 3 TF_CTR_DEC_TEST_VECTORS 2 -> 3 SERPENT_CTR_ENC_TEST_VECTORS 2 -> 3 SERPENT_CTR_DEC_TEST_VECTORS 2 -> 3 AES_CCM_ENC_TEST_VECTORS 8 -> 14 AES_CCM_DEC_TEST_VECTORS 7 -> 17 AES_CCM_4309_ENC_TEST_VECTORS 7 -> 23 AES_CCM_4309_DEC_TEST_VECTORS 10 -> 23 CAMELLIA_CTR_ENC_TEST_VECTORS 2 -> 3 CAMELLIA_CTR_DEC_TEST_VECTORS 2 -> 3 Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/testmgr.c | 1033 +++++++++------------------------------------- crypto/testmgr.h | 272 +----------- 2 files changed, 204 insertions(+), 1101 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 500a5277cc22..98eb09782db8 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2250,30 +2250,23 @@ static int alg_test_null(const struct alg_test_desc *desc, return 0; } +#define __VECS(tv) { .vecs = tv, .count = ARRAY_SIZE(tv) } + /* Please keep this list sorted by algorithm name. 
*/ static const struct alg_test_desc alg_test_descs[] = { { .alg = "ansi_cprng", .test = alg_test_cprng, .suite = { - .cprng = { - .vecs = ansi_cprng_aes_tv_template, - .count = ANSI_CPRNG_AES_TEST_VECTORS - } + .cprng = __VECS(ansi_cprng_aes_tv_template) } }, { .alg = "authenc(hmac(md5),ecb(cipher_null))", .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = hmac_md5_ecb_cipher_null_enc_tv_template, - .count = HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS - }, - .dec = { - .vecs = hmac_md5_ecb_cipher_null_dec_tv_template, - .count = HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS - } + .enc = __VECS(hmac_md5_ecb_cipher_null_enc_tv_template), + .dec = __VECS(hmac_md5_ecb_cipher_null_dec_tv_template) } } }, { @@ -2281,12 +2274,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA1_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_aes_cbc_enc_tv_temp) } } }, { @@ -2294,12 +2282,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_des_cbc_enc_tv_temp, - .count = - HMAC_SHA1_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_des_cbc_enc_tv_temp) } } }, { @@ -2308,12 +2291,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA1_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2325,18 +2303,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_ecb_cipher_null_enc_tv_temp, - .count = - HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VEC - }, - .dec = { - .vecs = - hmac_sha1_ecb_cipher_null_dec_tv_temp, - .count = - HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VEC - } + .enc = __VECS(hmac_sha1_ecb_cipher_null_enc_tv_temp), + .dec = 
__VECS(hmac_sha1_ecb_cipher_null_dec_tv_temp) } } }, { @@ -2348,12 +2316,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha224_des_cbc_enc_tv_temp, - .count = - HMAC_SHA224_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha224_des_cbc_enc_tv_temp) } } }, { @@ -2362,12 +2325,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha224_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA224_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha224_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2376,12 +2334,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA256_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_aes_cbc_enc_tv_temp) } } }, { @@ -2389,12 +2342,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_des_cbc_enc_tv_temp, - .count = - HMAC_SHA256_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_des_cbc_enc_tv_temp) } } }, { @@ -2403,12 +2351,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA256_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2424,12 +2367,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha384_des_cbc_enc_tv_temp, - .count = - HMAC_SHA384_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha384_des_cbc_enc_tv_temp) } } }, { @@ -2438,12 +2376,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha384_des3_ede_cbc_enc_tv_temp, - .count = - 
HMAC_SHA384_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha384_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2460,12 +2393,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA512_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_aes_cbc_enc_tv_temp) } } }, { @@ -2473,12 +2401,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_des_cbc_enc_tv_temp, - .count = - HMAC_SHA512_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_des_cbc_enc_tv_temp) } } }, { @@ -2487,12 +2410,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA512_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2509,14 +2427,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_cbc_enc_tv_template, - .count = AES_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_cbc_dec_tv_template, - .count = AES_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(aes_cbc_enc_tv_template), + .dec = __VECS(aes_cbc_dec_tv_template) } } }, { @@ -2524,14 +2436,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = anubis_cbc_enc_tv_template, - .count = ANUBIS_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = anubis_cbc_dec_tv_template, - .count = ANUBIS_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(anubis_cbc_enc_tv_template), + .dec = __VECS(anubis_cbc_dec_tv_template) } } }, { @@ -2539,14 +2445,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_cbc_enc_tv_template, - .count = BF_CBC_ENC_TEST_VECTORS - }, - .dec = { - 
.vecs = bf_cbc_dec_tv_template, - .count = BF_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(bf_cbc_enc_tv_template), + .dec = __VECS(bf_cbc_dec_tv_template) } } }, { @@ -2554,14 +2454,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_cbc_enc_tv_template, - .count = CAMELLIA_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_cbc_dec_tv_template, - .count = CAMELLIA_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_cbc_enc_tv_template), + .dec = __VECS(camellia_cbc_dec_tv_template) } } }, { @@ -2569,14 +2463,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_cbc_enc_tv_template, - .count = CAST5_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_cbc_dec_tv_template, - .count = CAST5_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_cbc_enc_tv_template), + .dec = __VECS(cast5_cbc_dec_tv_template) } } }, { @@ -2584,14 +2472,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_cbc_enc_tv_template, - .count = CAST6_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_cbc_dec_tv_template, - .count = CAST6_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_cbc_enc_tv_template), + .dec = __VECS(cast6_cbc_dec_tv_template) } } }, { @@ -2599,14 +2481,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_cbc_enc_tv_template, - .count = DES_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_cbc_dec_tv_template, - .count = DES_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(des_cbc_enc_tv_template), + .dec = __VECS(des_cbc_dec_tv_template) } } }, { @@ -2615,14 +2491,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_cbc_enc_tv_template, - .count = DES3_EDE_CBC_ENC_TEST_VECTORS 
- }, - .dec = { - .vecs = des3_ede_cbc_dec_tv_template, - .count = DES3_EDE_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_cbc_enc_tv_template), + .dec = __VECS(des3_ede_cbc_dec_tv_template) } } }, { @@ -2630,14 +2500,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_cbc_enc_tv_template, - .count = SERPENT_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_cbc_dec_tv_template, - .count = SERPENT_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_cbc_enc_tv_template), + .dec = __VECS(serpent_cbc_dec_tv_template) } } }, { @@ -2645,14 +2509,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_cbc_enc_tv_template, - .count = TF_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_cbc_dec_tv_template, - .count = TF_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(tf_cbc_enc_tv_template), + .dec = __VECS(tf_cbc_dec_tv_template) } } }, { @@ -2661,14 +2519,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_ccm_enc_tv_template, - .count = AES_CCM_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ccm_dec_tv_template, - .count = AES_CCM_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ccm_enc_tv_template), + .dec = __VECS(aes_ccm_dec_tv_template) } } }, { @@ -2676,14 +2528,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = chacha20_enc_tv_template, - .count = CHACHA20_ENC_TEST_VECTORS - }, - .dec = { - .vecs = chacha20_enc_tv_template, - .count = CHACHA20_ENC_TEST_VECTORS - }, + .enc = __VECS(chacha20_enc_tv_template), + .dec = __VECS(chacha20_enc_tv_template), } } }, { @@ -2691,20 +2537,14 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_cmac128_tv_template, - .count = 
CMAC_AES_TEST_VECTORS - } + .hash = __VECS(aes_cmac128_tv_template) } }, { .alg = "cmac(des3_ede)", .fips_allowed = 1, .test = alg_test_hash, .suite = { - .hash = { - .vecs = des3_ede_cmac64_tv_template, - .count = CMAC_DES3_EDE_TEST_VECTORS - } + .hash = __VECS(des3_ede_cmac64_tv_template) } }, { .alg = "compress_null", @@ -2713,30 +2553,21 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "crc32", .test = alg_test_hash, .suite = { - .hash = { - .vecs = crc32_tv_template, - .count = CRC32_TEST_VECTORS - } + .hash = __VECS(crc32_tv_template) } }, { .alg = "crc32c", .test = alg_test_crc32c, .fips_allowed = 1, .suite = { - .hash = { - .vecs = crc32c_tv_template, - .count = CRC32C_TEST_VECTORS - } + .hash = __VECS(crc32c_tv_template) } }, { .alg = "crct10dif", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = crct10dif_tv_template, - .count = CRCT10DIF_TEST_VECTORS - } + .hash = __VECS(crct10dif_tv_template) } }, { .alg = "ctr(aes)", @@ -2744,14 +2575,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ctr_enc_tv_template, - .count = AES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_dec_tv_template, - .count = AES_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ctr_enc_tv_template), + .dec = __VECS(aes_ctr_dec_tv_template) } } }, { @@ -2759,14 +2584,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_ctr_enc_tv_template, - .count = BF_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_ctr_dec_tv_template, - .count = BF_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(bf_ctr_enc_tv_template), + .dec = __VECS(bf_ctr_dec_tv_template) } } }, { @@ -2774,14 +2593,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_ctr_enc_tv_template, - .count = CAMELLIA_CTR_ENC_TEST_VECTORS - }, - .dec = { - 
.vecs = camellia_ctr_dec_tv_template, - .count = CAMELLIA_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_ctr_enc_tv_template), + .dec = __VECS(camellia_ctr_dec_tv_template) } } }, { @@ -2789,14 +2602,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_ctr_enc_tv_template, - .count = CAST5_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_ctr_dec_tv_template, - .count = CAST5_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_ctr_enc_tv_template), + .dec = __VECS(cast5_ctr_dec_tv_template) } } }, { @@ -2804,14 +2611,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_ctr_enc_tv_template, - .count = CAST6_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_ctr_dec_tv_template, - .count = CAST6_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_ctr_enc_tv_template), + .dec = __VECS(cast6_ctr_dec_tv_template) } } }, { @@ -2819,14 +2620,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_ctr_enc_tv_template, - .count = DES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_ctr_dec_tv_template, - .count = DES_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(des_ctr_enc_tv_template), + .dec = __VECS(des_ctr_dec_tv_template) } } }, { @@ -2834,14 +2629,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_ctr_enc_tv_template, - .count = DES3_EDE_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_ctr_dec_tv_template, - .count = DES3_EDE_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_ctr_enc_tv_template), + .dec = __VECS(des3_ede_ctr_dec_tv_template) } } }, { @@ -2849,14 +2638,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_ctr_enc_tv_template, - .count = 
SERPENT_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_ctr_dec_tv_template, - .count = SERPENT_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_ctr_enc_tv_template), + .dec = __VECS(serpent_ctr_dec_tv_template) } } }, { @@ -2864,14 +2647,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_ctr_enc_tv_template, - .count = TF_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_ctr_dec_tv_template, - .count = TF_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(tf_ctr_enc_tv_template), + .dec = __VECS(tf_ctr_dec_tv_template) } } }, { @@ -2879,14 +2656,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cts_mode_enc_tv_template, - .count = CTS_MODE_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cts_mode_dec_tv_template, - .count = CTS_MODE_DEC_TEST_VECTORS - } + .enc = __VECS(cts_mode_enc_tv_template), + .dec = __VECS(cts_mode_dec_tv_template) } } }, { @@ -2895,14 +2666,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = deflate_comp_tv_template, - .count = DEFLATE_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = deflate_decomp_tv_template, - .count = DEFLATE_DECOMP_TEST_VECTORS - } + .comp = __VECS(deflate_comp_tv_template), + .decomp = __VECS(deflate_decomp_tv_template) } } }, { @@ -2910,10 +2675,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_kpp, .fips_allowed = 1, .suite = { - .kpp = { - .vecs = dh_tv_template, - .count = DH_TEST_VECTORS - } + .kpp = __VECS(dh_tv_template) } }, { .alg = "digest_null", @@ -2923,30 +2685,21 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes128_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes128_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes128_tv_template) } }, { .alg = 
"drbg_nopr_ctr_aes192", .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes192_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes192_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes192_tv_template) } }, { .alg = "drbg_nopr_ctr_aes256", .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes256_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes256_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes256_tv_template) } }, { /* @@ -2961,11 +2714,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_hmac_sha256_tv_template, - .count = - ARRAY_SIZE(drbg_nopr_hmac_sha256_tv_template) - } + .drbg = __VECS(drbg_nopr_hmac_sha256_tv_template) } }, { /* covered by drbg_nopr_hmac_sha256 test */ @@ -2985,10 +2734,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_sha256_tv_template, - .count = ARRAY_SIZE(drbg_nopr_sha256_tv_template) - } + .drbg = __VECS(drbg_nopr_sha256_tv_template) } }, { /* covered by drbg_nopr_sha256 test */ @@ -3004,10 +2750,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_ctr_aes128_tv_template, - .count = ARRAY_SIZE(drbg_pr_ctr_aes128_tv_template) - } + .drbg = __VECS(drbg_pr_ctr_aes128_tv_template) } }, { /* covered by drbg_pr_ctr_aes128 test */ @@ -3027,10 +2770,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_hmac_sha256_tv_template, - .count = ARRAY_SIZE(drbg_pr_hmac_sha256_tv_template) - } + .drbg = __VECS(drbg_pr_hmac_sha256_tv_template) } }, { /* covered by drbg_pr_hmac_sha256 test */ @@ -3050,10 +2790,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 
1, .suite = { - .drbg = { - .vecs = drbg_pr_sha256_tv_template, - .count = ARRAY_SIZE(drbg_pr_sha256_tv_template) - } + .drbg = __VECS(drbg_pr_sha256_tv_template) } }, { /* covered by drbg_pr_sha256 test */ @@ -3070,14 +2807,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_enc_tv_template, - .count = AES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_dec_tv_template, - .count = AES_DEC_TEST_VECTORS - } + .enc = __VECS(aes_enc_tv_template), + .dec = __VECS(aes_dec_tv_template) } } }, { @@ -3085,14 +2816,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = anubis_enc_tv_template, - .count = ANUBIS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = anubis_dec_tv_template, - .count = ANUBIS_DEC_TEST_VECTORS - } + .enc = __VECS(anubis_enc_tv_template), + .dec = __VECS(anubis_dec_tv_template) } } }, { @@ -3100,14 +2825,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = arc4_enc_tv_template, - .count = ARC4_ENC_TEST_VECTORS - }, - .dec = { - .vecs = arc4_dec_tv_template, - .count = ARC4_DEC_TEST_VECTORS - } + .enc = __VECS(arc4_enc_tv_template), + .dec = __VECS(arc4_dec_tv_template) } } }, { @@ -3115,14 +2834,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_enc_tv_template, - .count = BF_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_dec_tv_template, - .count = BF_DEC_TEST_VECTORS - } + .enc = __VECS(bf_enc_tv_template), + .dec = __VECS(bf_dec_tv_template) } } }, { @@ -3130,14 +2843,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_enc_tv_template, - .count = CAMELLIA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_dec_tv_template, - .count = CAMELLIA_DEC_TEST_VECTORS - } + 
.enc = __VECS(camellia_enc_tv_template), + .dec = __VECS(camellia_dec_tv_template) } } }, { @@ -3145,14 +2852,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_enc_tv_template, - .count = CAST5_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_dec_tv_template, - .count = CAST5_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_enc_tv_template), + .dec = __VECS(cast5_dec_tv_template) } } }, { @@ -3160,14 +2861,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_enc_tv_template, - .count = CAST6_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_dec_tv_template, - .count = CAST6_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_enc_tv_template), + .dec = __VECS(cast6_dec_tv_template) } } }, { @@ -3178,14 +2873,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_enc_tv_template, - .count = DES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_dec_tv_template, - .count = DES_DEC_TEST_VECTORS - } + .enc = __VECS(des_enc_tv_template), + .dec = __VECS(des_dec_tv_template) } } }, { @@ -3194,14 +2883,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_enc_tv_template, - .count = DES3_EDE_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_dec_tv_template, - .count = DES3_EDE_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_enc_tv_template), + .dec = __VECS(des3_ede_dec_tv_template) } } }, { @@ -3224,14 +2907,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = khazad_enc_tv_template, - .count = KHAZAD_ENC_TEST_VECTORS - }, - .dec = { - .vecs = khazad_dec_tv_template, - .count = KHAZAD_DEC_TEST_VECTORS - } + .enc = __VECS(khazad_enc_tv_template), + .dec = __VECS(khazad_dec_tv_template) } } }, 
{ @@ -3239,14 +2916,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = seed_enc_tv_template, - .count = SEED_ENC_TEST_VECTORS - }, - .dec = { - .vecs = seed_dec_tv_template, - .count = SEED_DEC_TEST_VECTORS - } + .enc = __VECS(seed_enc_tv_template), + .dec = __VECS(seed_dec_tv_template) } } }, { @@ -3254,14 +2925,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_enc_tv_template, - .count = SERPENT_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_dec_tv_template, - .count = SERPENT_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_enc_tv_template), + .dec = __VECS(serpent_dec_tv_template) } } }, { @@ -3269,14 +2934,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tea_enc_tv_template, - .count = TEA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tea_dec_tv_template, - .count = TEA_DEC_TEST_VECTORS - } + .enc = __VECS(tea_enc_tv_template), + .dec = __VECS(tea_dec_tv_template) } } }, { @@ -3284,14 +2943,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tnepres_enc_tv_template, - .count = TNEPRES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tnepres_dec_tv_template, - .count = TNEPRES_DEC_TEST_VECTORS - } + .enc = __VECS(tnepres_enc_tv_template), + .dec = __VECS(tnepres_dec_tv_template) } } }, { @@ -3299,14 +2952,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_enc_tv_template, - .count = TF_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_dec_tv_template, - .count = TF_DEC_TEST_VECTORS - } + .enc = __VECS(tf_enc_tv_template), + .dec = __VECS(tf_dec_tv_template) } } }, { @@ -3314,14 +2961,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, 
.suite = { .cipher = { - .enc = { - .vecs = xeta_enc_tv_template, - .count = XETA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = xeta_dec_tv_template, - .count = XETA_DEC_TEST_VECTORS - } + .enc = __VECS(xeta_enc_tv_template), + .dec = __VECS(xeta_dec_tv_template) } } }, { @@ -3329,14 +2970,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = xtea_enc_tv_template, - .count = XTEA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = xtea_dec_tv_template, - .count = XTEA_DEC_TEST_VECTORS - } + .enc = __VECS(xtea_enc_tv_template), + .dec = __VECS(xtea_dec_tv_template) } } }, { @@ -3344,10 +2979,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_kpp, .fips_allowed = 1, .suite = { - .kpp = { - .vecs = ecdh_tv_template, - .count = ECDH_TEST_VECTORS - } + .kpp = __VECS(ecdh_tv_template) } }, { .alg = "gcm(aes)", @@ -3355,14 +2987,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_enc_tv_template, - .count = AES_GCM_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_dec_tv_template, - .count = AES_GCM_DEC_TEST_VECTORS - } + .enc = __VECS(aes_gcm_enc_tv_template), + .dec = __VECS(aes_gcm_dec_tv_template) } } }, { @@ -3370,136 +2996,94 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = ghash_tv_template, - .count = GHASH_TEST_VECTORS - } + .hash = __VECS(ghash_tv_template) } }, { .alg = "hmac(crc32)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = bfin_crc_tv_template, - .count = BFIN_CRC_TEST_VECTORS - } + .hash = __VECS(bfin_crc_tv_template) } }, { .alg = "hmac(md5)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_md5_tv_template, - .count = HMAC_MD5_TEST_VECTORS - } + .hash = __VECS(hmac_md5_tv_template) } }, { .alg = "hmac(rmd128)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = 
hmac_rmd128_tv_template, - .count = HMAC_RMD128_TEST_VECTORS - } + .hash = __VECS(hmac_rmd128_tv_template) } }, { .alg = "hmac(rmd160)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_rmd160_tv_template, - .count = HMAC_RMD160_TEST_VECTORS - } + .hash = __VECS(hmac_rmd160_tv_template) } }, { .alg = "hmac(sha1)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha1_tv_template, - .count = HMAC_SHA1_TEST_VECTORS - } + .hash = __VECS(hmac_sha1_tv_template) } }, { .alg = "hmac(sha224)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha224_tv_template, - .count = HMAC_SHA224_TEST_VECTORS - } + .hash = __VECS(hmac_sha224_tv_template) } }, { .alg = "hmac(sha256)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha256_tv_template, - .count = HMAC_SHA256_TEST_VECTORS - } + .hash = __VECS(hmac_sha256_tv_template) } }, { .alg = "hmac(sha3-224)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_224_tv_template, - .count = HMAC_SHA3_224_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_224_tv_template) } }, { .alg = "hmac(sha3-256)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_256_tv_template, - .count = HMAC_SHA3_256_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_256_tv_template) } }, { .alg = "hmac(sha3-384)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_384_tv_template, - .count = HMAC_SHA3_384_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_384_tv_template) } }, { .alg = "hmac(sha3-512)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_512_tv_template, - .count = HMAC_SHA3_512_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_512_tv_template) } }, { .alg = "hmac(sha384)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha384_tv_template, - .count = HMAC_SHA384_TEST_VECTORS - } + .hash = 
__VECS(hmac_sha384_tv_template) } }, { .alg = "hmac(sha512)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha512_tv_template, - .count = HMAC_SHA512_TEST_VECTORS - } + .hash = __VECS(hmac_sha512_tv_template) } }, { .alg = "jitterentropy_rng", @@ -3511,14 +3095,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_kw_enc_tv_template, - .count = ARRAY_SIZE(aes_kw_enc_tv_template) - }, - .dec = { - .vecs = aes_kw_dec_tv_template, - .count = ARRAY_SIZE(aes_kw_dec_tv_template) - } + .enc = __VECS(aes_kw_enc_tv_template), + .dec = __VECS(aes_kw_dec_tv_template) } } }, { @@ -3526,14 +3104,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = aes_lrw_enc_tv_template, - .count = AES_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_lrw_dec_tv_template, - .count = AES_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(aes_lrw_enc_tv_template), + .dec = __VECS(aes_lrw_dec_tv_template) } } }, { @@ -3541,14 +3113,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_lrw_enc_tv_template, - .count = CAMELLIA_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_lrw_dec_tv_template, - .count = CAMELLIA_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_lrw_enc_tv_template), + .dec = __VECS(camellia_lrw_dec_tv_template) } } }, { @@ -3556,14 +3122,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_lrw_enc_tv_template, - .count = CAST6_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_lrw_dec_tv_template, - .count = CAST6_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_lrw_enc_tv_template), + .dec = __VECS(cast6_lrw_dec_tv_template) } } }, { @@ -3571,14 +3131,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = 
alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_lrw_enc_tv_template, - .count = SERPENT_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_lrw_dec_tv_template, - .count = SERPENT_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_lrw_enc_tv_template), + .dec = __VECS(serpent_lrw_dec_tv_template) } } }, { @@ -3586,14 +3140,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_lrw_enc_tv_template, - .count = TF_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_lrw_dec_tv_template, - .count = TF_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(tf_lrw_enc_tv_template), + .dec = __VECS(tf_lrw_dec_tv_template) } } }, { @@ -3602,14 +3150,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lz4_comp_tv_template, - .count = LZ4_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lz4_decomp_tv_template, - .count = LZ4_DECOMP_TEST_VECTORS - } + .comp = __VECS(lz4_comp_tv_template), + .decomp = __VECS(lz4_decomp_tv_template) } } }, { @@ -3618,14 +3160,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lz4hc_comp_tv_template, - .count = LZ4HC_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lz4hc_decomp_tv_template, - .count = LZ4HC_DECOMP_TEST_VECTORS - } + .comp = __VECS(lz4hc_comp_tv_template), + .decomp = __VECS(lz4hc_decomp_tv_template) } } }, { @@ -3634,42 +3170,27 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lzo_comp_tv_template, - .count = LZO_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lzo_decomp_tv_template, - .count = LZO_DECOMP_TEST_VECTORS - } + .comp = __VECS(lzo_comp_tv_template), + .decomp = __VECS(lzo_decomp_tv_template) } } }, { .alg = "md4", .test = alg_test_hash, .suite = { - .hash = { - .vecs = md4_tv_template, - .count = MD4_TEST_VECTORS - } + .hash = 
__VECS(md4_tv_template) } }, { .alg = "md5", .test = alg_test_hash, .suite = { - .hash = { - .vecs = md5_tv_template, - .count = MD5_TEST_VECTORS - } + .hash = __VECS(md5_tv_template) } }, { .alg = "michael_mic", .test = alg_test_hash, .suite = { - .hash = { - .vecs = michael_mic_tv_template, - .count = MICHAEL_MIC_TEST_VECTORS - } + .hash = __VECS(michael_mic_tv_template) } }, { .alg = "ofb(aes)", @@ -3677,14 +3198,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ofb_enc_tv_template, - .count = AES_OFB_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ofb_dec_tv_template, - .count = AES_OFB_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ofb_enc_tv_template), + .dec = __VECS(aes_ofb_dec_tv_template) } } }, { @@ -3692,24 +3207,15 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = fcrypt_pcbc_enc_tv_template, - .count = FCRYPT_ENC_TEST_VECTORS - }, - .dec = { - .vecs = fcrypt_pcbc_dec_tv_template, - .count = FCRYPT_DEC_TEST_VECTORS - } + .enc = __VECS(fcrypt_pcbc_enc_tv_template), + .dec = __VECS(fcrypt_pcbc_dec_tv_template) } } }, { .alg = "poly1305", .test = alg_test_hash, .suite = { - .hash = { - .vecs = poly1305_tv_template, - .count = POLY1305_TEST_VECTORS - } + .hash = __VECS(poly1305_tv_template) } }, { .alg = "rfc3686(ctr(aes))", @@ -3717,14 +3223,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ctr_rfc3686_enc_tv_template, - .count = AES_CTR_3686_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_rfc3686_dec_tv_template, - .count = AES_CTR_3686_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ctr_rfc3686_enc_tv_template), + .dec = __VECS(aes_ctr_rfc3686_dec_tv_template) } } }, { @@ -3733,14 +3233,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = 
aes_gcm_rfc4106_enc_tv_template, - .count = AES_GCM_4106_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_rfc4106_dec_tv_template, - .count = AES_GCM_4106_DEC_TEST_VECTORS - } + .enc = __VECS(aes_gcm_rfc4106_enc_tv_template), + .dec = __VECS(aes_gcm_rfc4106_dec_tv_template) } } }, { @@ -3749,14 +3243,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_ccm_rfc4309_enc_tv_template, - .count = AES_CCM_4309_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ccm_rfc4309_dec_tv_template, - .count = AES_CCM_4309_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ccm_rfc4309_enc_tv_template), + .dec = __VECS(aes_ccm_rfc4309_dec_tv_template) } } }, { @@ -3764,14 +3252,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_rfc4543_enc_tv_template, - .count = AES_GCM_4543_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_rfc4543_dec_tv_template, - .count = AES_GCM_4543_DEC_TEST_VECTORS - }, + .enc = __VECS(aes_gcm_rfc4543_enc_tv_template), + .dec = __VECS(aes_gcm_rfc4543_dec_tv_template), } } }, { @@ -3779,14 +3261,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = rfc7539_enc_tv_template, - .count = RFC7539_ENC_TEST_VECTORS - }, - .dec = { - .vecs = rfc7539_dec_tv_template, - .count = RFC7539_DEC_TEST_VECTORS - }, + .enc = __VECS(rfc7539_enc_tv_template), + .dec = __VECS(rfc7539_dec_tv_template), } } }, { @@ -3794,71 +3270,47 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = rfc7539esp_enc_tv_template, - .count = RFC7539ESP_ENC_TEST_VECTORS - }, - .dec = { - .vecs = rfc7539esp_dec_tv_template, - .count = RFC7539ESP_DEC_TEST_VECTORS - }, + .enc = __VECS(rfc7539esp_enc_tv_template), + .dec = __VECS(rfc7539esp_dec_tv_template), } } }, { .alg = "rmd128", .test = alg_test_hash, .suite = { - .hash = 
{ - .vecs = rmd128_tv_template, - .count = RMD128_TEST_VECTORS - } + .hash = __VECS(rmd128_tv_template) } }, { .alg = "rmd160", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd160_tv_template, - .count = RMD160_TEST_VECTORS - } + .hash = __VECS(rmd160_tv_template) } }, { .alg = "rmd256", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd256_tv_template, - .count = RMD256_TEST_VECTORS - } + .hash = __VECS(rmd256_tv_template) } }, { .alg = "rmd320", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd320_tv_template, - .count = RMD320_TEST_VECTORS - } + .hash = __VECS(rmd320_tv_template) } }, { .alg = "rsa", .test = alg_test_akcipher, .fips_allowed = 1, .suite = { - .akcipher = { - .vecs = rsa_tv_template, - .count = RSA_TEST_VECTORS - } + .akcipher = __VECS(rsa_tv_template) } }, { .alg = "salsa20", .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = salsa20_stream_enc_tv_template, - .count = SALSA20_STREAM_ENC_TEST_VECTORS - } + .enc = __VECS(salsa20_stream_enc_tv_template) } } }, { @@ -3866,162 +3318,111 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha1_tv_template, - .count = SHA1_TEST_VECTORS - } + .hash = __VECS(sha1_tv_template) } }, { .alg = "sha224", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha224_tv_template, - .count = SHA224_TEST_VECTORS - } + .hash = __VECS(sha224_tv_template) } }, { .alg = "sha256", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha256_tv_template, - .count = SHA256_TEST_VECTORS - } + .hash = __VECS(sha256_tv_template) } }, { .alg = "sha3-224", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_224_tv_template, - .count = SHA3_224_TEST_VECTORS - } + .hash = __VECS(sha3_224_tv_template) } }, { .alg = "sha3-256", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_256_tv_template, - .count = 
SHA3_256_TEST_VECTORS - } + .hash = __VECS(sha3_256_tv_template) } }, { .alg = "sha3-384", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_384_tv_template, - .count = SHA3_384_TEST_VECTORS - } + .hash = __VECS(sha3_384_tv_template) } }, { .alg = "sha3-512", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_512_tv_template, - .count = SHA3_512_TEST_VECTORS - } + .hash = __VECS(sha3_512_tv_template) } }, { .alg = "sha384", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha384_tv_template, - .count = SHA384_TEST_VECTORS - } + .hash = __VECS(sha384_tv_template) } }, { .alg = "sha512", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha512_tv_template, - .count = SHA512_TEST_VECTORS - } + .hash = __VECS(sha512_tv_template) } }, { .alg = "tgr128", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr128_tv_template, - .count = TGR128_TEST_VECTORS - } + .hash = __VECS(tgr128_tv_template) } }, { .alg = "tgr160", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr160_tv_template, - .count = TGR160_TEST_VECTORS - } + .hash = __VECS(tgr160_tv_template) } }, { .alg = "tgr192", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr192_tv_template, - .count = TGR192_TEST_VECTORS - } + .hash = __VECS(tgr192_tv_template) } }, { .alg = "vmac(aes)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_vmac128_tv_template, - .count = VMAC_AES_TEST_VECTORS - } + .hash = __VECS(aes_vmac128_tv_template) } }, { .alg = "wp256", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp256_tv_template, - .count = WP256_TEST_VECTORS - } + .hash = __VECS(wp256_tv_template) } }, { .alg = "wp384", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp384_tv_template, - .count = WP384_TEST_VECTORS - } + .hash = __VECS(wp384_tv_template) } }, { .alg = "wp512", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp512_tv_template, - .count = 
WP512_TEST_VECTORS - } + .hash = __VECS(wp512_tv_template) } }, { .alg = "xcbc(aes)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_xcbc128_tv_template, - .count = XCBC_AES_TEST_VECTORS - } + .hash = __VECS(aes_xcbc128_tv_template) } }, { .alg = "xts(aes)", @@ -4029,14 +3430,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_xts_enc_tv_template, - .count = AES_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_xts_dec_tv_template, - .count = AES_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(aes_xts_enc_tv_template), + .dec = __VECS(aes_xts_dec_tv_template) } } }, { @@ -4044,14 +3439,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_xts_enc_tv_template, - .count = CAMELLIA_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_xts_dec_tv_template, - .count = CAMELLIA_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_xts_enc_tv_template), + .dec = __VECS(camellia_xts_dec_tv_template) } } }, { @@ -4059,14 +3448,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_xts_enc_tv_template, - .count = CAST6_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_xts_dec_tv_template, - .count = CAST6_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_xts_enc_tv_template), + .dec = __VECS(cast6_xts_dec_tv_template) } } }, { @@ -4074,14 +3457,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_xts_enc_tv_template, - .count = SERPENT_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_xts_dec_tv_template, - .count = SERPENT_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_xts_enc_tv_template), + .dec = __VECS(serpent_xts_dec_tv_template) } } }, { @@ -4089,14 +3466,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = 
alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_xts_enc_tv_template, - .count = TF_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_xts_dec_tv_template, - .count = TF_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(tf_xts_enc_tv_template), + .dec = __VECS(tf_xts_dec_tv_template) } } } diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 9b656be7f52f..64595f067d72 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -151,11 +151,6 @@ static char zeroed_string[48]; /* * RSA test vectors. Borrowed from openSSL. */ -#ifdef CONFIG_CRYPTO_FIPS -#define RSA_TEST_VECTORS 2 -#else -#define RSA_TEST_VECTORS 5 -#endif static struct akcipher_testvec rsa_tv_template[] = { { #ifndef CONFIG_CRYPTO_FIPS @@ -340,6 +335,7 @@ static struct akcipher_testvec rsa_tv_template[] = { .m_size = 8, .c_size = 256, .public_key_vec = true, +#ifndef CONFIG_CRYPTO_FIPS }, { .key = "\x30\x82\x09\x29" /* sequence of 2345 bytes */ @@ -538,11 +534,10 @@ static struct akcipher_testvec rsa_tv_template[] = { .key_len = 2349, .m_size = 8, .c_size = 512, +#endif } }; -#define DH_TEST_VECTORS 2 - struct kpp_testvec dh_tv_template[] = { { .secret = @@ -760,11 +755,6 @@ struct kpp_testvec dh_tv_template[] = { } }; -#ifdef CONFIG_CRYPTO_FIPS -#define ECDH_TEST_VECTORS 1 -#else -#define ECDH_TEST_VECTORS 2 -#endif struct kpp_testvec ecdh_tv_template[] = { { #ifndef CONFIG_CRYPTO_FIPS @@ -856,8 +846,6 @@ struct kpp_testvec ecdh_tv_template[] = { /* * MD4 test vectors from RFC1320 */ -#define MD4_TEST_VECTORS 7 - static struct hash_testvec md4_tv_template [] = { { .plaintext = "", @@ -899,7 +887,6 @@ static struct hash_testvec md4_tv_template [] = { }, }; -#define SHA3_224_TEST_VECTORS 3 static struct hash_testvec sha3_224_tv_template[] = { { .plaintext = "", @@ -925,7 +912,6 @@ static struct hash_testvec sha3_224_tv_template[] = { }, }; -#define SHA3_256_TEST_VECTORS 3 static struct hash_testvec sha3_256_tv_template[] = { { .plaintext = "", @@ -952,7 +938,6 @@ static struct hash_testvec 
sha3_256_tv_template[] = { }; -#define SHA3_384_TEST_VECTORS 3 static struct hash_testvec sha3_384_tv_template[] = { { .plaintext = "", @@ -985,7 +970,6 @@ static struct hash_testvec sha3_384_tv_template[] = { }; -#define SHA3_512_TEST_VECTORS 3 static struct hash_testvec sha3_512_tv_template[] = { { .plaintext = "", @@ -1027,8 +1011,6 @@ static struct hash_testvec sha3_512_tv_template[] = { /* * MD5 test vectors from RFC1321 */ -#define MD5_TEST_VECTORS 7 - static struct hash_testvec md5_tv_template[] = { { .digest = "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04" @@ -1073,8 +1055,6 @@ static struct hash_testvec md5_tv_template[] = { /* * RIPEMD-128 test vectors from ISO/IEC 10118-3:2004(E) */ -#define RMD128_TEST_VECTORS 10 - static struct hash_testvec rmd128_tv_template[] = { { .digest = "\xcd\xf2\x62\x13\xa1\x50\xdc\x3e" @@ -1137,8 +1117,6 @@ static struct hash_testvec rmd128_tv_template[] = { /* * RIPEMD-160 test vectors from ISO/IEC 10118-3:2004(E) */ -#define RMD160_TEST_VECTORS 10 - static struct hash_testvec rmd160_tv_template[] = { { .digest = "\x9c\x11\x85\xa5\xc5\xe9\xfc\x54\x61\x28" @@ -1201,8 +1179,6 @@ static struct hash_testvec rmd160_tv_template[] = { /* * RIPEMD-256 test vectors */ -#define RMD256_TEST_VECTORS 8 - static struct hash_testvec rmd256_tv_template[] = { { .digest = "\x02\xba\x4c\x4e\x5f\x8e\xcd\x18" @@ -1269,8 +1245,6 @@ static struct hash_testvec rmd256_tv_template[] = { /* * RIPEMD-320 test vectors */ -#define RMD320_TEST_VECTORS 8 - static struct hash_testvec rmd320_tv_template[] = { { .digest = "\x22\xd6\x5d\x56\x61\x53\x6c\xdc\x75\xc1" @@ -1334,7 +1308,6 @@ static struct hash_testvec rmd320_tv_template[] = { } }; -#define CRCT10DIF_TEST_VECTORS ARRAY_SIZE(crct10dif_tv_template) static struct hash_testvec crct10dif_tv_template[] = { { .plaintext = "abc", @@ -1385,8 +1358,6 @@ static struct hash_testvec crct10dif_tv_template[] = { * SHA1 test vectors from from FIPS PUB 180-1 * Long vector from CAVS 5.0 */ -#define SHA1_TEST_VECTORS 6 - static 
struct hash_testvec sha1_tv_template[] = { { .plaintext = "", @@ -1577,8 +1548,6 @@ static struct hash_testvec sha1_tv_template[] = { /* * SHA224 test vectors from from FIPS PUB 180-2 */ -#define SHA224_TEST_VECTORS 5 - static struct hash_testvec sha224_tv_template[] = { { .plaintext = "", @@ -1751,8 +1720,6 @@ static struct hash_testvec sha224_tv_template[] = { /* * SHA256 test vectors from from NIST */ -#define SHA256_TEST_VECTORS 5 - static struct hash_testvec sha256_tv_template[] = { { .plaintext = "", @@ -1924,8 +1891,6 @@ static struct hash_testvec sha256_tv_template[] = { /* * SHA384 test vectors from from NIST and kerneli */ -#define SHA384_TEST_VECTORS 6 - static struct hash_testvec sha384_tv_template[] = { { .plaintext = "", @@ -2118,8 +2083,6 @@ static struct hash_testvec sha384_tv_template[] = { /* * SHA512 test vectors from from NIST and kerneli */ -#define SHA512_TEST_VECTORS 6 - static struct hash_testvec sha512_tv_template[] = { { .plaintext = "", @@ -2327,8 +2290,6 @@ static struct hash_testvec sha512_tv_template[] = { * by Vincent Rijmen and Paulo S. L. M. 
Barreto as part of the NESSIE * submission */ -#define WP512_TEST_VECTORS 8 - static struct hash_testvec wp512_tv_template[] = { { .plaintext = "", @@ -2425,8 +2386,6 @@ static struct hash_testvec wp512_tv_template[] = { }, }; -#define WP384_TEST_VECTORS 8 - static struct hash_testvec wp384_tv_template[] = { { .plaintext = "", @@ -2507,8 +2466,6 @@ static struct hash_testvec wp384_tv_template[] = { }, }; -#define WP256_TEST_VECTORS 8 - static struct hash_testvec wp256_tv_template[] = { { .plaintext = "", @@ -2576,8 +2533,6 @@ static struct hash_testvec wp256_tv_template[] = { /* * TIGER test vectors from Tiger website */ -#define TGR192_TEST_VECTORS 6 - static struct hash_testvec tgr192_tv_template[] = { { .plaintext = "", @@ -2621,8 +2576,6 @@ static struct hash_testvec tgr192_tv_template[] = { }, }; -#define TGR160_TEST_VECTORS 6 - static struct hash_testvec tgr160_tv_template[] = { { .plaintext = "", @@ -2666,8 +2619,6 @@ static struct hash_testvec tgr160_tv_template[] = { }, }; -#define TGR128_TEST_VECTORS 6 - static struct hash_testvec tgr128_tv_template[] = { { .plaintext = "", @@ -2705,8 +2656,6 @@ static struct hash_testvec tgr128_tv_template[] = { }, }; -#define GHASH_TEST_VECTORS 6 - static struct hash_testvec ghash_tv_template[] = { { @@ -2822,8 +2771,6 @@ static struct hash_testvec ghash_tv_template[] = * HMAC-MD5 test vectors from RFC2202 * (These need to be fixed to not use strlen). 
*/ -#define HMAC_MD5_TEST_VECTORS 7 - static struct hash_testvec hmac_md5_tv_template[] = { { @@ -2904,8 +2851,6 @@ static struct hash_testvec hmac_md5_tv_template[] = /* * HMAC-RIPEMD128 test vectors from RFC2286 */ -#define HMAC_RMD128_TEST_VECTORS 7 - static struct hash_testvec hmac_rmd128_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -2985,8 +2930,6 @@ static struct hash_testvec hmac_rmd128_tv_template[] = { /* * HMAC-RIPEMD160 test vectors from RFC2286 */ -#define HMAC_RMD160_TEST_VECTORS 7 - static struct hash_testvec hmac_rmd160_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -3066,8 +3009,6 @@ static struct hash_testvec hmac_rmd160_tv_template[] = { /* * HMAC-SHA1 test vectors from RFC2202 */ -#define HMAC_SHA1_TEST_VECTORS 7 - static struct hash_testvec hmac_sha1_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -3149,8 +3090,6 @@ static struct hash_testvec hmac_sha1_tv_template[] = { /* * SHA224 HMAC test vectors from RFC4231 */ -#define HMAC_SHA224_TEST_VECTORS 4 - static struct hash_testvec hmac_sha224_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3264,8 +3203,6 @@ static struct hash_testvec hmac_sha224_tv_template[] = { * HMAC-SHA256 test vectors from * draft-ietf-ipsec-ciph-sha-256-01.txt */ -#define HMAC_SHA256_TEST_VECTORS 10 - static struct hash_testvec hmac_sha256_tv_template[] = { { .key = "\x01\x02\x03\x04\x05\x06\x07\x08" @@ -3401,8 +3338,6 @@ static struct hash_testvec hmac_sha256_tv_template[] = { }, }; -#define CMAC_AES_TEST_VECTORS 6 - static struct hash_testvec aes_cmac128_tv_template[] = { { /* From NIST Special Publication 800-38B, AES-128 */ .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" @@ -3478,8 +3413,6 @@ static struct hash_testvec aes_cmac128_tv_template[] = { } }; -#define CMAC_DES3_EDE_TEST_VECTORS 4 - static struct hash_testvec 
des3_ede_cmac64_tv_template[] = { /* * From NIST Special Publication 800-38B, Three Key TDEA @@ -3526,8 +3459,6 @@ static struct hash_testvec des3_ede_cmac64_tv_template[] = { } }; -#define XCBC_AES_TEST_VECTORS 6 - static struct hash_testvec aes_xcbc128_tv_template[] = { { .key = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -3594,7 +3525,6 @@ static struct hash_testvec aes_xcbc128_tv_template[] = { } }; -#define VMAC_AES_TEST_VECTORS 11 static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01', '\x02', '\x03', '\x02', '\x02', '\x02', '\x04', '\x01', '\x07', @@ -3701,8 +3631,6 @@ static struct hash_testvec aes_vmac128_tv_template[] = { * SHA384 HMAC test vectors from RFC4231 */ -#define HMAC_SHA384_TEST_VECTORS 4 - static struct hash_testvec hmac_sha384_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3801,8 +3729,6 @@ static struct hash_testvec hmac_sha384_tv_template[] = { * SHA512 HMAC test vectors from RFC4231 */ -#define HMAC_SHA512_TEST_VECTORS 4 - static struct hash_testvec hmac_sha512_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3908,8 +3834,6 @@ static struct hash_testvec hmac_sha512_tv_template[] = { }, }; -#define HMAC_SHA3_224_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_224_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3999,8 +3923,6 @@ static struct hash_testvec hmac_sha3_224_tv_template[] = { }, }; -#define HMAC_SHA3_256_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_256_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4090,8 +4012,6 @@ static struct hash_testvec hmac_sha3_256_tv_template[] = { }, }; -#define HMAC_SHA3_384_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_384_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4189,8 +4109,6 @@ static struct hash_testvec hmac_sha3_384_tv_template[] = { }, }; -#define HMAC_SHA3_512_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_512_tv_template[] = { { .key = 
"\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4300,8 +4218,6 @@ static struct hash_testvec hmac_sha3_512_tv_template[] = { * Poly1305 test vectors from RFC7539 A.3. */ -#define POLY1305_TEST_VECTORS 11 - static struct hash_testvec poly1305_tv_template[] = { { /* Test Vector #1 */ .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -4547,19 +4463,6 @@ static struct hash_testvec poly1305_tv_template[] = { /* * DES test vectors. */ -#define DES_ENC_TEST_VECTORS 11 -#define DES_DEC_TEST_VECTORS 5 -#define DES_CBC_ENC_TEST_VECTORS 6 -#define DES_CBC_DEC_TEST_VECTORS 5 -#define DES_CTR_ENC_TEST_VECTORS 2 -#define DES_CTR_DEC_TEST_VECTORS 2 -#define DES3_EDE_ENC_TEST_VECTORS 4 -#define DES3_EDE_DEC_TEST_VECTORS 4 -#define DES3_EDE_CBC_ENC_TEST_VECTORS 2 -#define DES3_EDE_CBC_DEC_TEST_VECTORS 2 -#define DES3_EDE_CTR_ENC_TEST_VECTORS 2 -#define DES3_EDE_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec des_enc_tv_template[] = { { /* From Applied Cryptography */ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef", @@ -6620,13 +6523,6 @@ static struct cipher_testvec des3_ede_ctr_dec_tv_template[] = { /* * Blowfish test vectors. */ -#define BF_ENC_TEST_VECTORS 7 -#define BF_DEC_TEST_VECTORS 7 -#define BF_CBC_ENC_TEST_VECTORS 2 -#define BF_CBC_DEC_TEST_VECTORS 2 -#define BF_CTR_ENC_TEST_VECTORS 2 -#define BF_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec bf_enc_tv_template[] = { { /* DES test vectors from OpenSSL */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00", @@ -8152,17 +8048,6 @@ static struct cipher_testvec bf_ctr_dec_tv_template[] = { /* * Twofish test vectors. 
*/ -#define TF_ENC_TEST_VECTORS 4 -#define TF_DEC_TEST_VECTORS 4 -#define TF_CBC_ENC_TEST_VECTORS 5 -#define TF_CBC_DEC_TEST_VECTORS 5 -#define TF_CTR_ENC_TEST_VECTORS 2 -#define TF_CTR_DEC_TEST_VECTORS 2 -#define TF_LRW_ENC_TEST_VECTORS 8 -#define TF_LRW_DEC_TEST_VECTORS 8 -#define TF_XTS_ENC_TEST_VECTORS 5 -#define TF_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec tf_enc_tv_template[] = { { .key = zeroed_string, @@ -10881,24 +10766,6 @@ static struct cipher_testvec tf_xts_dec_tv_template[] = { * Serpent test vectors. These are backwards because Serpent writes * octet sequences in right-to-left mode. */ -#define SERPENT_ENC_TEST_VECTORS 5 -#define SERPENT_DEC_TEST_VECTORS 5 - -#define TNEPRES_ENC_TEST_VECTORS 4 -#define TNEPRES_DEC_TEST_VECTORS 4 - -#define SERPENT_CBC_ENC_TEST_VECTORS 1 -#define SERPENT_CBC_DEC_TEST_VECTORS 1 - -#define SERPENT_CTR_ENC_TEST_VECTORS 2 -#define SERPENT_CTR_DEC_TEST_VECTORS 2 - -#define SERPENT_LRW_ENC_TEST_VECTORS 8 -#define SERPENT_LRW_DEC_TEST_VECTORS 8 - -#define SERPENT_XTS_ENC_TEST_VECTORS 5 -#define SERPENT_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec serpent_enc_tv_template[] = { { .input = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -13637,17 +13504,6 @@ static struct cipher_testvec serpent_xts_dec_tv_template[] = { }; /* Cast6 test vectors from RFC 2612 */ -#define CAST6_ENC_TEST_VECTORS 4 -#define CAST6_DEC_TEST_VECTORS 4 -#define CAST6_CBC_ENC_TEST_VECTORS 1 -#define CAST6_CBC_DEC_TEST_VECTORS 1 -#define CAST6_CTR_ENC_TEST_VECTORS 2 -#define CAST6_CTR_DEC_TEST_VECTORS 2 -#define CAST6_LRW_ENC_TEST_VECTORS 1 -#define CAST6_LRW_DEC_TEST_VECTORS 1 -#define CAST6_XTS_ENC_TEST_VECTORS 1 -#define CAST6_XTS_DEC_TEST_VECTORS 1 - static struct cipher_testvec cast6_enc_tv_template[] = { { .key = "\x23\x42\xbb\x9e\xfa\x38\x54\x2c" @@ -15182,38 +15038,6 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = { /* * AES test vectors. 
*/ -#define AES_ENC_TEST_VECTORS 4 -#define AES_DEC_TEST_VECTORS 4 -#define AES_CBC_ENC_TEST_VECTORS 5 -#define AES_CBC_DEC_TEST_VECTORS 5 -#define HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2 -#define HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2 -#define HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VEC 2 -#define HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VEC 2 -#define HMAC_SHA1_AES_CBC_ENC_TEST_VEC 7 -#define HMAC_SHA256_AES_CBC_ENC_TEST_VEC 7 -#define HMAC_SHA512_AES_CBC_ENC_TEST_VEC 7 -#define AES_LRW_ENC_TEST_VECTORS 8 -#define AES_LRW_DEC_TEST_VECTORS 8 -#define AES_XTS_ENC_TEST_VECTORS 5 -#define AES_XTS_DEC_TEST_VECTORS 5 -#define AES_CTR_ENC_TEST_VECTORS 5 -#define AES_CTR_DEC_TEST_VECTORS 5 -#define AES_OFB_ENC_TEST_VECTORS 1 -#define AES_OFB_DEC_TEST_VECTORS 1 -#define AES_CTR_3686_ENC_TEST_VECTORS 7 -#define AES_CTR_3686_DEC_TEST_VECTORS 6 -#define AES_GCM_ENC_TEST_VECTORS 9 -#define AES_GCM_DEC_TEST_VECTORS 8 -#define AES_GCM_4106_ENC_TEST_VECTORS 23 -#define AES_GCM_4106_DEC_TEST_VECTORS 23 -#define AES_GCM_4543_ENC_TEST_VECTORS 1 -#define AES_GCM_4543_DEC_TEST_VECTORS 2 -#define AES_CCM_ENC_TEST_VECTORS 8 -#define AES_CCM_DEC_TEST_VECTORS 7 -#define AES_CCM_4309_ENC_TEST_VECTORS 7 -#define AES_CCM_4309_DEC_TEST_VECTORS 10 - static struct cipher_testvec aes_enc_tv_template[] = { { /* From FIPS-197 */ .key = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -17069,8 +16893,6 @@ static struct aead_testvec hmac_sha512_aes_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA1_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha1_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17130,8 +16952,6 @@ static struct aead_testvec hmac_sha1_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA224_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha224_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17191,8 +17011,6 @@ static struct aead_testvec hmac_sha224_des_cbc_enc_tv_temp[] = { }, }; -#define 
HMAC_SHA256_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha256_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17254,8 +17072,6 @@ static struct aead_testvec hmac_sha256_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA384_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha384_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17321,8 +17137,6 @@ static struct aead_testvec hmac_sha384_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA512_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha512_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17392,8 +17206,6 @@ static struct aead_testvec hmac_sha512_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA1_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha1_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17455,8 +17267,6 @@ static struct aead_testvec hmac_sha1_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA224_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha224_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17518,8 +17328,6 @@ static struct aead_testvec hmac_sha224_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA256_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha256_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17583,8 +17391,6 @@ static struct aead_testvec hmac_sha256_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA384_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha384_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17652,8 +17458,6 @@ static struct aead_testvec hmac_sha384_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA512_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha512_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with 
cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -24434,8 +24238,6 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = { /* * ChaCha20-Poly1305 AEAD test vectors from RFC7539 2.8.2./A.5. */ -#define RFC7539_ENC_TEST_VECTORS 2 -#define RFC7539_DEC_TEST_VECTORS 2 static struct aead_testvec rfc7539_enc_tv_template[] = { { .key = "\x80\x81\x82\x83\x84\x85\x86\x87" @@ -24703,8 +24505,6 @@ static struct aead_testvec rfc7539_dec_tv_template[] = { /* * draft-irtf-cfrg-chacha20-poly1305 */ -#define RFC7539ESP_DEC_TEST_VECTORS 1 -#define RFC7539ESP_ENC_TEST_VECTORS 1 static struct aead_testvec rfc7539esp_enc_tv_template[] = { { .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" @@ -24927,8 +24727,6 @@ static struct cipher_testvec aes_kw_dec_tv_template[] = { * http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf * Only AES-128 is supported at this time. */ -#define ANSI_CPRNG_AES_TEST_VECTORS 6 - static struct cprng_testvec ansi_cprng_aes_tv_template[] = { { .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" @@ -25846,13 +25644,6 @@ static struct drbg_testvec drbg_nopr_ctr_aes128_tv_template[] = { }; /* Cast5 test vectors from RFC 2144 */ -#define CAST5_ENC_TEST_VECTORS 4 -#define CAST5_DEC_TEST_VECTORS 4 -#define CAST5_CBC_ENC_TEST_VECTORS 1 -#define CAST5_CBC_DEC_TEST_VECTORS 1 -#define CAST5_CTR_ENC_TEST_VECTORS 2 -#define CAST5_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec cast5_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x12\x34\x56\x78" @@ -26756,9 +26547,6 @@ static struct cipher_testvec cast5_ctr_dec_tv_template[] = { /* * ARC4 test vectors from OpenSSL */ -#define ARC4_ENC_TEST_VECTORS 7 -#define ARC4_DEC_TEST_VECTORS 7 - static struct cipher_testvec arc4_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x89\xab\xcd\xef", @@ -26894,9 +26682,6 @@ static struct cipher_testvec arc4_dec_tv_template[] = { /* * TEA test vectors */ -#define TEA_ENC_TEST_VECTORS 4 -#define TEA_DEC_TEST_VECTORS 4 - static struct cipher_testvec tea_enc_tv_template[] = { { .key = 
zeroed_string, @@ -26986,9 +26771,6 @@ static struct cipher_testvec tea_dec_tv_template[] = { /* * XTEA test vectors */ -#define XTEA_ENC_TEST_VECTORS 4 -#define XTEA_DEC_TEST_VECTORS 4 - static struct cipher_testvec xtea_enc_tv_template[] = { { .key = zeroed_string, @@ -27078,9 +26860,6 @@ static struct cipher_testvec xtea_dec_tv_template[] = { /* * KHAZAD test vectors. */ -#define KHAZAD_ENC_TEST_VECTORS 5 -#define KHAZAD_DEC_TEST_VECTORS 5 - static struct cipher_testvec khazad_enc_tv_template[] = { { .key = "\x80\x00\x00\x00\x00\x00\x00\x00" @@ -27177,11 +26956,6 @@ static struct cipher_testvec khazad_dec_tv_template[] = { * Anubis test vectors. */ -#define ANUBIS_ENC_TEST_VECTORS 5 -#define ANUBIS_DEC_TEST_VECTORS 5 -#define ANUBIS_CBC_ENC_TEST_VECTORS 2 -#define ANUBIS_CBC_DEC_TEST_VECTORS 2 - static struct cipher_testvec anubis_enc_tv_template[] = { { .key = "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe" @@ -27381,9 +27155,6 @@ static struct cipher_testvec anubis_cbc_dec_tv_template[] = { /* * XETA test vectors */ -#define XETA_ENC_TEST_VECTORS 4 -#define XETA_DEC_TEST_VECTORS 4 - static struct cipher_testvec xeta_enc_tv_template[] = { { .key = zeroed_string, @@ -27473,9 +27244,6 @@ static struct cipher_testvec xeta_dec_tv_template[] = { /* * FCrypt test vectors */ -#define FCRYPT_ENC_TEST_VECTORS ARRAY_SIZE(fcrypt_pcbc_enc_tv_template) -#define FCRYPT_DEC_TEST_VECTORS ARRAY_SIZE(fcrypt_pcbc_dec_tv_template) - static struct cipher_testvec fcrypt_pcbc_enc_tv_template[] = { { /* http://www.openafs.org/pipermail/openafs-devel/2000-December/005320.html */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00", @@ -27601,17 +27369,6 @@ static struct cipher_testvec fcrypt_pcbc_dec_tv_template[] = { /* * CAMELLIA test vectors. 
*/ -#define CAMELLIA_ENC_TEST_VECTORS 4 -#define CAMELLIA_DEC_TEST_VECTORS 4 -#define CAMELLIA_CBC_ENC_TEST_VECTORS 3 -#define CAMELLIA_CBC_DEC_TEST_VECTORS 3 -#define CAMELLIA_CTR_ENC_TEST_VECTORS 2 -#define CAMELLIA_CTR_DEC_TEST_VECTORS 2 -#define CAMELLIA_LRW_ENC_TEST_VECTORS 8 -#define CAMELLIA_LRW_DEC_TEST_VECTORS 8 -#define CAMELLIA_XTS_ENC_TEST_VECTORS 5 -#define CAMELLIA_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec camellia_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" @@ -31331,9 +31088,6 @@ static struct cipher_testvec camellia_xts_dec_tv_template[] = { /* * SEED test vectors */ -#define SEED_ENC_TEST_VECTORS 4 -#define SEED_DEC_TEST_VECTORS 4 - static struct cipher_testvec seed_enc_tv_template[] = { { .key = zeroed_string, @@ -31418,7 +31172,6 @@ static struct cipher_testvec seed_dec_tv_template[] = { } }; -#define SALSA20_STREAM_ENC_TEST_VECTORS 5 static struct cipher_testvec salsa20_stream_enc_tv_template[] = { /* * Testvectors from verified.test-vectors submitted to ECRYPT. @@ -32588,7 +32341,6 @@ static struct cipher_testvec salsa20_stream_enc_tv_template[] = { }, }; -#define CHACHA20_ENC_TEST_VECTORS 4 static struct cipher_testvec chacha20_enc_tv_template[] = { { /* RFC7539 A.2. Test Vector #1 */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -33100,8 +32852,6 @@ static struct cipher_testvec chacha20_enc_tv_template[] = { /* * CTS (Cipher Text Stealing) mode tests */ -#define CTS_MODE_ENC_TEST_VECTORS 6 -#define CTS_MODE_DEC_TEST_VECTORS 6 static struct cipher_testvec cts_mode_enc_tv_template[] = { { /* from rfc3962 */ .klen = 16, @@ -33322,9 +33072,6 @@ struct comp_testvec { * Params: winbits=-11, Z_DEFAULT_COMPRESSION, MAX_MEM_LEVEL. */ -#define DEFLATE_COMP_TEST_VECTORS 2 -#define DEFLATE_DECOMP_TEST_VECTORS 2 - static struct comp_testvec deflate_comp_tv_template[] = { { .inlen = 70, @@ -33400,9 +33147,6 @@ static struct comp_testvec deflate_decomp_tv_template[] = { /* * LZO test vectors (null-terminated strings). 
*/ -#define LZO_COMP_TEST_VECTORS 2 -#define LZO_DECOMP_TEST_VECTORS 2 - static struct comp_testvec lzo_comp_tv_template[] = { { .inlen = 70, @@ -33534,8 +33278,6 @@ static struct hash_testvec michael_mic_tv_template[] = { /* * CRC32 test vectors */ -#define CRC32_TEST_VECTORS 14 - static struct hash_testvec crc32_tv_template[] = { { .key = "\x87\xa9\xcb\xed", @@ -33968,8 +33710,6 @@ static struct hash_testvec crc32_tv_template[] = { /* * CRC32C test vectors */ -#define CRC32C_TEST_VECTORS 15 - static struct hash_testvec crc32c_tv_template[] = { { .psize = 0, @@ -34406,8 +34146,6 @@ static struct hash_testvec crc32c_tv_template[] = { /* * Blakcifn CRC test vectors */ -#define BFIN_CRC_TEST_VECTORS 6 - static struct hash_testvec bfin_crc_tv_template[] = { { .psize = 0, @@ -34493,9 +34231,6 @@ static struct hash_testvec bfin_crc_tv_template[] = { }; -#define LZ4_COMP_TEST_VECTORS 1 -#define LZ4_DECOMP_TEST_VECTORS 1 - static struct comp_testvec lz4_comp_tv_template[] = { { .inlen = 70, @@ -34526,9 +34261,6 @@ static struct comp_testvec lz4_decomp_tv_template[] = { }, }; -#define LZ4HC_COMP_TEST_VECTORS 1 -#define LZ4HC_DECOMP_TEST_VECTORS 1 - static struct comp_testvec lz4hc_comp_tv_template[] = { { .inlen = 70, From de0f96d7724f4ce3b48af10a0ea29150f6ddd5ba Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 12 Jan 2017 15:03:21 +0000 Subject: [PATCH 045/142] crypto: mediatek - make symbol of_crypto_id static Fixes the following sparse warning: drivers/crypto/mediatek/mtk-platform.c:585:27: warning: symbol 'of_crypto_id' was not declared. Should it be static? 
Signed-off-by: Wei Yongjun Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c index 286296fbf15d..a9c713d4c733 100644 --- a/drivers/crypto/mediatek/mtk-platform.c +++ b/drivers/crypto/mediatek/mtk-platform.c @@ -582,7 +582,7 @@ static int mtk_crypto_remove(struct platform_device *pdev) return 0; } -const struct of_device_id of_crypto_id[] = { +static const struct of_device_id of_crypto_id[] = { { .compatible = "mediatek,eip97-crypto" }, {}, }; From db602a7f940a71870c17e39bcbe4e4d7a4a8273e Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 10:52:46 -0800 Subject: [PATCH 046/142] hwrng: n2 - limit error spewage when self-test fails If the self-test fails, it probably won't actually suddenly start working. Currently, this causes an endless spew of error messages on the console and in the logs, so this patch adds a limiter to the test. 
Reported-by: Sowmini Varadhan Signed-off-by: Shannon Nelson Signed-off-by: Herbert Xu --- drivers/char/hw_random/n2-drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 3b06c1d6cfb2..102560ffed44 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -589,6 +589,7 @@ static void n2rng_work(struct work_struct *work) { struct n2rng *np = container_of(work, struct n2rng, work.work); int err = 0; + static int retries = 4; if (!(np->flags & N2RNG_FLAG_CONTROL)) { err = n2rng_guest_check(np); @@ -606,7 +607,9 @@ static void n2rng_work(struct work_struct *work) dev_info(&np->op->dev, "RNG ready\n"); } - if (err && !(np->flags & N2RNG_FLAG_SHUTDOWN)) + if (--retries == 0) + dev_err(&np->op->dev, "Self-test retries failed, RNG not ready\n"); + else if (err && !(np->flags & N2RNG_FLAG_SHUTDOWN)) schedule_delayed_work(&np->work, HZ * 2); } From becbc4940ad8e8ff560e1ceee33d9bb4fe4c9225 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 10:52:47 -0800 Subject: [PATCH 047/142] hwrng: n2 - add device data descriptions Since we're going to need to keep track of more than just one attribute of the hardware, we'll change the use of the data field from the match struct from a single flag to a struct pointer. This patch adds the struct template and initial descriptions. 
Signed-off-by: Shannon Nelson Signed-off-by: Herbert Xu --- drivers/char/hw_random/n2-drv.c | 47 +++++++++++++++++++++++++++------ drivers/char/hw_random/n2rng.h | 15 +++++++++++ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 102560ffed44..74c26c7e98b6 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -625,24 +625,23 @@ static const struct of_device_id n2rng_match[]; static int n2rng_probe(struct platform_device *op) { const struct of_device_id *match; - int multi_capable; int err = -ENOMEM; struct n2rng *np; match = of_match_device(n2rng_match, &op->dev); if (!match) return -EINVAL; - multi_capable = (match->data != NULL); n2rng_driver_version(); np = devm_kzalloc(&op->dev, sizeof(*np), GFP_KERNEL); if (!np) goto out; np->op = op; + np->data = (struct n2rng_template *)match->data; INIT_DELAYED_WORK(&np->work, n2rng_work); - if (multi_capable) + if (np->data->multi_capable) np->flags |= N2RNG_FLAG_MULTI; err = -ENODEV; @@ -673,8 +672,9 @@ static int n2rng_probe(struct platform_device *op) dev_err(&op->dev, "VF RNG lacks rng-#units property\n"); goto out_hvapi_unregister; } - } else + } else { np->num_units = 1; + } dev_info(&op->dev, "Registered RNG HVAPI major %lu minor %lu\n", np->hvapi_major, np->hvapi_minor); @@ -731,30 +731,61 @@ static int n2rng_remove(struct platform_device *op) return 0; } +static struct n2rng_template n2_template = { + .id = N2_n2_rng, + .multi_capable = 0, + .chip_version = 1, +}; + +static struct n2rng_template vf_template = { + .id = N2_vf_rng, + .multi_capable = 1, + .chip_version = 1, +}; + +static struct n2rng_template kt_template = { + .id = N2_kt_rng, + .multi_capable = 1, + .chip_version = 1, +}; + +static struct n2rng_template m4_template = { + .id = N2_m4_rng, + .multi_capable = 1, + .chip_version = 2, +}; + +static struct n2rng_template m7_template = { + .id = N2_m7_rng, + .multi_capable = 1, + 
.chip_version = 2, +}; + static const struct of_device_id n2rng_match[] = { { .name = "random-number-generator", .compatible = "SUNW,n2-rng", + .data = &n2_template, }, { .name = "random-number-generator", .compatible = "SUNW,vf-rng", - .data = (void *) 1, + .data = &vf_template, }, { .name = "random-number-generator", .compatible = "SUNW,kt-rng", - .data = (void *) 1, + .data = &kt_template, }, { .name = "random-number-generator", .compatible = "ORCL,m4-rng", - .data = (void *) 1, + .data = &m4_template, }, { .name = "random-number-generator", .compatible = "ORCL,m7-rng", - .data = (void *) 1, + .data = &m7_template, }, {}, }; diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h index f244ac89087f..e41e55a7b227 100644 --- a/drivers/char/hw_random/n2rng.h +++ b/drivers/char/hw_random/n2rng.h @@ -60,6 +60,20 @@ extern unsigned long sun4v_rng_data_read_diag_v2(unsigned long data_ra, extern unsigned long sun4v_rng_data_read(unsigned long data_ra, unsigned long *tick_delta); +enum n2rng_compat_id { + N2_n2_rng, + N2_vf_rng, + N2_kt_rng, + N2_m4_rng, + N2_m7_rng, +}; + +struct n2rng_template { + enum n2rng_compat_id id; + int multi_capable; + int chip_version; +}; + struct n2rng_unit { u64 control[HV_RNG_NUM_CONTROL]; }; @@ -74,6 +88,7 @@ struct n2rng { #define N2RNG_FLAG_SHUTDOWN 0x00000010 /* Driver unregistering */ #define N2RNG_FLAG_BUFFER_VALID 0x00000020 /* u32 buffer holds valid data */ + struct n2rng_template *data; int num_units; struct n2rng_unit *units; From 07e25d43be8502bd8ab6122c4f6449ebf30e98f7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 10:52:48 -0800 Subject: [PATCH 048/142] hwrng: n2 - support new hardware register layout Add the new register layout constants and the requisite logic for using them. 
Signed-off-by: Shannon Nelson Signed-off-by: Herbert Xu --- drivers/char/hw_random/n2-drv.c | 140 ++++++++++++++++++++++++-------- drivers/char/hw_random/n2rng.h | 36 ++++++-- 2 files changed, 132 insertions(+), 44 deletions(-) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 74c26c7e98b6..f0bd5ee32122 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -302,26 +302,57 @@ static int n2rng_try_read_ctl(struct n2rng *np) return n2rng_hv_err_trans(hv_err); } -#define CONTROL_DEFAULT_BASE \ - ((2 << RNG_CTL_ASEL_SHIFT) | \ - (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_CTL_WAIT_SHIFT) | \ - RNG_CTL_LFSR) +static u64 n2rng_control_default(struct n2rng *np, int ctl) +{ + u64 val = 0; -#define CONTROL_DEFAULT_0 \ - (CONTROL_DEFAULT_BASE | \ - (1 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES1) -#define CONTROL_DEFAULT_1 \ - (CONTROL_DEFAULT_BASE | \ - (2 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES2) -#define CONTROL_DEFAULT_2 \ - (CONTROL_DEFAULT_BASE | \ - (3 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES3) -#define CONTROL_DEFAULT_3 \ - (CONTROL_DEFAULT_BASE | \ - RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3) + if (np->data->chip_version == 1) { + val = ((2 << RNG_v1_CTL_ASEL_SHIFT) | + (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_v1_CTL_WAIT_SHIFT) | + RNG_CTL_LFSR); + + switch (ctl) { + case 0: + val |= (1 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES1; + break; + case 1: + val |= (2 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES2; + break; + case 2: + val |= (3 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES3; + break; + case 3: + val |= RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3; + break; + default: + break; + } + + } else { + val = ((2 << RNG_v2_CTL_ASEL_SHIFT) | + (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_v2_CTL_WAIT_SHIFT) | + RNG_CTL_LFSR); + + switch (ctl) { + case 0: + val |= (1 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES1; + break; + case 1: + val |= (2 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES2; + break; + case 2: + val |= (3 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES3; + break; + case 
3: + val |= RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3; + break; + default: + break; + } + } + + return val; +} static void n2rng_control_swstate_init(struct n2rng *np) { @@ -336,10 +367,10 @@ static void n2rng_control_swstate_init(struct n2rng *np) for (i = 0; i < np->num_units; i++) { struct n2rng_unit *up = &np->units[i]; - up->control[0] = CONTROL_DEFAULT_0; - up->control[1] = CONTROL_DEFAULT_1; - up->control[2] = CONTROL_DEFAULT_2; - up->control[3] = CONTROL_DEFAULT_3; + up->control[0] = n2rng_control_default(np, 0); + up->control[1] = n2rng_control_default(np, 1); + up->control[2] = n2rng_control_default(np, 2); + up->control[3] = n2rng_control_default(np, 3); } np->hv_state = HV_RNG_STATE_UNCONFIGURED; @@ -399,6 +430,7 @@ static int n2rng_data_read(struct hwrng *rng, u32 *data) } else { int err = n2rng_generic_read_data(ra); if (!err) { + np->flags |= N2RNG_FLAG_BUFFER_VALID; np->buffer = np->test_data >> 32; *data = np->test_data & 0xffffffff; len = 4; @@ -487,9 +519,21 @@ static void n2rng_dump_test_buffer(struct n2rng *np) static int n2rng_check_selftest_buffer(struct n2rng *np, unsigned long unit) { - u64 val = SELFTEST_VAL; + u64 val; int err, matches, limit; + switch (np->data->id) { + case N2_n2_rng: + case N2_vf_rng: + case N2_kt_rng: + case N2_m4_rng: /* yes, m4 uses the old value */ + val = RNG_v1_SELFTEST_VAL; + break; + default: + val = RNG_v2_SELFTEST_VAL; + break; + } + matches = 0; for (limit = 0; limit < SELFTEST_LOOPS_MAX; limit++) { matches += n2rng_test_buffer_find(np, val); @@ -512,14 +556,32 @@ static int n2rng_check_selftest_buffer(struct n2rng *np, unsigned long unit) static int n2rng_control_selftest(struct n2rng *np, unsigned long unit) { int err; + u64 base, base3; - np->test_control[0] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[1] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[2] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[3] = ((0x2 << RNG_CTL_ASEL_SHIFT) | - RNG_CTL_LFSR | - ((SELFTEST_TICKS - 2) << 
RNG_CTL_WAIT_SHIFT)); + switch (np->data->id) { + case N2_n2_rng: + case N2_vf_rng: + case N2_kt_rng: + base = RNG_v1_CTL_ASEL_NOOUT << RNG_v1_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + ((RNG_v1_SELFTEST_TICKS - 2) << RNG_v1_CTL_WAIT_SHIFT); + break; + case N2_m4_rng: + base = RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + ((RNG_v1_SELFTEST_TICKS - 2) << RNG_v2_CTL_WAIT_SHIFT); + break; + default: + base = RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + (RNG_v2_SELFTEST_TICKS << RNG_v2_CTL_WAIT_SHIFT); + break; + } + np->test_control[0] = base; + np->test_control[1] = base; + np->test_control[2] = base; + np->test_control[3] = base3; err = n2rng_entropy_diag_read(np, unit, np->test_control, HV_RNG_STATE_HEALTHCHECK, @@ -557,11 +619,19 @@ static int n2rng_control_configure_units(struct n2rng *np) struct n2rng_unit *up = &np->units[unit]; unsigned long ctl_ra = __pa(&up->control[0]); int esrc; - u64 base; + u64 base, shift; - base = ((np->accum_cycles << RNG_CTL_WAIT_SHIFT) | - (2 << RNG_CTL_ASEL_SHIFT) | - RNG_CTL_LFSR); + if (np->data->chip_version == 1) { + base = ((np->accum_cycles << RNG_v1_CTL_WAIT_SHIFT) | + (RNG_v1_CTL_ASEL_NOOUT << RNG_v1_CTL_ASEL_SHIFT) | + RNG_CTL_LFSR); + shift = RNG_v1_CTL_VCO_SHIFT; + } else { + base = ((np->accum_cycles << RNG_v2_CTL_WAIT_SHIFT) | + (RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT) | + RNG_CTL_LFSR); + shift = RNG_v2_CTL_VCO_SHIFT; + } /* XXX This isn't the best. 
We should fetch a bunch * XXX of words using each entropy source combined XXX @@ -570,7 +640,7 @@ static int n2rng_control_configure_units(struct n2rng *np) */ for (esrc = 0; esrc < 3; esrc++) up->control[esrc] = base | - (esrc << RNG_CTL_VCO_SHIFT) | + (esrc << shift) | (RNG_CTL_ES1 << esrc); up->control[3] = base | diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h index e41e55a7b227..6bad6cc634e8 100644 --- a/drivers/char/hw_random/n2rng.h +++ b/drivers/char/hw_random/n2rng.h @@ -6,18 +6,34 @@ #ifndef _N2RNG_H #define _N2RNG_H -#define RNG_CTL_WAIT 0x0000000001fffe00ULL /* Minimum wait time */ -#define RNG_CTL_WAIT_SHIFT 9 -#define RNG_CTL_BYPASS 0x0000000000000100ULL /* VCO voltage source */ -#define RNG_CTL_VCO 0x00000000000000c0ULL /* VCO rate control */ -#define RNG_CTL_VCO_SHIFT 6 -#define RNG_CTL_ASEL 0x0000000000000030ULL /* Analog MUX select */ -#define RNG_CTL_ASEL_SHIFT 4 +/* ver1 devices - n2-rng, vf-rng, kt-rng */ +#define RNG_v1_CTL_WAIT 0x0000000001fffe00ULL /* Minimum wait time */ +#define RNG_v1_CTL_WAIT_SHIFT 9 +#define RNG_v1_CTL_BYPASS 0x0000000000000100ULL /* VCO voltage source */ +#define RNG_v1_CTL_VCO 0x00000000000000c0ULL /* VCO rate control */ +#define RNG_v1_CTL_VCO_SHIFT 6 +#define RNG_v1_CTL_ASEL 0x0000000000000030ULL /* Analog MUX select */ +#define RNG_v1_CTL_ASEL_SHIFT 4 +#define RNG_v1_CTL_ASEL_NOOUT 2 + +/* these are the same in v2 as in v1 */ #define RNG_CTL_LFSR 0x0000000000000008ULL /* Use LFSR or plain shift */ #define RNG_CTL_ES3 0x0000000000000004ULL /* Enable entropy source 3 */ #define RNG_CTL_ES2 0x0000000000000002ULL /* Enable entropy source 2 */ #define RNG_CTL_ES1 0x0000000000000001ULL /* Enable entropy source 1 */ +/* ver2 devices - m4-rng, m7-rng */ +#define RNG_v2_CTL_WAIT 0x0000000007fff800ULL /* Minimum wait time */ +#define RNG_v2_CTL_WAIT_SHIFT 12 +#define RNG_v2_CTL_BYPASS 0x0000000000000400ULL /* VCO voltage source */ +#define RNG_v2_CTL_VCO 0x0000000000000300ULL /* VCO rate control 
*/ +#define RNG_v2_CTL_VCO_SHIFT 9 +#define RNG_v2_CTL_PERF 0x0000000000000180ULL /* Perf */ +#define RNG_v2_CTL_ASEL 0x0000000000000070ULL /* Analog MUX select */ +#define RNG_v2_CTL_ASEL_SHIFT 4 +#define RNG_v2_CTL_ASEL_NOOUT 7 + + #define HV_FAST_RNG_GET_DIAG_CTL 0x130 #define HV_FAST_RNG_CTL_READ 0x131 #define HV_FAST_RNG_CTL_WRITE 0x132 @@ -112,8 +128,10 @@ struct n2rng { u64 scratch_control[HV_RNG_NUM_CONTROL]; -#define SELFTEST_TICKS 38859 -#define SELFTEST_VAL ((u64)0xB8820C7BD387E32C) +#define RNG_v1_SELFTEST_TICKS 38859 +#define RNG_v1_SELFTEST_VAL ((u64)0xB8820C7BD387E32C) +#define RNG_v2_SELFTEST_TICKS 64 +#define RNG_v2_SELFTEST_VAL ((u64)0xffffffffffffffff) #define SELFTEST_POLY ((u64)0x231DCEE91262B8A3) #define SELFTEST_MATCH_GOAL 6 #define SELFTEST_LOOPS_MAX 40000 From 0ff1436fb2e3da085f7177d03ce4362c45b75d57 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 10:52:49 -0800 Subject: [PATCH 049/142] hwrng: n2 - update version info Signed-off-by: Shannon Nelson Signed-off-by: Herbert Xu --- drivers/char/hw_random/n2-drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index f0bd5ee32122..31cbdbbaebfc 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -21,11 +21,11 @@ #define DRV_MODULE_NAME "n2rng" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "0.2" -#define DRV_MODULE_RELDATE "July 27, 2011" +#define DRV_MODULE_VERSION "0.3" +#define DRV_MODULE_RELDATE "Jan 7, 2017" static char version[] = - DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + DRV_MODULE_NAME " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; MODULE_AUTHOR("David S. 
Miller (davem@davemloft.net)"); MODULE_DESCRIPTION("Niagara2 RNG driver"); @@ -765,7 +765,7 @@ static int n2rng_probe(struct platform_device *op) "multi-unit-capable" : "single-unit"), np->num_units); - np->hwrng.name = "n2rng"; + np->hwrng.name = DRV_MODULE_NAME; np->hwrng.data_read = n2rng_data_read; np->hwrng.priv = (unsigned long) np; From 658fa754cd0700e8e96d61898f0953c4514859c6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jan 2017 08:33:26 +0000 Subject: [PATCH 050/142] crypto: arm/aes - avoid reserved 'tt' mnemonic in asm code The ARMv8-M architecture introduces 'tt' and 'ttt' instructions, which means we can no longer use 'tt' as a register alias on recent versions of binutils for ARM. So replace the alias with 'ttab'. Fixes: 81edb4262975 ("crypto: arm/aes - replace scalar AES cipher") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-cipher-core.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index b04261e1e068..c817a86c4ca8 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -18,7 +18,7 @@ rounds .req r1 in .req r2 out .req r3 - tt .req ip + ttab .req ip t0 .req lr t1 .req r2 @@ -34,9 +34,9 @@ .macro __load, out, in, idx .if __LINUX_ARM_ARCH__ < 7 && \idx > 0 - ldr \out, [tt, \in, lsr #(8 * \idx) - 2] + ldr \out, [ttab, \in, lsr #(8 * \idx) - 2] .else - ldr \out, [tt, \in, lsl #2] + ldr \out, [ttab, \in, lsl #2] .endif .endm @@ -136,7 +136,7 @@ eor r6, r6, r10 eor r7, r7, r11 - __adrl tt, \ttab + __adrl ttab, \ttab tst rounds, #2 bne 1f @@ -146,7 +146,7 @@ 1: subs rounds, rounds, #4 \round r8, r9, r10, r11, r4, r5, r6, r7 - __adrl tt, \ltab, ls + __adrl ttab, \ltab, ls \round r4, r5, r6, r7, r8, r9, r10, r11 bhi 0b From 87170961f31294dd213e0427bc7cea3283d91b84 Mon Sep 17 00:00:00 2001 From: "Gonglei \\(Arei\\)" Date: Fri, 13 Jan 2017 17:34:16 +0800 Subject: [PATCH 051/142] 
crypto: virtio - adjust priority of algorithm Some hardware accelerators (like intel aesni or the s390 cpacf functions) have lower priorities than virtio crypto, and those drivers are faster than the same in the host via virtio. So let's lower the priority of virtio-crypto's algorithm, making it higher than software implementations but lower than the hardware ones. Suggested-by: Christian Borntraeger Signed-off-by: Gonglei Acked-by: Christian Borntraeger Signed-off-by: Herbert Xu --- drivers/crypto/virtio/virtio_crypto_algs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index 970d0cafd25a..49defda4e03d 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -505,7 +505,7 @@ void virtio_crypto_ablkcipher_finalize_req( static struct crypto_alg virtio_crypto_algs[] = { { .cra_name = "cbc(aes)", .cra_driver_name = "virtio_crypto_aes_cbc", - .cra_priority = 501, + .cra_priority = 150, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct virtio_crypto_ablkcipher_ctx), From 3bfb2e6b32443841ff90460a78bdefb19f8d61e8 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 15 Jan 2017 13:37:50 +0100 Subject: [PATCH 052/142] crypto: img-hash - use dma_data_direction when calling dma_map_sg The fourth argument of dma_map_sg() and dma_unmap_sg() is an item of dma_data_direction enum. Function img_hash_xmit_dma() wrongly used DMA_MEM_TO_DEV, which is an item of dma_transfer_direction enum. Replace DMA_MEM_TO_DEV (whose value is 1) with DMA_TO_DEVICE (whose value is fortunately also 1) when calling dma_map_sg() and dma_unmap_sg().
Signed-off-by: Nicolas Iooss Signed-off-by: Herbert Xu --- drivers/crypto/img-hash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index a2e77b87485b..9b07f3d88feb 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -226,7 +226,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) struct dma_async_tx_descriptor *desc; struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); - ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); if (ctx->dma_ct == 0) { dev_err(hdev->dev, "Invalid DMA sg\n"); hdev->err = -EINVAL; @@ -241,7 +241,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) if (!desc) { dev_err(hdev->dev, "Null DMA descriptor\n"); hdev->err = -EINVAL; - dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); return -EINVAL; } desc->callback = img_hash_dma_callback; From 76512f2d8ca5e68160f4df0dead5e8e7b4bdb08e Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 18 Jan 2017 14:54:05 +0100 Subject: [PATCH 053/142] crypto: tcrypt - Add debug prints tcrypt is very tight-lipped when it succeeds, but a bit more feedback would be useful when developing or debugging crypto drivers, especially since even a successful run ends with the module failing to insert. 
Add a couple of debug prints, which can be enabled with dynamic debug: Before: # insmod tcrypt.ko mode=10 insmod: can't insert 'tcrypt.ko': Resource temporarily unavailable After: # insmod tcrypt.ko mode=10 dyndbg tcrypt: testing ecb(aes) tcrypt: testing cbc(aes) tcrypt: testing lrw(aes) tcrypt: testing xts(aes) tcrypt: testing ctr(aes) tcrypt: testing rfc3686(ctr(aes)) tcrypt: all tests passed insmod: can't insert 'tcrypt.ko': Resource temporarily unavailable Signed-off-by: Rabin Vincent Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index ae22f05d5936..9a11f3c2bf98 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -22,6 +22,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1010,6 +1012,8 @@ static inline int tcrypt_test(const char *alg) { int ret; + pr_debug("testing %s\n", alg); + ret = alg_test(alg, alg, 0, 0); /* non-fips algs return -EINVAL in fips mode */ if (fips_enabled && ret == -EINVAL) @@ -2059,6 +2063,8 @@ static int __init tcrypt_mod_init(void) if (err) { printk(KERN_ERR "tcrypt: one or more tests failed!\n"); goto err_free_tv; + } else { + pr_debug("all tests passed\n"); } /* We intentionaly return -EAGAIN to prevent keeping the module, From 13954e788dc7e3a47cc00419c120a63756be4eb1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 19 Jan 2017 12:23:32 +0000 Subject: [PATCH 054/142] crypto: arm/aes-neonbs - fix issue with v2.22 and older assembler The GNU assembler for ARM version 2.22 or older fails to infer the element size from the vmov instructions, and aborts the build in the following way; .../aes-neonbs-core.S: Assembler messages: .../aes-neonbs-core.S:817: Error: bad type for scalar -- `vmov q1h[1],r10' .../aes-neonbs-core.S:817: Error: bad type for scalar -- `vmov q1h[0],r9' .../aes-neonbs-core.S:817: Error: bad type for scalar -- `vmov q1l[1],r8' .../aes-neonbs-core.S:817: Error: bad type for scalar -- 
`vmov q1l[0],r7' .../aes-neonbs-core.S:818: Error: bad type for scalar -- `vmov q2h[1],r10' .../aes-neonbs-core.S:818: Error: bad type for scalar -- `vmov q2h[0],r9' .../aes-neonbs-core.S:818: Error: bad type for scalar -- `vmov q2l[1],r8' .../aes-neonbs-core.S:818: Error: bad type for scalar -- `vmov q2l[0],r7' Fix this by setting the element size explicitly, by replacing vmov with vmov.32. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-core.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S index c9477044fbba..12da247164d1 100644 --- a/arch/arm/crypto/aes-neonbs-core.S +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -766,13 +766,13 @@ ENTRY(aesbs_cbc_decrypt) ENDPROC(aesbs_cbc_decrypt) .macro next_ctr, q - vmov \q\()h[1], r10 + vmov.32 \q\()h[1], r10 adds r10, r10, #1 - vmov \q\()h[0], r9 + vmov.32 \q\()h[0], r9 adcs r9, r9, #0 - vmov \q\()l[1], r8 + vmov.32 \q\()l[1], r8 adcs r8, r8, #0 - vmov \q\()l[0], r7 + vmov.32 \q\()l[0], r7 adc r7, r7, #0 vrev32.8 \q, \q .endm From 587d531b8f67ebe62f8326849a7a685a03cbc904 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 19 Jan 2017 22:28:05 +0100 Subject: [PATCH 055/142] crypto: x86/crc32c - fix %progbits -> @progbits %progbits form is used on ARM (where @ is a comment char). x86 consistently uses @progbits everywhere else. 
Signed-off-by: Denys Vlasenko CC: Herbert Xu CC: Josh Poimboeuf CC: Xiaodong Liu CC: Megha Dey CC: George Spelvin CC: linux-crypto@vger.kernel.org CC: x86@kernel.org CC: linux-kernel@vger.kernel.org Reviewed-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dc05f010ca9b..7a7de27c6f41 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -312,7 +312,7 @@ do_return: ret ENDPROC(crc_pcl) -.section .rodata, "a", %progbits +.section .rodata, "a", @progbits ################################################################ ## jump table Table is 129 entries x 2 bytes each ################################################################ From e183914af00e15eb41ae666d44e323bfa154be13 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 19 Jan 2017 22:33:04 +0100 Subject: [PATCH 056/142] crypto: x86 - make constants readonly, allow linker to merge them A lot of asm-optimized routines in arch/x86/crypto/ keep their constants in .data. This is wrong, they should be in .rodata. Many of these constants are the same in different modules. For example, 128-bit shuffle mask 0x000102030405060708090A0B0C0D0E0F exists in at least half a dozen places. There is a way to let linker merge them and use just one copy. The rules are as follows: mergeable objects of different sizes should not share sections. You can't put them all in one .rodata section, they will lose "mergeability". GCC puts its mergeable constants in ".rodata.cstSIZE" sections, or ".rodata.cstSIZE." if -fdata-sections is used. This patch does the same: .section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 It is important that all data in such section consists of 16-byte elements, not larger ones, and there is no implicit use of one element from another.
When this is not the case, use non-mergeable section: .section .rodata[.VAR_NAME], "a", @progbits This reduces .data by ~15 kbytes: text data bss dec hex filename 11097415 2705840 2630712 16433967 fac32f vmlinux-prev.o 11112095 2690672 2630712 16433479 fac147 vmlinux.o Merged objects are visible in System.map: ffffffff81a28810 r POLY ffffffff81a28810 r POLY ffffffff81a28820 r TWOONE ffffffff81a28820 r TWOONE ffffffff81a28830 r PSHUFFLE_BYTE_FLIP_MASK <- merged regardless of ffffffff81a28830 r SHUF_MASK <------------- the name difference ffffffff81a28830 r SHUF_MASK ffffffff81a28830 r SHUF_MASK .. ffffffff81a28d00 r K512 <- merged three identical 640-byte tables ffffffff81a28d00 r K512 ffffffff81a28d00 r K512 Use of object names in section name suffixes is not strictly necessary, but might help if someday link stage will use garbage collection to eliminate unused sections (ld --gc-sections). Signed-off-by: Denys Vlasenko CC: Herbert Xu CC: Josh Poimboeuf CC: Xiaodong Liu CC: Megha Dey CC: linux-crypto@vger.kernel.org CC: x86@kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 43 ++++++++++++++----- arch/x86/crypto/aesni-intel_avx-x86_64.S | 32 ++++++++++---- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 5 ++- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 12 ++++-- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 14 +++++- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 12 +++++- arch/x86/crypto/chacha20-avx2-x86_64.S | 9 +++- arch/x86/crypto/chacha20-ssse3-x86_64.S | 7 ++- arch/x86/crypto/crct10dif-pcl-asm_64.S | 14 ++++-- arch/x86/crypto/des3_ede-asm_64.S | 2 +- arch/x86/crypto/ghash-clmulni-intel_asm.S | 3 +- arch/x86/crypto/poly1305-avx2-x86_64.S | 6 ++- arch/x86/crypto/poly1305-sse2-x86_64.S | 6 ++- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 5 ++- arch/x86/crypto/serpent-avx2-asm_64.S | 9 +++- .../crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S | 6 ++- .../crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S | 3 +- 
arch/x86/crypto/sha1-mb/sha1_x8_avx2.S | 15 ++++++- arch/x86/crypto/sha1_ni_asm.S | 8 ++-- arch/x86/crypto/sha256-avx-asm.S | 9 +++- arch/x86/crypto/sha256-avx2-asm.S | 9 +++- .../sha256-mb/sha256_mb_mgr_flush_avx2.S | 6 ++- .../sha256-mb/sha256_mb_mgr_submit_avx2.S | 3 +- arch/x86/crypto/sha256-mb/sha256_x8_avx2.S | 7 ++- arch/x86/crypto/sha256-ssse3-asm.S | 8 +++- arch/x86/crypto/sha256_ni_asm.S | 4 +- arch/x86/crypto/sha512-avx-asm.S | 9 ++-- arch/x86/crypto/sha512-avx2-asm.S | 10 ++++- .../sha512-mb/sha512_mb_mgr_flush_avx2.S | 10 ++++- .../sha512-mb/sha512_mb_mgr_submit_avx2.S | 4 +- arch/x86/crypto/sha512-mb/sha512_x4_avx2.S | 4 +- arch/x86/crypto/sha512-ssse3-asm.S | 9 ++-- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 6 ++- 33 files changed, 232 insertions(+), 77 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 383a6f84a060..3c465184ff8a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -46,27 +46,48 @@ #ifdef __x86_64__ -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 .align 16 .Lgf128mul_x_ble_mask: .octa 0x00000000000000010000000000000087 +.section .rodata.cst16.POLY, "aM", @progbits, 16 +.align 16 POLY: .octa 0xC2000000000000000000000000000001 +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 TWOONE: .octa 0x00000001000000000000000000000001 +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 +SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst16.MASK1, "aM", @progbits, 16 +.align 16 +MASK1: .octa 0x0000000000000000ffffffffffffffff +.section .rodata.cst16.MASK2, "aM", @progbits, 16 +.align 16 +MASK2: .octa 0xffffffffffffffff0000000000000000 +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 +ONE: .octa 0x00000000000000000000000000000001 +.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16 +.align 16 +F_MIN_MASK: 
.octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 +.section .rodata.cst16.dec, "aM", @progbits, 16 +.align 16 +dec: .octa 0x1 +.section .rodata.cst16.enc, "aM", @progbits, 16 +.align 16 +enc: .octa 0x2 + # order of these constants should not change. # more specifically, ALL_F should follow SHIFT_MASK, -# and ZERO should follow ALL_F - -SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F -MASK1: .octa 0x0000000000000000ffffffffffffffff -MASK2: .octa 0xffffffffffffffff0000000000000000 +# and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 -ONE: .octa 0x00000000000000000000000000000001 -F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 -dec: .octa 0x1 -enc: .octa 0x2 + .octa 0x00000000000000000000000000000000 .text diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 522ab68d1c88..d664382c6e56 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -122,22 +122,38 @@ #include #include -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.POLY, "aM", @progbits, 16 .align 16 - POLY: .octa 0xC2000000000000000000000000000001 + +.section .rodata.cst16.POLY2, "aM", @progbits, 16 +.align 16 POLY2: .octa 0xC20000000000000000000001C2000000 + +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 TWOONE: .octa 0x00000001000000000000000000000001 -# order of these constants should not change. 
-# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F - +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F + +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 +ONE: .octa 0x00000000000000000000000000000001 + +.section .rodata.cst16.ONEf, "aM", @progbits, 16 +.align 16 +ONEf: .octa 0x01000000000000000000000000000000 + +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 -ONE: .octa 0x00000000000000000000000000000001 -ONEf: .octa 0x01000000000000000000000000000000 + .octa 0x00000000000000000000000000000000 .text diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index aa9e8bd163f6..f7c495e2863c 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -571,7 +571,9 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 16(rio); \ vmovdqu y7, 15 * 16(rio); -.data + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 .align 16 #define SHUFB_BYTES(idx) \ @@ -711,6 +713,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 /* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 .L0f0f0f0f: .long 0x0f0f0f0f diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 16186c18656d..eee5b3982cfd 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -610,20 +610,25 @@ 
ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 32(rio); \ vmovdqu y7, 15 * 32(rio); -.data -.align 32 +.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 +.align 32 #define SHUFB_BYTES(idx) \ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) - .Lshufb_16x16b: .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) +.section .rodata.cst32.pack_bswap, "aM", @progbits, 32 +.align 32 .Lpack_bswap: .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + /* For CTR-mode IV byteswap */ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 @@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 /* 4-bit mask */ .L0f0f0f0f: diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 14fa1966bf01..b4a8806234ea 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -195,19 +195,29 @@ vpshufb rmask, x0, x0; \ vpshufb rmask, x1, x1; -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16 +.align 16 .Lbswap_iv_mask: .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 +.section 
.rodata.cst4.32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index c419389889cd..952d3156a933 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -225,8 +225,7 @@ vpshufb rmask, x2, x2; \ vpshufb rmask, x3, x3; -.data - +.section .rodata.cst16, "aM", @progbits, 16 .align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 @@ -244,10 +243,19 @@ .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 .Lrkr_dec_QBAR_QBAR_QBAR_QBAR: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.L16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 + +.section .rodata.cst4.L32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 + +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S index 16694e625f77..3a2dc3dc6cac 100644 --- a/arch/x86/crypto/chacha20-avx2-x86_64.S +++ b/arch/x86/crypto/chacha20-avx2-x86_64.S @@ -11,13 +11,18 @@ #include -.data +.section .rodata.cst32.ROT8, "aM", @progbits, 32 .align 32 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 .octa 0x0e0d0c0f0a09080b0605040702010003 + +.section .rodata.cst32.ROT16, "aM", @progbits, 32 +.align 32 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 .octa 0x0d0c0f0e09080b0a0504070601000302 + +.section .rodata.cst32.CTRINC, "aM", @progbits, 32 +.align 32 CTRINC: .octa 0x00000003000000020000000100000000 .octa 0x00000007000000060000000500000004 diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S index 3a33124e9112..3f511a7d73b8 100644 --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S +++ 
b/arch/x86/crypto/chacha20-ssse3-x86_64.S @@ -11,11 +11,14 @@ #include -.data +.section .rodata.cst16.ROT8, "aM", @progbits, 16 .align 16 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 +.section .rodata.cst16.CTRINC, "aM", @progbits, 16 +.align 16 CTRINC: .octa 0x00000003000000020000000100000000 .text diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 35e97569d05f..de04d3e98d8d 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -554,12 +554,11 @@ _only_less_than_2: ENDPROC(crc_t10dif_pcl) -.data - +.section .rodata, "a", @progbits +.align 16 # precomputed constants # these constants are precomputed from the poly: # 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 # Q = 0x18BB70000 # rk1 = 2^(32*3) mod Q << 32 # rk2 = 2^(32*5) mod Q << 32 @@ -613,14 +612,23 @@ rk20: +.section .rodata.cst16.mask1, "aM", @progbits, 16 +.align 16 mask1: .octa 0x80808080808080808080808080808080 + +.section .rodata.cst16.mask2, "aM", @progbits, 16 +.align 16 mask2: .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32 +.align 32 pshufb_shf_table: # use these values for shift constants for the pshufb instruction # different alignments result in values as shown: diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index 038f6ae87c5e..f3e91647ca27 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -537,7 +537,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) ret; ENDPROC(des3_ede_x86_64_crypt_blk_3way) -.data +.section .rodata, "a", @progbits .align 16 .L_s1: .quad 0x0010100001010400, 0x0000000000000000 diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S 
b/arch/x86/crypto/ghash-clmulni-intel_asm.S index eed55c8cca4f..f94375a8dcd1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -20,8 +20,7 @@ #include #include -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .octa 0x000102030405060708090a0b0c0d0e0f diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index eff2f414e22b..3b6e70d085da 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -11,11 +11,13 @@ #include -.data +.section .rodata.cst32.ANMASK, "aM", @progbits, 32 .align 32 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst32.ORMASK, "aM", @progbits, 32 +.align 32 ORMASK: .octa 0x00000000010000000000000001000000 .octa 0x00000000010000000000000001000000 diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index 338c748054ed..c88c670cb5fc 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -11,10 +11,12 @@ #include -.data +.section .rodata.cst16.ANMASK, "aM", @progbits, 16 .align 16 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst16.ORMASK, "aM", @progbits, 16 +.align 16 ORMASK: .octa 0x00000000010000000000000001000000 .text diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 8be571808342..2925077f8c6a 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -29,11 +29,12 @@ .file "serpent-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 97c48add33ed..d67888f2a52a 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -20,13 +20,18 @@ .file "serpent-avx2-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_0: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_1: .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S index 96df6a39d7e2..93b945597ecf 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S @@ -281,11 +281,13 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha1_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S index 63a0d9c8e31f..7a93b1c0d69a 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S @@ -203,8 +203,7 @@ return_null: ENDPROC(sha1_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S index c9dae1cd2919..20f77aa633de 100644 --- 
a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S @@ -461,21 +461,32 @@ lloop: ENDPROC(sha1_x8_avx2) -.data - +.section .rodata.cst32.K00_19, "aM", @progbits, 32 .align 32 K00_19: .octa 0x5A8279995A8279995A8279995A827999 .octa 0x5A8279995A8279995A8279995A827999 + +.section .rodata.cst32.K20_39, "aM", @progbits, 32 +.align 32 K20_39: .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 + +.section .rodata.cst32.K40_59, "aM", @progbits, 32 +.align 32 K40_59: .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC + +.section .rodata.cst32.K60_79, "aM", @progbits, 32 +.align 32 K60_79: .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 874a651b9e7d..ebbdba72ae07 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -293,10 +293,12 @@ ENTRY(sha1_ni_transform) ret ENDPROC(sha1_ni_transform) -.data - -.align 64 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x000102030405060708090a0b0c0d0e0f + +.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16 +.align 16 UPPER_WORD_MASK: .octa 0xFFFFFFFF000000000000000000000000 diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 92b3b5d75ba9..e08888a1a5f2 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -463,7 +463,7 @@ done_hash: ret ENDPROC(sha256_transform_avx) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -483,14 +483,21 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 
0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 570ec5ec62d7..89c8f09787d2 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -723,7 +723,7 @@ done_hash: ret ENDPROC(sha256_transform_rorx) -.data +.section .rodata.cst512.K256, "aM", @progbits, 512 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -759,14 +759,21 @@ K256: .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203 # shuffle xBxA -> 00BA +.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32 +.align 32 _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100 # shuffle xDxC -> DC00 +.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32 +.align 32 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index a78a0694ddef..8fe6338bcc84 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -284,11 +284,13 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha256_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", 
@progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S index 7ea670e25acc..b36ae7454084 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S @@ -208,8 +208,7 @@ return_null: ENDPROC(sha256_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S index aa21aea4c722..1687c80c5995 100644 --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S @@ -437,7 +437,8 @@ Lrounds_16_xx: ret ENDPROC(sha256_x8_avx2) -.data + +.section .rodata.K256_8, "a", @progbits .align 64 K256_8: .octa 0x428a2f98428a2f98428a2f98428a2f98 @@ -568,10 +569,14 @@ K256_8: .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 .octa 0xc67178f2c67178f2c67178f2c67178f2 .octa 0xc67178f2c67178f2c67178f2c67178f2 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 .global K256 K256: diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 2cedc44e8121..39b83c93e7fd 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -474,7 +474,7 @@ done_hash: ret ENDPROC(sha256_transform_ssse3) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -494,13 +494,19 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 
0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 748cdf21a938..fb58f58ecfbc 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -329,7 +329,7 @@ ENTRY(sha256_ni_transform) ret ENDPROC(sha256_ni_transform) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -349,5 +349,7 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 565274d6a641..39235fefe6f7 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -370,14 +370,17 @@ ENDPROC(sha512_transform_avx) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). 
+.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 1f20b35d8573..7f5f6c6ec72e 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -684,8 +684,11 @@ ENDPROC(sha512_transform_rorx) ######################################################################## ### Binary Data -.data +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 .align 64 # K[t] used in SHA512 hashing K512: @@ -730,14 +733,17 @@ K512: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 .align 32 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. 
PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 +.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32 +.align 32 MASK_YMM_LO: .octa 0x00000000000000000000000000000000 .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S index 3ddba19a0db6..7c629caebc05 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S @@ -280,12 +280,18 @@ ENTRY(sha512_mb_mgr_get_comp_job_avx2) pop %rbx ret ENDPROC(sha512_mb_mgr_get_comp_job_avx2) -.data -.align 16 +.section .rodata.cst8.one, "aM", @progbits, 8 +.align 8 one: .quad 1 + +.section .rodata.cst8.two, "aM", @progbits, 8 +.align 8 two: .quad 2 + +.section .rodata.cst8.three, "aM", @progbits, 8 +.align 8 three: .quad 3 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S index 815f07bdd1f8..4ba709ba78e5 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S @@ -209,8 +209,9 @@ return_null: xor job_rax, job_rax jmp return ENDPROC(sha512_mb_mgr_submit_avx2) -.data +/* UNUSED? 
+.section .rodata.cst16, "aM", @progbits, 16 .align 16 H0: .int 0x6a09e667 H1: .int 0xbb67ae85 @@ -220,3 +221,4 @@ H4: .int 0x510e527f H5: .int 0x9b05688c H6: .int 0x1f83d9ab H7: .int 0x5be0cd19 +*/ diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S index 31ab1eff6413..e22e907643a6 100644 --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S @@ -361,7 +361,7 @@ Lrounds_16_xx: ret ENDPROC(sha512_x4_avx2) -.data +.section .rodata.K512_4, "a", @progbits .align 64 K512_4: .octa 0x428a2f98d728ae22428a2f98d728ae22,\ @@ -525,5 +525,7 @@ K512_4: .octa 0x6c44198c4a4758176c44198c4a475817,\ 0x6c44198c4a4758176c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index e610e29cbc81..66bbd9058a90 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -369,14 +369,17 @@ ENDPROC(sha512_transform_ssse3) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). 
+.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index dc66273e610d..b3f49d286348 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -29,11 +29,13 @@ .file "twofish-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 From a873996238e4019c54c49b56fcc1fef35a93da41 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:08 +0800 Subject: [PATCH 057/142] crypto: mediatek - move HW control data to transformation context This patch moves hardware control block members from mtk_*_rec to the transformation context and refines the related definitions. This lets the operational context easily manage its own control information for each DMA transfer. 
Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 144 ++++++++++++------------- drivers/crypto/mediatek/mtk-platform.h | 26 +---- drivers/crypto/mediatek/mtk-sha.c | 101 +++++++++-------- 3 files changed, 126 insertions(+), 145 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index 1370cabeeb5b..126b93ce2b4e 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -20,23 +20,25 @@ #define AES_BUF_SIZE ((PAGE_SIZE << AES_BUF_ORDER) \ & ~(AES_BLOCK_SIZE - 1)) -/* AES command token */ +/* AES command token size */ #define AES_CT_SIZE_ECB 2 #define AES_CT_SIZE_CBC 3 #define AES_CT_CTRL_HDR cpu_to_le32(0x00220000) -#define AES_COMMAND0 cpu_to_le32(0x05000000) -#define AES_COMMAND1 cpu_to_le32(0x2d060000) -#define AES_COMMAND2 cpu_to_le32(0xe4a63806) +/* AES-CBC/ECB command token */ +#define AES_CMD0 cpu_to_le32(0x05000000) +#define AES_CMD1 cpu_to_le32(0x2d060000) +#define AES_CMD2 cpu_to_le32(0xe4a63806) -/* AES transform information */ -#define AES_TFM_ECB cpu_to_le32(0x0 << 0) -#define AES_TFM_CBC cpu_to_le32(0x1 << 0) -#define AES_TFM_DECRYPT cpu_to_le32(0x5 << 0) -#define AES_TFM_ENCRYPT cpu_to_le32(0x4 << 0) +/* AES transform information word 0 fields */ +#define AES_TFM_BASIC_OUT cpu_to_le32(0x4 << 0) +#define AES_TFM_BASIC_IN cpu_to_le32(0x5 << 0) #define AES_TFM_SIZE(x) cpu_to_le32((x) << 8) #define AES_TFM_128BITS cpu_to_le32(0xb << 16) #define AES_TFM_192BITS cpu_to_le32(0xd << 16) #define AES_TFM_256BITS cpu_to_le32(0xf << 16) +/* AES transform information word 1 fields */ +#define AES_TFM_ECB cpu_to_le32(0x0 << 0) +#define AES_TFM_CBC cpu_to_le32(0x1 << 0) #define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* AES flags */ @@ -47,37 +49,24 @@ #define AES_FLAGS_BUSY BIT(3) /** - * mtk_aes_ct is a set of hardware instructions(command token) - * that are used to control engine's processing flow of AES. 
- */ -struct mtk_aes_ct { - __le32 ct_ctrl0; - __le32 ct_ctrl1; - __le32 ct_ctrl2; -}; - -/** - * mtk_aes_tfm is used to define AES transform state - * and contains all keys and initial vectors. - */ -struct mtk_aes_tfm { - __le32 tfm_ctrl0; - __le32 tfm_ctrl1; - __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE)]; -}; - -/** - * mtk_aes_info consists of command token and transform state of AES, - * which should be encapsulated in command and result descriptors. + * Command token(CT) is a set of hardware instructions that + * are used to control engine's processing flow of AES. * - * The engine requires this information to do: + * Transform information(TFM) is used to define AES state and + * contains all keys and initial vectors. + * + * The engine requires CT and TFM to do: * - Commands decoding and control of the engine's data path. * - Coordinating hardware data fetch and store operations. * - Result token construction and output. */ -struct mtk_aes_info { - struct mtk_aes_ct ct; - struct mtk_aes_tfm tfm; +struct mtk_aes_ct { + __le32 cmd[AES_CT_SIZE_CBC]; +}; + +struct mtk_aes_tfm { + __le32 ctrl[2]; + __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE)]; }; struct mtk_aes_reqctx { @@ -86,8 +75,15 @@ struct mtk_aes_reqctx { struct mtk_aes_ctx { struct mtk_cryp *cryp; - struct mtk_aes_info info; u32 keylen; + + struct mtk_aes_ct ct; + dma_addr_t ct_dma; + struct mtk_aes_tfm tfm; + dma_addr_t tfm_dma; + + __le32 ct_hdr; + u32 ct_size; }; struct mtk_aes_drv { @@ -174,57 +170,57 @@ static int mtk_aes_info_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, size_t len) { - struct mtk_aes_ctx *ctx = crypto_ablkcipher_ctx( - crypto_ablkcipher_reqtfm(aes->req)); - struct mtk_aes_info *info = aes->info; - struct mtk_aes_ct *ct = &info->ct; - struct mtk_aes_tfm *tfm = &info->tfm; + struct mtk_aes_ctx *ctx = aes->ctx; - aes->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); + ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); + ctx->ct.cmd[0] = AES_CMD0 | 
cpu_to_le32(len); + ctx->ct.cmd[1] = AES_CMD1; if (aes->flags & AES_FLAGS_ENCRYPT) - tfm->tfm_ctrl0 = AES_TFM_ENCRYPT; + ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT; else - tfm->tfm_ctrl0 = AES_TFM_DECRYPT; + ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN; if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) - tfm->tfm_ctrl0 |= AES_TFM_128BITS; + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) - tfm->tfm_ctrl0 |= AES_TFM_256BITS; + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_192)) - tfm->tfm_ctrl0 |= AES_TFM_192BITS; - - ct->ct_ctrl0 = AES_COMMAND0 | cpu_to_le32(len); - ct->ct_ctrl1 = AES_COMMAND1; + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; if (aes->flags & AES_FLAGS_CBC) { const u32 *iv = (const u32 *)aes->req->info; - u32 *iv_state = tfm->state + ctx->keylen; + u32 *iv_state = ctx->tfm.state + ctx->keylen; int i; - aes->ct_size = AES_CT_SIZE_CBC; - ct->ct_ctrl2 = AES_COMMAND2; - - tfm->tfm_ctrl0 |= AES_TFM_SIZE(ctx->keylen + + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + SIZE_IN_WORDS(AES_BLOCK_SIZE)); - tfm->tfm_ctrl1 = AES_TFM_CBC | AES_TFM_FULL_IV; + ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV; for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) iv_state[i] = cpu_to_le32(iv[i]); + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CBC; } else if (aes->flags & AES_FLAGS_ECB) { - aes->ct_size = AES_CT_SIZE_ECB; - tfm->tfm_ctrl0 |= AES_TFM_SIZE(ctx->keylen); - tfm->tfm_ctrl1 = AES_TFM_ECB; + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen); + ctx->tfm.ctrl[1] = AES_TFM_ECB; + + ctx->ct_size = AES_CT_SIZE_ECB; } - aes->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(cryp->dev, aes->ct_dma))) { - dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); + ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) + return -EINVAL; + + ctx->tfm_dma = 
dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma))) { + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); return -EINVAL; } - aes->tfm_dma = aes->ct_dma + sizeof(*ct); return 0; } @@ -253,10 +249,10 @@ static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) if (nents == 0) { res->hdr |= MTK_DESC_FIRST; cmd->hdr |= MTK_DESC_FIRST | - MTK_DESC_CT_LEN(aes->ct_size); - cmd->ct = cpu_to_le32(aes->ct_dma); - cmd->ct_hdr = aes->ct_hdr; - cmd->tfm = cpu_to_le32(aes->tfm_dma); + MTK_DESC_CT_LEN(aes->ctx->ct_size); + cmd->ct = cpu_to_le32(aes->ctx->ct_dma); + cmd->ct_hdr = aes->ctx->ct_hdr; + cmd->tfm = cpu_to_le32(aes->ctx->tfm_dma); } if (++ring->pos == MTK_DESC_NUM) @@ -396,7 +392,7 @@ static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, rctx->mode &= AES_FLAGS_MODE_MSK; /* Assign new request to device */ aes->req = req; - aes->info = &ctx->info; + aes->ctx = ctx; aes->flags = (aes->flags & ~AES_FLAGS_MODE_MSK) | rctx->mode; err = mtk_aes_map(cryp, aes); @@ -408,8 +404,12 @@ static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) { - dma_unmap_single(cryp->dev, aes->ct_dma, - sizeof(struct mtk_aes_info), DMA_TO_DEVICE); + struct mtk_aes_ctx *ctx = aes->ctx; + + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); if (aes->src.sg == aes->dst.sg) { dma_unmap_sg(cryp->dev, aes->src.sg, @@ -454,7 +454,7 @@ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, { struct mtk_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); const u32 *key_tmp = (const u32 *)key; - u32 *key_state = ctx->info.tfm.state; + u32 *key_state = ctx->tfm.state; int i; if (keylen != AES_KEYSIZE_128 && diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h index 
4d4309a007da..1516786b7a02 100644 --- a/drivers/crypto/mediatek/mtk-platform.h +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -113,22 +113,20 @@ struct mtk_aes_dma { u32 sg_len; }; +struct mtk_aes_ctx; + /** * struct mtk_aes_rec - AES operation record * @queue: crypto request queue * @req: pointer to ablkcipher request * @task: the tasklet is use in AES interrupt + * @ctx: pointer to current context * @src: the structure that holds source sg list info * @dst: the structure that holds destination sg list info * @aligned_sg: the scatter list is use to alignment * @real_dst: pointer to the destination sg list * @total: request buffer length * @buf: pointer to page buffer - * @info: pointer to AES transform state and command token - * @ct_hdr: AES command token control field - * @ct_size: size of AES command token - * @ct_dma: DMA address of AES command token - * @tfm_dma: DMA address of AES transform state * @id: record identification * @flags: it's describing AES operation state * @lock: the ablkcipher queue lock @@ -139,6 +137,7 @@ struct mtk_aes_rec { struct crypto_queue queue; struct ablkcipher_request *req; struct tasklet_struct task; + struct mtk_aes_ctx *ctx; struct mtk_aes_dma src; struct mtk_aes_dma dst; @@ -148,12 +147,6 @@ struct mtk_aes_rec { size_t total; void *buf; - void *info; - __le32 ct_hdr; - u32 ct_size; - dma_addr_t ct_dma; - dma_addr_t tfm_dma; - u8 id; unsigned long flags; /* queue lock */ @@ -165,11 +158,6 @@ struct mtk_aes_rec { * @queue: crypto request queue * @req: pointer to ahash request * @task: the tasklet is use in SHA interrupt - * @info: pointer to SHA transform state and command token - * @ct_hdr: SHA command token control field - * @ct_size: size of SHA command token - * @ct_dma: DMA address of SHA command token - * @tfm_dma: DMA address of SHA transform state * @id: record identification * @flags: it's describing SHA operation state * @lock: the ablkcipher queue lock @@ -181,12 +169,6 @@ struct mtk_sha_rec { struct ahash_request 
*req; struct tasklet_struct task; - void *info; - __le32 ct_hdr; - u32 ct_size; - dma_addr_t ct_dma; - dma_addr_t tfm_dma; - u8 id; unsigned long flags; /* queue lock */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index f1e188bc203c..8cbff218debb 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -28,9 +28,9 @@ /* SHA command token */ #define SHA_CT_SIZE 5 #define SHA_CT_CTRL_HDR cpu_to_le32(0x02220000) -#define SHA_COMMAND0 cpu_to_le32(0x03020000) -#define SHA_COMMAND1 cpu_to_le32(0x21060000) -#define SHA_COMMAND2 cpu_to_le32(0xe0e63802) +#define SHA_CMD0 cpu_to_le32(0x03020000) +#define SHA_CMD1 cpu_to_le32(0x21060000) +#define SHA_CMD2 cpu_to_le32(0xe0e63802) /* SHA transform information */ #define SHA_TFM_HASH cpu_to_le32(0x2 << 0) @@ -66,11 +66,8 @@ * and it contains the first two words of transform state. */ struct mtk_sha_ct { - __le32 tfm_ctrl0; - __le32 tfm_ctrl1; - __le32 ct_ctrl0; - __le32 ct_ctrl1; - __le32 ct_ctrl2; + __le32 ctrl[2]; + __le32 cmd[3]; }; /** @@ -78,8 +75,7 @@ struct mtk_sha_ct { * and store result digest that produced by engine. 
*/ struct mtk_sha_tfm { - __le32 tfm_ctrl0; - __le32 tfm_ctrl1; + __le32 ctrl[2]; __le32 digest[SIZE_IN_WORDS(SHA512_DIGEST_SIZE)]; }; @@ -102,6 +98,11 @@ struct mtk_sha_reqctx { size_t bufcnt; dma_addr_t dma_addr; + __le32 ct_hdr; + u32 ct_size; + dma_addr_t ct_dma; + dma_addr_t tfm_dma; + /* Walk state */ struct scatterlist *sg; u32 offset; /* Offset in current sg */ @@ -270,34 +271,32 @@ static void mtk_sha_fill_padding(struct mtk_sha_reqctx *ctx, u32 len) } /* Initialize basic transform information of SHA */ -static void mtk_sha_info_init(struct mtk_sha_rec *sha, - struct mtk_sha_reqctx *ctx) +static void mtk_sha_info_init(struct mtk_sha_reqctx *ctx) { - struct mtk_sha_info *info = sha->info; - struct mtk_sha_ct *ct = &info->ct; - struct mtk_sha_tfm *tfm = &info->tfm; + struct mtk_sha_ct *ct = &ctx->info.ct; + struct mtk_sha_tfm *tfm = &ctx->info.tfm; - sha->ct_hdr = SHA_CT_CTRL_HDR; - sha->ct_size = SHA_CT_SIZE; + ctx->ct_hdr = SHA_CT_CTRL_HDR; + ctx->ct_size = SHA_CT_SIZE; - tfm->tfm_ctrl0 = SHA_TFM_HASH | SHA_TFM_INNER_DIG | - SHA_TFM_SIZE(SIZE_IN_WORDS(ctx->ds)); + tfm->ctrl[0] = SHA_TFM_HASH | SHA_TFM_INNER_DIG | + SHA_TFM_SIZE(SIZE_IN_WORDS(ctx->ds)); switch (ctx->flags & SHA_FLAGS_ALGO_MSK) { case SHA_FLAGS_SHA1: - tfm->tfm_ctrl0 |= SHA_TFM_SHA1; + tfm->ctrl[0] |= SHA_TFM_SHA1; break; case SHA_FLAGS_SHA224: - tfm->tfm_ctrl0 |= SHA_TFM_SHA224; + tfm->ctrl[0] |= SHA_TFM_SHA224; break; case SHA_FLAGS_SHA256: - tfm->tfm_ctrl0 |= SHA_TFM_SHA256; + tfm->ctrl[0] |= SHA_TFM_SHA256; break; case SHA_FLAGS_SHA384: - tfm->tfm_ctrl0 |= SHA_TFM_SHA384; + tfm->ctrl[0] |= SHA_TFM_SHA384; break; case SHA_FLAGS_SHA512: - tfm->tfm_ctrl0 |= SHA_TFM_SHA512; + tfm->ctrl[0] |= SHA_TFM_SHA512; break; default: @@ -305,13 +304,13 @@ static void mtk_sha_info_init(struct mtk_sha_rec *sha, return; } - tfm->tfm_ctrl1 = SHA_TFM_HASH_STORE; - ct->tfm_ctrl0 = tfm->tfm_ctrl0 | SHA_TFM_CONTINUE | SHA_TFM_START; - ct->tfm_ctrl1 = tfm->tfm_ctrl1; + tfm->ctrl[1] = SHA_TFM_HASH_STORE; + 
ct->ctrl[0] = tfm->ctrl[0] | SHA_TFM_CONTINUE | SHA_TFM_START; + ct->ctrl[1] = tfm->ctrl[1]; - ct->ct_ctrl0 = SHA_COMMAND0; - ct->ct_ctrl1 = SHA_COMMAND1; - ct->ct_ctrl2 = SHA_COMMAND2 | SHA_TFM_DIGEST(SIZE_IN_WORDS(ctx->ds)); + ct->cmd[0] = SHA_CMD0; + ct->cmd[1] = SHA_CMD1; + ct->cmd[2] = SHA_CMD2 | SHA_TFM_DIGEST(SIZE_IN_WORDS(ctx->ds)); } /* @@ -323,28 +322,28 @@ static int mtk_sha_info_map(struct mtk_cryp *cryp, size_t len) { struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - struct mtk_sha_info *info = sha->info; + struct mtk_sha_info *info = &ctx->info; struct mtk_sha_ct *ct = &info->ct; if (ctx->start) ctx->start = false; else - ct->tfm_ctrl0 &= ~SHA_TFM_START; + ct->ctrl[0] &= ~SHA_TFM_START; - sha->ct_hdr &= ~SHA_DATA_LEN_MSK; - sha->ct_hdr |= cpu_to_le32(len); - ct->ct_ctrl0 &= ~SHA_DATA_LEN_MSK; - ct->ct_ctrl0 |= cpu_to_le32(len); + ctx->ct_hdr &= ~SHA_DATA_LEN_MSK; + ctx->ct_hdr |= cpu_to_le32(len); + ct->cmd[0] &= ~SHA_DATA_LEN_MSK; + ct->cmd[0] |= cpu_to_le32(len); ctx->digcnt += len; - sha->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), + ctx->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(cryp->dev, sha->ct_dma))) { + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) { dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); return -EINVAL; } - sha->tfm_dma = sha->ct_dma + sizeof(*ct); + ctx->tfm_dma = ctx->ct_dma + sizeof(*ct); return 0; } @@ -425,6 +424,7 @@ static int mtk_sha_init(struct ahash_request *req) static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, dma_addr_t addr, size_t len) { + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); struct mtk_ring *ring = cryp->ring[sha->id]; struct mtk_desc *cmd = ring->cmd_base + ring->pos; struct mtk_desc *res = ring->res_base + ring->pos; @@ -444,12 +444,12 @@ static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, cmd->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | 
MTK_DESC_BUF_LEN(len) | - MTK_DESC_CT_LEN(sha->ct_size); + MTK_DESC_CT_LEN(ctx->ct_size); cmd->buf = cpu_to_le32(addr); - cmd->ct = cpu_to_le32(sha->ct_dma); - cmd->ct_hdr = sha->ct_hdr; - cmd->tfm = cpu_to_le32(sha->tfm_dma); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); if (++ring->pos == MTK_DESC_NUM) ring->pos = 0; @@ -486,11 +486,11 @@ static int mtk_sha_xmit2(struct mtk_cryp *cryp, cmd->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST | - MTK_DESC_CT_LEN(sha->ct_size); + MTK_DESC_CT_LEN(ctx->ct_size); cmd->buf = cpu_to_le32(sg_dma_address(ctx->sg)); - cmd->ct = cpu_to_le32(sha->ct_dma); - cmd->ct_hdr = sha->ct_hdr; - cmd->tfm = cpu_to_le32(sha->tfm_dma); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); if (++ring->pos == MTK_DESC_NUM) ring->pos = 0; @@ -732,9 +732,8 @@ static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, ctx = ahash_request_ctx(req); sha->req = req; - sha->info = &ctx->info; - mtk_sha_info_init(sha, ctx); + mtk_sha_info_init(ctx); if (ctx->op == SHA_OP_UPDATE) { err = mtk_sha_update_start(cryp, sha); @@ -766,8 +765,8 @@ static void mtk_sha_unmap(struct mtk_cryp *cryp, struct mtk_sha_rec *sha) { struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); - dma_unmap_single(cryp->dev, sha->ct_dma, - sizeof(struct mtk_sha_info), DMA_BIDIRECTIONAL); + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->info), + DMA_BIDIRECTIONAL); if (ctx->flags & SHA_FLAGS_SG) { dma_unmap_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE); From 4432861fb9747fce52f94bc13da0d9b41292ef69 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:09 +0800 Subject: [PATCH 058/142] crypto: mediatek - fix incorrect data transfer result This patch fixes mtk_aes_xmit() data transfer bug. The original function uses the same loop and ring->pos to handle both command and result descriptors. 
But this produces incomplete results when src.sg_len != dst.sg_len. To solve the problem, we split the descriptors into different loops and use cmd_pos and res_pos to record them respectively. Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 44 +++++++++++++++----------- drivers/crypto/mediatek/mtk-platform.h | 6 ++-- drivers/crypto/mediatek/mtk-sha.c | 29 ++++++++++------- 3 files changed, 47 insertions(+), 32 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index 126b93ce2b4e..b658cb9fb6a2 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -225,29 +225,25 @@ static int mtk_aes_info_map(struct mtk_cryp *cryp, return 0; } +/* + * Write descriptors for processing. This will configure the engine, load + * the transform information and then start the packet processing. + */ static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) { struct mtk_ring *ring = cryp->ring[aes->id]; struct mtk_desc *cmd = NULL, *res = NULL; - struct scatterlist *ssg, *dsg; - u32 len = aes->src.sg_len; + struct scatterlist *ssg = aes->src.sg, *dsg = aes->dst.sg; + u32 slen = aes->src.sg_len, dlen = aes->dst.sg_len; int nents; - /* Fill in the command/result descriptors */ - for (nents = 0; nents < len; ++nents) { - ssg = &aes->src.sg[nents]; - dsg = &aes->dst.sg[nents]; - - cmd = ring->cmd_base + ring->pos; + /* Write command descriptors */ + for (nents = 0; nents < slen; ++nents, ssg = sg_next(ssg)) { + cmd = ring->cmd_base + ring->cmd_pos; cmd->hdr = MTK_DESC_BUF_LEN(ssg->length); cmd->buf = cpu_to_le32(sg_dma_address(ssg)); - res = ring->res_base + ring->pos; - res->hdr = MTK_DESC_BUF_LEN(dsg->length); - res->buf = cpu_to_le32(sg_dma_address(dsg)); - if (nents == 0) { - res->hdr |= MTK_DESC_FIRST; cmd->hdr |= MTK_DESC_FIRST | MTK_DESC_CT_LEN(aes->ctx->ct_size); cmd->ct = cpu_to_le32(aes->ctx->ct_dma); @@ -255,11 +251,23 @@ static int 
mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) cmd->tfm = cpu_to_le32(aes->ctx->tfm_dma); } - if (++ring->pos == MTK_DESC_NUM) - ring->pos = 0; + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; } - cmd->hdr |= MTK_DESC_LAST; + + /* Prepare result descriptors */ + for (nents = 0; nents < dlen; ++nents, dsg = sg_next(dsg)) { + res = ring->res_base + ring->res_pos; + res->hdr = MTK_DESC_BUF_LEN(dsg->length); + res->buf = cpu_to_le32(sg_dma_address(dsg)); + + if (nents == 0) + res->hdr |= MTK_DESC_FIRST; + + if (++ring->res_pos == MTK_DESC_NUM) + ring->res_pos = 0; + } res->hdr |= MTK_DESC_LAST; /* @@ -268,8 +276,8 @@ static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) */ wmb(); /* Start DMA transfer */ - mtk_aes_write(cryp, RDR_PREP_COUNT(aes->id), MTK_DESC_CNT(len)); - mtk_aes_write(cryp, CDR_PREP_COUNT(aes->id), MTK_DESC_CNT(len)); + mtk_aes_write(cryp, RDR_PREP_COUNT(aes->id), MTK_DESC_CNT(dlen)); + mtk_aes_write(cryp, CDR_PREP_COUNT(aes->id), MTK_DESC_CNT(slen)); return -EINPROGRESS; } diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h index 1516786b7a02..8c50b74788ff 100644 --- a/drivers/crypto/mediatek/mtk-platform.h +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -83,9 +83,10 @@ struct mtk_desc { * struct mtk_ring - Descriptor ring * @cmd_base: pointer to command descriptor ring base * @cmd_dma: DMA address of command descriptor ring + * @cmd_pos: current position in the command descriptor ring * @res_base: pointer to result descriptor ring base * @res_dma: DMA address of result descriptor ring - * @pos: current position in the ring + * @res_pos: current position in the result descriptor ring * * A descriptor ring is a circular buffer that is used to manage * one or more descriptors. 
There are two type of descriptor rings; @@ -94,9 +95,10 @@ struct mtk_desc { struct mtk_ring { struct mtk_desc *cmd_base; dma_addr_t cmd_dma; + u32 cmd_pos; struct mtk_desc *res_base; dma_addr_t res_dma; - u32 pos; + u32 res_pos; }; /** diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 8cbff218debb..2536ebc53602 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -426,8 +426,8 @@ static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, { struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); struct mtk_ring *ring = cryp->ring[sha->id]; - struct mtk_desc *cmd = ring->cmd_base + ring->pos; - struct mtk_desc *res = ring->res_base + ring->pos; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; int err; err = mtk_sha_info_map(cryp, sha, len); @@ -451,9 +451,10 @@ static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, cmd->ct_hdr = ctx->ct_hdr; cmd->tfm = cpu_to_le32(ctx->tfm_dma); - if (++ring->pos == MTK_DESC_NUM) - ring->pos = 0; + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + ring->res_pos = ring->cmd_pos; /* * Make sure that all changes to the DMA ring are done before we * start engine. 
@@ -472,8 +473,8 @@ static int mtk_sha_xmit2(struct mtk_cryp *cryp, size_t len1, size_t len2) { struct mtk_ring *ring = cryp->ring[sha->id]; - struct mtk_desc *cmd = ring->cmd_base + ring->pos; - struct mtk_desc *res = ring->res_base + ring->pos; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; int err; err = mtk_sha_info_map(cryp, sha, len1 + len2); @@ -492,11 +493,13 @@ static int mtk_sha_xmit2(struct mtk_cryp *cryp, cmd->ct_hdr = ctx->ct_hdr; cmd->tfm = cpu_to_le32(ctx->tfm_dma); - if (++ring->pos == MTK_DESC_NUM) - ring->pos = 0; + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; - cmd = ring->cmd_base + ring->pos; - res = ring->res_base + ring->pos; + ring->res_pos = ring->cmd_pos; + + cmd = ring->cmd_base + ring->cmd_pos; + res = ring->res_base + ring->res_pos; res->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; res->buf = cpu_to_le32(cryp->tmp_dma); @@ -504,8 +507,10 @@ static int mtk_sha_xmit2(struct mtk_cryp *cryp, cmd->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; cmd->buf = cpu_to_le32(ctx->dma_addr); - if (++ring->pos == MTK_DESC_NUM) - ring->pos = 0; + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; /* * Make sure that all changes to the DMA ring are done before we From 382ae57d5e52a62e77d62e60e5be9a6526d40da0 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:10 +0800 Subject: [PATCH 059/142] crypto: mediatek - make crypto request queue management more generic This patch changes mtk_aes_handle_queue() to make it more generic. The function argument is now a pointer to struct crypto_async_request, which is the common base of struct ablkcipher_request and struct aead_request. Also this patch introduces struct mtk_aes_base_ctx which will be the common base of all the transformation contexts. 
Hence the very same queue will be used to manage both block cipher and AEAD requests (such as gcm and authenc implemented in further patches). Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 75 +++++++++++++++----------- drivers/crypto/mediatek/mtk-platform.h | 14 +++-- 2 files changed, 53 insertions(+), 36 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index b658cb9fb6a2..7e5a8e05b5b7 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -73,9 +73,10 @@ struct mtk_aes_reqctx { u64 mode; }; -struct mtk_aes_ctx { +struct mtk_aes_base_ctx { struct mtk_cryp *cryp; u32 keylen; + mtk_aes_fn start; struct mtk_aes_ct ct; dma_addr_t ct_dma; @@ -86,6 +87,10 @@ struct mtk_aes_ctx { u32 ct_size; }; +struct mtk_aes_ctx { + struct mtk_aes_base_ctx base; +}; + struct mtk_aes_drv { struct list_head dev_list; /* Device list lock */ @@ -108,7 +113,7 @@ static inline void mtk_aes_write(struct mtk_cryp *cryp, writel_relaxed(value, cryp->base + offset); } -static struct mtk_cryp *mtk_aes_find_dev(struct mtk_aes_ctx *ctx) +static struct mtk_cryp *mtk_aes_find_dev(struct mtk_aes_base_ctx *ctx) { struct mtk_cryp *cryp = NULL; struct mtk_cryp *tmp; @@ -170,7 +175,8 @@ static int mtk_aes_info_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, size_t len) { - struct mtk_aes_ctx *ctx = aes->ctx; + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len); @@ -189,7 +195,7 @@ static int mtk_aes_info_map(struct mtk_cryp *cryp, ctx->tfm.ctrl[0] |= AES_TFM_192BITS; if (aes->flags & AES_FLAGS_CBC) { - const u32 *iv = (const u32 *)aes->req->info; + const u32 *iv = (const u32 *)req->info; u32 *iv_state = ctx->tfm.state + ctx->keylen; int i; @@ -299,11 +305,10 @@ static inline void mtk_aes_restore_sg(const struct mtk_aes_dma 
*dma) sg->length += dma->remainder; } -static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) { - struct scatterlist *src = aes->req->src; - struct scatterlist *dst = aes->req->dst; - size_t len = aes->req->nbytes; size_t padlen = 0; bool src_aligned, dst_aligned; @@ -366,18 +371,17 @@ static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) } static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, - struct ablkcipher_request *req) + struct crypto_async_request *new_areq) { struct mtk_aes_rec *aes = cryp->aes[id]; struct crypto_async_request *areq, *backlog; - struct mtk_aes_reqctx *rctx; - struct mtk_aes_ctx *ctx; + struct mtk_aes_base_ctx *ctx; unsigned long flags; - int err, ret = 0; + int ret = 0; spin_lock_irqsave(&aes->lock, flags); - if (req) - ret = ablkcipher_enqueue_request(&aes->queue, req); + if (new_areq) + ret = crypto_enqueue_request(&aes->queue, new_areq); if (aes->flags & AES_FLAGS_BUSY) { spin_unlock_irqrestore(&aes->lock, flags); return ret; @@ -394,16 +398,25 @@ static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, if (backlog) backlog->complete(backlog, -EINPROGRESS); - req = ablkcipher_request_cast(areq); - ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + ctx = crypto_tfm_ctx(areq->tfm); + + aes->areq = areq; + aes->ctx = ctx; + + return ctx->start(cryp, aes); +} + +static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + int err; + rctx = ablkcipher_request_ctx(req); rctx->mode &= AES_FLAGS_MODE_MSK; - /* Assign new request to device */ - aes->req = req; - aes->ctx = ctx; aes->flags = (aes->flags & ~AES_FLAGS_MODE_MSK) | rctx->mode; - err = mtk_aes_map(cryp, aes); + err = mtk_aes_map(cryp, aes, 
req->src, req->dst, req->nbytes); if (err) return err; @@ -412,7 +425,7 @@ static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) { - struct mtk_aes_ctx *ctx = aes->ctx; + struct mtk_aes_base_ctx *ctx = aes->ctx; dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), DMA_TO_DEVICE); @@ -449,8 +462,7 @@ static inline void mtk_aes_complete(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) { aes->flags &= ~AES_FLAGS_BUSY; - - aes->req->base.complete(&aes->req->base, 0); + aes->areq->complete(aes->areq, 0); /* Handle new request */ mtk_aes_handle_queue(cryp, aes->id, NULL); @@ -460,7 +472,7 @@ static inline void mtk_aes_complete(struct mtk_cryp *cryp, static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, u32 keylen) { - struct mtk_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(tfm); const u32 *key_tmp = (const u32 *)key; u32 *key_state = ctx->tfm.state; int i; @@ -482,14 +494,15 @@ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode) { - struct mtk_aes_ctx *ctx = crypto_ablkcipher_ctx( - crypto_ablkcipher_reqtfm(req)); - struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + struct mtk_aes_base_ctx *ctx; + struct mtk_aes_reqctx *rctx; + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx = ablkcipher_request_ctx(req); rctx->mode = mode; return mtk_aes_handle_queue(ctx->cryp, - !(mode & AES_FLAGS_ENCRYPT), req); + !(mode & AES_FLAGS_ENCRYPT), &req->base); } static int mtk_ecb_encrypt(struct ablkcipher_request *req) @@ -517,14 +530,14 @@ static int mtk_aes_cra_init(struct crypto_tfm *tfm) struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); struct mtk_cryp *cryp = NULL; - tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); - - cryp = mtk_aes_find_dev(ctx); + cryp = mtk_aes_find_dev(&ctx->base); if (!cryp) { pr_err("can't find 
crypto device\n"); return -ENODEV; } + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_start; return 0; } diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h index 8c50b74788ff..9f5210c7f1db 100644 --- a/drivers/crypto/mediatek/mtk-platform.h +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -115,12 +115,16 @@ struct mtk_aes_dma { u32 sg_len; }; -struct mtk_aes_ctx; +struct mtk_aes_base_ctx; +struct mtk_aes_rec; +struct mtk_cryp; + +typedef int (*mtk_aes_fn)(struct mtk_cryp *cryp, struct mtk_aes_rec *aes); /** * struct mtk_aes_rec - AES operation record * @queue: crypto request queue - * @req: pointer to ablkcipher request + * @req: pointer to async request * @task: the tasklet is use in AES interrupt * @ctx: pointer to current context * @src: the structure that holds source sg list info @@ -131,15 +135,15 @@ struct mtk_aes_ctx; * @buf: pointer to page buffer * @id: record identification * @flags: it's describing AES operation state - * @lock: the ablkcipher queue lock + * @lock: the async queue lock * * Structure used to record AES execution state. */ struct mtk_aes_rec { struct crypto_queue queue; - struct ablkcipher_request *req; + struct crypto_async_request *areq; struct tasklet_struct task; - struct mtk_aes_ctx *ctx; + struct mtk_aes_base_ctx *ctx; struct mtk_aes_dma src; struct mtk_aes_dma dst; From 87421984b4d2e04cfe858849db10ac326d9f3aed Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:11 +0800 Subject: [PATCH 060/142] crypto: mediatek - rework crypto request completion This patch introduces a new callback 'resume' in the struct mtk_aes_rec. This callback is run to resume/complete the processing of the crypto request when woken up by AES interrupts when DMA completion. This callback will help implementing the GCM mode support in further patches. 
Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 25 +++++++++++++------------ drivers/crypto/mediatek/mtk-platform.h | 3 +++ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index 7e5a8e05b5b7..9c4e46891783 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -406,6 +406,15 @@ static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, return ctx->start(cryp, aes); } +static int mtk_aes_complete(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + aes->flags &= ~AES_FLAGS_BUSY; + aes->areq->complete(aes->areq, 0); + + /* Handle new request */ + return mtk_aes_handle_queue(cryp, aes->id, NULL); +} + static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) { struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); @@ -416,6 +425,8 @@ static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) rctx->mode &= AES_FLAGS_MODE_MSK; aes->flags = (aes->flags & ~AES_FLAGS_MODE_MSK) | rctx->mode; + aes->resume = mtk_aes_complete; + err = mtk_aes_map(cryp, aes, req->src, req->dst, req->nbytes); if (err) return err; @@ -458,16 +469,6 @@ static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) aes->buf, aes->total); } -static inline void mtk_aes_complete(struct mtk_cryp *cryp, - struct mtk_aes_rec *aes) -{ - aes->flags &= ~AES_FLAGS_BUSY; - aes->areq->complete(aes->areq, 0); - - /* Handle new request */ - mtk_aes_handle_queue(cryp, aes->id, NULL); -} - /* Check and set the AES key to transform state buffer */ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, u32 keylen) @@ -591,7 +592,7 @@ static void mtk_aes_enc_task(unsigned long data) struct mtk_aes_rec *aes = cryp->aes[0]; mtk_aes_unmap(cryp, aes); - mtk_aes_complete(cryp, aes); + aes->resume(cryp, aes); } static void mtk_aes_dec_task(unsigned long data) @@ -600,7 +601,7 @@ static 
void mtk_aes_dec_task(unsigned long data) struct mtk_aes_rec *aes = cryp->aes[1]; mtk_aes_unmap(cryp, aes); - mtk_aes_complete(cryp, aes); + aes->resume(cryp, aes); } static irqreturn_t mtk_aes_enc_irq(int irq, void *dev_id) diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h index 9f5210c7f1db..36d166bb29fd 100644 --- a/drivers/crypto/mediatek/mtk-platform.h +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -131,6 +131,7 @@ typedef int (*mtk_aes_fn)(struct mtk_cryp *cryp, struct mtk_aes_rec *aes); * @dst: the structure that holds destination sg list info * @aligned_sg: the scatter list is use to alignment * @real_dst: pointer to the destination sg list + * @resume: pointer to resume function * @total: request buffer length * @buf: pointer to page buffer * @id: record identification @@ -150,6 +151,8 @@ struct mtk_aes_rec { struct scatterlist aligned_sg; struct scatterlist *real_dst; + mtk_aes_fn resume; + size_t total; void *buf; From 0abc271494d1209e097f2837e324ecd9c05716e5 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:12 +0800 Subject: [PATCH 061/142] crypto: mediatek - regroup functions by usage This patch only regroup functions by usage. This will help to integrate the GCM support patch later by adjusting some shared code section, such as common code which will be reused by GCM, AES mode setting, and DMA transfer. 
Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 272 ++++++++++++++++-------------- 1 file changed, 141 insertions(+), 131 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index 9c4e46891783..b5946e92d99b 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -42,7 +42,6 @@ #define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* AES flags */ -#define AES_FLAGS_MODE_MSK 0x7 #define AES_FLAGS_ECB BIT(0) #define AES_FLAGS_CBC BIT(1) #define AES_FLAGS_ENCRYPT BIT(2) @@ -170,65 +169,28 @@ static bool mtk_aes_check_aligned(struct scatterlist *sg, size_t len, return false; } -/* Initialize and map transform information of AES */ -static int mtk_aes_info_map(struct mtk_cryp *cryp, - struct mtk_aes_rec *aes, - size_t len) +static inline void mtk_aes_set_mode(struct mtk_aes_rec *aes, + const struct mtk_aes_reqctx *rctx) { - struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); - struct mtk_aes_base_ctx *ctx = aes->ctx; + /* Clear all but persistent flags and set request flags. 
*/ + aes->flags = (aes->flags & AES_FLAGS_BUSY) | rctx->mode; +} - ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); - ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len); - ctx->ct.cmd[1] = AES_CMD1; +static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma) +{ + struct scatterlist *sg = dma->sg; + int nents = dma->nents; - if (aes->flags & AES_FLAGS_ENCRYPT) - ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT; - else - ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN; + if (!dma->remainder) + return; - if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) - ctx->tfm.ctrl[0] |= AES_TFM_128BITS; - else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) - ctx->tfm.ctrl[0] |= AES_TFM_256BITS; - else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_192)) - ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + while (--nents > 0 && sg) + sg = sg_next(sg); - if (aes->flags & AES_FLAGS_CBC) { - const u32 *iv = (const u32 *)req->info; - u32 *iv_state = ctx->tfm.state + ctx->keylen; - int i; + if (!sg) + return; - ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + - SIZE_IN_WORDS(AES_BLOCK_SIZE)); - ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV; - - for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) - iv_state[i] = cpu_to_le32(iv[i]); - - ctx->ct.cmd[2] = AES_CMD2; - ctx->ct_size = AES_CT_SIZE_CBC; - } else if (aes->flags & AES_FLAGS_ECB) { - ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen); - ctx->tfm.ctrl[1] = AES_TFM_ECB; - - ctx->ct_size = AES_CT_SIZE_ECB; - } - - ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct), - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) - return -EINVAL; - - ctx->tfm_dma = dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm), - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma))) { - dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), - DMA_TO_DEVICE); - return -EINVAL; - } - - return 0; + sg->length += dma->remainder; } /* @@ -288,24 +250,134 @@ static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec 
*aes) return -EINPROGRESS; } -static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma) +static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) { - struct scatterlist *sg = dma->sg; - int nents = dma->nents; + struct mtk_aes_base_ctx *ctx = aes->ctx; - if (!dma->remainder) - return; + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); - while (--nents > 0 && sg) - sg = sg_next(sg); + if (aes->src.sg == aes->dst.sg) { + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_BIDIRECTIONAL); - if (!sg) - return; + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } else { + dma_unmap_sg(cryp->dev, aes->dst.sg, aes->dst.nents, + DMA_FROM_DEVICE); - sg->length += dma->remainder; + if (aes->dst.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->dst); + + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_TO_DEVICE); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } + + if (aes->dst.sg == &aes->aligned_sg) + sg_copy_from_buffer(aes->real_dst, sg_nents(aes->real_dst), + aes->buf, aes->total); } -static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, +static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) + return -EINVAL; + + ctx->tfm_dma = dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma))) + goto tfm_map_err; + + if (aes->src.sg == aes->dst.sg) { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, + DMA_BIDIRECTIONAL); + aes->dst.sg_len = aes->src.sg_len; + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + } else { + aes->src.sg_len = 
dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + + aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, + aes->dst.nents, DMA_FROM_DEVICE); + if (unlikely(!aes->dst.sg_len)) { + dma_unmap_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + goto sg_map_err; + } + } + + return mtk_aes_xmit(cryp, aes); + +sg_map_err: + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); +tfm_map_err: + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + + return -EINVAL; +} + +/* Initialize transform information of CBC/ECB mode */ +static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + size_t len) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); + ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len); + ctx->ct.cmd[1] = AES_CMD1; + + if (aes->flags & AES_FLAGS_ENCRYPT) + ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT; + else + ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN; + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + if (aes->flags & AES_FLAGS_CBC) { + const u32 *iv = (const u32 *)req->info; + u32 *iv_state = ctx->tfm.state + ctx->keylen; + int i; + + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV; + + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(iv[i]); + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CBC; + } else if (aes->flags & AES_FLAGS_ECB) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen); + ctx->tfm.ctrl[1] = AES_TFM_ECB; + + ctx->ct_size = AES_CT_SIZE_ECB; + } +} + +static int mtk_aes_dma(struct 
mtk_cryp *cryp, struct mtk_aes_rec *aes, struct scatterlist *src, struct scatterlist *dst, size_t len) { @@ -346,28 +418,9 @@ static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, sg_set_buf(&aes->aligned_sg, aes->buf, len + padlen); } - if (aes->src.sg == aes->dst.sg) { - aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, - aes->src.nents, DMA_BIDIRECTIONAL); - aes->dst.sg_len = aes->src.sg_len; - if (unlikely(!aes->src.sg_len)) - return -EFAULT; - } else { - aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, - aes->src.nents, DMA_TO_DEVICE); - if (unlikely(!aes->src.sg_len)) - return -EFAULT; + mtk_aes_info_init(cryp, aes, len + padlen); - aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, - aes->dst.nents, DMA_FROM_DEVICE); - if (unlikely(!aes->dst.sg_len)) { - dma_unmap_sg(cryp->dev, aes->src.sg, - aes->src.nents, DMA_TO_DEVICE); - return -EFAULT; - } - } - - return mtk_aes_info_map(cryp, aes, len + padlen); + return mtk_aes_map(cryp, aes); } static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, @@ -419,54 +472,11 @@ static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) { struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); - int err; - - rctx = ablkcipher_request_ctx(req); - rctx->mode &= AES_FLAGS_MODE_MSK; - aes->flags = (aes->flags & ~AES_FLAGS_MODE_MSK) | rctx->mode; + mtk_aes_set_mode(aes, rctx); aes->resume = mtk_aes_complete; - err = mtk_aes_map(cryp, aes, req->src, req->dst, req->nbytes); - if (err) - return err; - - return mtk_aes_xmit(cryp, aes); -} - -static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) -{ - struct mtk_aes_base_ctx *ctx = aes->ctx; - - dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), - DMA_TO_DEVICE); - dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), - DMA_TO_DEVICE); - - if (aes->src.sg == aes->dst.sg) { - dma_unmap_sg(cryp->dev, aes->src.sg, - aes->src.nents, 
DMA_BIDIRECTIONAL); - - if (aes->src.sg != &aes->aligned_sg) - mtk_aes_restore_sg(&aes->src); - } else { - dma_unmap_sg(cryp->dev, aes->dst.sg, - aes->dst.nents, DMA_FROM_DEVICE); - - if (aes->dst.sg != &aes->aligned_sg) - mtk_aes_restore_sg(&aes->dst); - - dma_unmap_sg(cryp->dev, aes->src.sg, - aes->src.nents, DMA_TO_DEVICE); - - if (aes->src.sg != &aes->aligned_sg) - mtk_aes_restore_sg(&aes->src); - } - - if (aes->dst.sg == &aes->aligned_sg) - sg_copy_from_buffer(aes->real_dst, - sg_nents(aes->real_dst), - aes->buf, aes->total); + return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes); } /* Check and set the AES key to transform state buffer */ From 059b14947aaa2b5eb66465be5fa0ba816044ab01 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:13 +0800 Subject: [PATCH 062/142] crypto: mediatek - fix typo and indentation Dummy patch to fix typo and indentation. Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 90 +++++++++++++------------- drivers/crypto/mediatek/mtk-platform.h | 2 +- drivers/crypto/mediatek/mtk-sha.c | 40 +++++------- 3 files changed, 63 insertions(+), 69 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index b5946e92d99b..5e7c3ceea00e 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -314,8 +314,8 @@ static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, aes->dst.nents, DMA_FROM_DEVICE); if (unlikely(!aes->dst.sg_len)) { - dma_unmap_sg(cryp->dev, aes->src.sg, - aes->src.nents, DMA_TO_DEVICE); + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_TO_DEVICE); goto sg_map_err; } } @@ -484,7 +484,7 @@ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, u32 keylen) { struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(tfm); - const u32 *key_tmp = (const u32 *)key; + const u32 *aes_key = (const u32 *)key; 
u32 *key_state = ctx->tfm.state; int i; @@ -498,7 +498,7 @@ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, ctx->keylen = SIZE_IN_WORDS(keylen); for (i = 0; i < ctx->keylen; i++) - key_state[i] = cpu_to_le32(key_tmp[i]); + key_state[i] = cpu_to_le32(aes_key[i]); return 0; } @@ -512,26 +512,26 @@ static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode) rctx = ablkcipher_request_ctx(req); rctx->mode = mode; - return mtk_aes_handle_queue(ctx->cryp, - !(mode & AES_FLAGS_ENCRYPT), &req->base); + return mtk_aes_handle_queue(ctx->cryp, !(mode & AES_FLAGS_ENCRYPT), + &req->base); } -static int mtk_ecb_encrypt(struct ablkcipher_request *req) +static int mtk_aes_ecb_encrypt(struct ablkcipher_request *req) { return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_ECB); } -static int mtk_ecb_decrypt(struct ablkcipher_request *req) +static int mtk_aes_ecb_decrypt(struct ablkcipher_request *req) { return mtk_aes_crypt(req, AES_FLAGS_ECB); } -static int mtk_cbc_encrypt(struct ablkcipher_request *req) +static int mtk_aes_cbc_encrypt(struct ablkcipher_request *req) { return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CBC); } -static int mtk_cbc_decrypt(struct ablkcipher_request *req) +static int mtk_aes_cbc_decrypt(struct ablkcipher_request *req) { return mtk_aes_crypt(req, AES_FLAGS_CBC); } @@ -554,44 +554,44 @@ static int mtk_aes_cra_init(struct crypto_tfm *tfm) static struct crypto_alg aes_algs[] = { { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-mtk", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - CRYPTO_ALG_ASYNC, - .cra_init = mtk_aes_cra_init, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_aes_ctx), - .cra_alignmask = 15, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_u.ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = mtk_aes_setkey, - .encrypt = mtk_cbc_encrypt, - .decrypt = mtk_cbc_decrypt, - .ivsize = AES_BLOCK_SIZE, + 
.cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_cbc_encrypt, + .decrypt = mtk_aes_cbc_decrypt, + .ivsize = AES_BLOCK_SIZE, } }, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-mtk", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | - CRYPTO_ALG_ASYNC, - .cra_init = mtk_aes_cra_init, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct mtk_aes_ctx), - .cra_alignmask = 15, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_u.ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = mtk_aes_setkey, - .encrypt = mtk_ecb_encrypt, - .decrypt = mtk_ecb_decrypt, + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ecb_encrypt, + .decrypt = mtk_aes_ecb_decrypt, } }, }; diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h index 36d166bb29fd..7cd5f98d25a5 100644 --- a/drivers/crypto/mediatek/mtk-platform.h +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -124,7 +124,7 @@ typedef int (*mtk_aes_fn)(struct mtk_cryp *cryp, struct mtk_aes_rec *aes); /** * struct 
mtk_aes_rec - AES operation record * @queue: crypto request queue - * @req: pointer to async request + * @areq: pointer to async request * @task: the tasklet is use in AES interrupt * @ctx: pointer to current context * @src: the structure that holds source sg list info diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 2536ebc53602..55e3805fba07 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -317,9 +317,9 @@ static void mtk_sha_info_init(struct mtk_sha_reqctx *ctx) * Update input data length field of transform information and * map it to DMA region. */ -static int mtk_sha_info_map(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha, - size_t len) +static int mtk_sha_info_update(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + size_t len) { struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); struct mtk_sha_info *info = &ctx->info; @@ -338,7 +338,7 @@ static int mtk_sha_info_map(struct mtk_cryp *cryp, ctx->digcnt += len; ctx->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), - DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) { dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); return -EINVAL; @@ -430,20 +430,15 @@ static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, struct mtk_desc *res = ring->res_base + ring->res_pos; int err; - err = mtk_sha_info_map(cryp, sha, len); + err = mtk_sha_info_update(cryp, sha, len); if (err) return err; /* Fill in the command/result descriptors */ - res->hdr = MTK_DESC_FIRST | - MTK_DESC_LAST | - MTK_DESC_BUF_LEN(len); - + res->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len); res->buf = cpu_to_le32(cryp->tmp_dma); - cmd->hdr = MTK_DESC_FIRST | - MTK_DESC_LAST | - MTK_DESC_BUF_LEN(len) | + cmd->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len) | MTK_DESC_CT_LEN(ctx->ct_size); cmd->buf = cpu_to_le32(addr); @@ -477,7 +472,7 @@ static int 
mtk_sha_xmit2(struct mtk_cryp *cryp, struct mtk_desc *res = ring->res_base + ring->res_pos; int err; - err = mtk_sha_info_map(cryp, sha, len1 + len2); + err = mtk_sha_info_update(cryp, sha, len1 + len2); if (err) return err; @@ -485,8 +480,7 @@ static int mtk_sha_xmit2(struct mtk_cryp *cryp, res->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST; res->buf = cpu_to_le32(cryp->tmp_dma); - cmd->hdr = MTK_DESC_BUF_LEN(len1) | - MTK_DESC_FIRST | + cmd->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST | MTK_DESC_CT_LEN(ctx->ct_size); cmd->buf = cpu_to_le32(sg_dma_address(ctx->sg)); cmd->ct = cpu_to_le32(ctx->ct_dma); @@ -530,7 +524,7 @@ static int mtk_sha_dma_map(struct mtk_cryp *cryp, size_t count) { ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, - SHA_BUF_SIZE, DMA_TO_DEVICE); + SHA_BUF_SIZE, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { dev_err(cryp->dev, "dma map error\n"); return -EINVAL; @@ -619,7 +613,7 @@ static int mtk_sha_update_start(struct mtk_cryp *cryp, mtk_sha_fill_padding(ctx, len); ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, - SHA_BUF_SIZE, DMA_TO_DEVICE); + SHA_BUF_SIZE, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { dev_err(cryp->dev, "dma map bytes error\n"); return -EINVAL; @@ -658,8 +652,7 @@ static int mtk_sha_update_start(struct mtk_cryp *cryp, static int mtk_sha_final_req(struct mtk_cryp *cryp, struct mtk_sha_rec *sha) { - struct ahash_request *req = sha->req; - struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); size_t count; mtk_sha_fill_padding(ctx, 0); @@ -690,7 +683,8 @@ static int mtk_sha_finish(struct ahash_request *req) } static void mtk_sha_finish_req(struct mtk_cryp *cryp, - struct mtk_sha_rec *sha, int err) + struct mtk_sha_rec *sha, + int err) { if (likely(!err && (SHA_FLAGS_FINAL & sha->flags))) err = mtk_sha_finish(sha->req); @@ -850,8 +844,8 @@ static int mtk_sha_digest(struct ahash_request *req) 
return mtk_sha_init(req) ?: mtk_sha_finup(req); } -static int mtk_sha_setkey(struct crypto_ahash *tfm, - const unsigned char *key, u32 keylen) +static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key, + u32 keylen) { struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); struct mtk_sha_hmac_ctx *bctx = tctx->base; @@ -863,7 +857,7 @@ static int mtk_sha_setkey(struct crypto_ahash *tfm, shash->tfm = bctx->shash; shash->flags = crypto_shash_get_flags(bctx->shash) & - CRYPTO_TFM_REQ_MAY_SLEEP; + CRYPTO_TFM_REQ_MAY_SLEEP; if (keylen > bs) { err = crypto_shash_digest(shash, key, keylen, bctx->ipad); From e04a31d7f5712a757245cde7f535a105b67ca99b Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:14 +0800 Subject: [PATCH 063/142] crypto: mediatek - add support to CTR mode This patch adds support to the CTR mode. Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-aes.c | 151 +++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index 5e7c3ceea00e..bb5b4ff9ab0d 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -23,8 +23,10 @@ /* AES command token size */ #define AES_CT_SIZE_ECB 2 #define AES_CT_SIZE_CBC 3 +#define AES_CT_SIZE_CTR 3 #define AES_CT_CTRL_HDR cpu_to_le32(0x00220000) -/* AES-CBC/ECB command token */ + +/* AES-CBC/ECB/CTR command token */ #define AES_CMD0 cpu_to_le32(0x05000000) #define AES_CMD1 cpu_to_le32(0x2d060000) #define AES_CMD2 cpu_to_le32(0xe4a63806) @@ -39,13 +41,15 @@ /* AES transform information word 1 fields */ #define AES_TFM_ECB cpu_to_le32(0x0 << 0) #define AES_TFM_CBC cpu_to_le32(0x1 << 0) -#define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) +#define AES_TFM_CTR_LOAD cpu_to_le32(0x6 << 0) /* load/reuse counter */ +#define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* using IV 0-3 */ /* AES flags */ #define AES_FLAGS_ECB BIT(0) #define AES_FLAGS_CBC BIT(1) 
-#define AES_FLAGS_ENCRYPT BIT(2) -#define AES_FLAGS_BUSY BIT(3) +#define AES_FLAGS_CTR BIT(2) +#define AES_FLAGS_ENCRYPT BIT(3) +#define AES_FLAGS_BUSY BIT(4) /** * Command token(CT) is a set of hardware instructions that @@ -90,6 +94,15 @@ struct mtk_aes_ctx { struct mtk_aes_base_ctx base; }; +struct mtk_aes_ctr_ctx { + struct mtk_aes_base_ctx base; + + u32 iv[AES_BLOCK_SIZE / sizeof(u32)]; + size_t offset; + struct scatterlist src[2]; + struct scatterlist dst[2]; +}; + struct mtk_aes_drv { struct list_head dev_list; /* Device list lock */ @@ -332,7 +345,7 @@ tfm_map_err: return -EINVAL; } -/* Initialize transform information of CBC/ECB mode */ +/* Initialize transform information of CBC/ECB/CTR mode */ static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, size_t len) { @@ -374,6 +387,13 @@ static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, ctx->tfm.ctrl[1] = AES_TFM_ECB; ctx->ct_size = AES_CT_SIZE_ECB; + } else if (aes->flags & AES_FLAGS_CTR) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_LOAD | AES_TFM_FULL_IV; + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CTR; } } @@ -479,6 +499,80 @@ static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes); } +static inline struct mtk_aes_ctr_ctx * +mtk_aes_ctr_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_ctr_ctx, base); +} + +static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct scatterlist *src, *dst; + int i; + u32 start, end, ctr, blocks, *iv_state; + size_t datalen; + bool fragmented = false; + + /* Check for transfer completion. 
*/ + cctx->offset += aes->total; + if (cctx->offset >= req->nbytes) + return mtk_aes_complete(cryp, aes); + + /* Compute data length. */ + datalen = req->nbytes - cctx->offset; + blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); + ctr = be32_to_cpu(cctx->iv[3]); + + /* Check 32bit counter overflow. */ + start = ctr; + end = start + blocks - 1; + if (end < start) { + ctr |= 0xffffffff; + datalen = AES_BLOCK_SIZE * -start; + fragmented = true; + } + + /* Jump to offset. */ + src = scatterwalk_ffwd(cctx->src, req->src, cctx->offset); + dst = ((req->src == req->dst) ? src : + scatterwalk_ffwd(cctx->dst, req->dst, cctx->offset)); + + /* Write IVs into transform state buffer. */ + iv_state = ctx->tfm.state + ctx->keylen; + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(cctx->iv[i]); + + if (unlikely(fragmented)) { + /* + * Increment the counter manually to cope with the hardware + * counter overflow. + */ + cctx->iv[3] = cpu_to_be32(ctr); + crypto_inc((u8 *)cctx->iv, AES_BLOCK_SIZE); + } + aes->resume = mtk_aes_ctr_transfer; + + return mtk_aes_dma(cryp, aes, src, dst, datalen); +} + +static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(aes->ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + mtk_aes_set_mode(aes, rctx); + + memcpy(cctx->iv, req->info, AES_BLOCK_SIZE); + cctx->offset = 0; + aes->total = 0; + + return mtk_aes_ctr_transfer(cryp, aes); +} + /* Check and set the AES key to transform state buffer */ static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, u32 keylen) @@ -536,6 +630,16 @@ static int mtk_aes_cbc_decrypt(struct ablkcipher_request *req) return mtk_aes_crypt(req, AES_FLAGS_CBC); } +static int mtk_aes_ctr_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CTR); +} + +static int 
mtk_aes_ctr_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CTR); +} + static int mtk_aes_cra_init(struct crypto_tfm *tfm) { struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); @@ -552,6 +656,22 @@ static int mtk_aes_cra_init(struct crypto_tfm *tfm) return 0; } +static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_ctr_start; + return 0; +} + static struct crypto_alg aes_algs[] = { { .cra_name = "cbc(aes)", @@ -594,6 +714,27 @@ static struct crypto_alg aes_algs[] = { .decrypt = mtk_aes_ecb_decrypt, } }, +{ + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_ctr_cra_init, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_ctr_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ctr_encrypt, + .decrypt = mtk_aes_ctr_decrypt, + } +}, }; static void mtk_aes_enc_task(unsigned long data) From d03f7b0d58ac1bed9d98960dfe831f69a6f15aab Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Jan 2017 13:41:15 +0800 Subject: [PATCH 064/142] crypto: mediatek - add support to GCM mode This patch adds support to the GCM mode. 
Signed-off-by: Ryder Lee Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 2 + drivers/crypto/mediatek/mtk-aes.c | 369 ++++++++++++++++++++++++- drivers/crypto/mediatek/mtk-platform.h | 2 + 3 files changed, 369 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index ee5057a2914b..bf7da55cffe6 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -557,7 +557,9 @@ config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST select CRYPTO_AES + select CRYPTO_AEAD select CRYPTO_BLKCIPHER + select CRYPTO_CTR select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c index bb5b4ff9ab0d..3a47cdb8f0c8 100644 --- a/drivers/crypto/mediatek/mtk-aes.c +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -24,16 +24,28 @@ #define AES_CT_SIZE_ECB 2 #define AES_CT_SIZE_CBC 3 #define AES_CT_SIZE_CTR 3 +#define AES_CT_SIZE_GCM_OUT 5 +#define AES_CT_SIZE_GCM_IN 6 #define AES_CT_CTRL_HDR cpu_to_le32(0x00220000) /* AES-CBC/ECB/CTR command token */ #define AES_CMD0 cpu_to_le32(0x05000000) #define AES_CMD1 cpu_to_le32(0x2d060000) #define AES_CMD2 cpu_to_le32(0xe4a63806) +/* AES-GCM command token */ +#define AES_GCM_CMD0 cpu_to_le32(0x0b000000) +#define AES_GCM_CMD1 cpu_to_le32(0xa0800000) +#define AES_GCM_CMD2 cpu_to_le32(0x25000010) +#define AES_GCM_CMD3 cpu_to_le32(0x0f020000) +#define AES_GCM_CMD4 cpu_to_le32(0x21e60000) +#define AES_GCM_CMD5 cpu_to_le32(0x40e60000) +#define AES_GCM_CMD6 cpu_to_le32(0xd0070000) /* AES transform information word 0 fields */ #define AES_TFM_BASIC_OUT cpu_to_le32(0x4 << 0) #define AES_TFM_BASIC_IN cpu_to_le32(0x5 << 0) +#define AES_TFM_GCM_OUT cpu_to_le32(0x6 << 0) +#define AES_TFM_GCM_IN cpu_to_le32(0xf << 0) #define AES_TFM_SIZE(x) cpu_to_le32((x) << 8) #define AES_TFM_128BITS cpu_to_le32(0xb << 16) #define AES_TFM_192BITS cpu_to_le32(0xd << 16) @@ 
-41,15 +53,22 @@ /* AES transform information word 1 fields */ #define AES_TFM_ECB cpu_to_le32(0x0 << 0) #define AES_TFM_CBC cpu_to_le32(0x1 << 0) +#define AES_TFM_CTR_INIT cpu_to_le32(0x2 << 0) /* init counter to 1 */ #define AES_TFM_CTR_LOAD cpu_to_le32(0x6 << 0) /* load/reuse counter */ +#define AES_TFM_3IV cpu_to_le32(0x7 << 5) /* using IV 0-2 */ #define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* using IV 0-3 */ +#define AES_TFM_IV_CTR_MODE cpu_to_le32(0x1 << 10) +#define AES_TFM_ENC_HASH cpu_to_le32(0x1 << 17) +#define AES_TFM_GHASH_DIG cpu_to_le32(0x2 << 21) +#define AES_TFM_GHASH cpu_to_le32(0x4 << 23) /* AES flags */ #define AES_FLAGS_ECB BIT(0) #define AES_FLAGS_CBC BIT(1) #define AES_FLAGS_CTR BIT(2) -#define AES_FLAGS_ENCRYPT BIT(3) -#define AES_FLAGS_BUSY BIT(4) +#define AES_FLAGS_GCM BIT(3) +#define AES_FLAGS_ENCRYPT BIT(4) +#define AES_FLAGS_BUSY BIT(5) /** * Command token(CT) is a set of hardware instructions that @@ -62,14 +81,23 @@ * - Commands decoding and control of the engine's data path. * - Coordinating hardware data fetch and store operations. * - Result token construction and output. 
+ * + * Memory map of GCM's TFM: + * /-----------\ + * | AES KEY | 128/192/256 bits + * |-----------| + * | HASH KEY | a string of 128 zero bits encrypted using the block cipher + * |-----------| + * | IVs | 4 * 4 bytes + * \-----------/ */ struct mtk_aes_ct { - __le32 cmd[AES_CT_SIZE_CBC]; + __le32 cmd[AES_CT_SIZE_GCM_IN]; }; struct mtk_aes_tfm { __le32 ctrl[2]; - __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE)]; + __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE * 2)]; }; struct mtk_aes_reqctx { @@ -103,6 +131,20 @@ struct mtk_aes_ctr_ctx { struct scatterlist dst[2]; }; +struct mtk_aes_gcm_ctx { + struct mtk_aes_base_ctx base; + + u32 authsize; + size_t textlen; + + struct crypto_skcipher *ctr; +}; + +struct mtk_aes_gcm_setkey_result { + int err; + struct completion completion; +}; + struct mtk_aes_drv { struct list_head dev_list; /* Device list lock */ @@ -251,6 +293,10 @@ static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) } res->hdr |= MTK_DESC_LAST; + /* Prepare enough space for authenticated tag */ + if (aes->flags & AES_FLAGS_GCM) + res->hdr += AES_BLOCK_SIZE; + /* * Make sure that all changes to the DMA ring are done before we * start engine.
@@ -737,6 +783,315 @@ static struct crypto_alg aes_algs[] = { }, }; +static inline struct mtk_aes_gcm_ctx * +mtk_aes_gcm_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_gcm_ctx, base); +} + +/* Initialize transform information of GCM mode */ +static void mtk_aes_gcm_info_init(struct mtk_cryp *cryp, + struct mtk_aes_rec *aes, + size_t len) +{ + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + const u32 *iv = (const u32 *)req->iv; + u32 *iv_state = ctx->tfm.state + ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE); + u32 ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req)); + int i; + + ctx->ct_hdr = AES_CT_CTRL_HDR | len; + + ctx->ct.cmd[0] = AES_GCM_CMD0 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[1] = AES_GCM_CMD1 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[2] = AES_GCM_CMD2; + ctx->ct.cmd[3] = AES_GCM_CMD3 | cpu_to_le32(gctx->textlen); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + ctx->ct.cmd[4] = AES_GCM_CMD4 | cpu_to_le32(gctx->authsize); + ctx->ct_size = AES_CT_SIZE_GCM_OUT; + ctx->tfm.ctrl[0] = AES_TFM_GCM_OUT; + } else { + ctx->ct.cmd[4] = AES_GCM_CMD5 | cpu_to_le32(gctx->authsize); + ctx->ct.cmd[5] = AES_GCM_CMD6 | cpu_to_le32(gctx->authsize); + ctx->ct_size = AES_CT_SIZE_GCM_IN; + ctx->tfm.ctrl[0] = AES_TFM_GCM_IN; + } + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + ctx->tfm.ctrl[0] |= AES_TFM_GHASH_DIG | AES_TFM_GHASH | + AES_TFM_SIZE(ctx->keylen + SIZE_IN_WORDS( + AES_BLOCK_SIZE + ivsize)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_INIT | AES_TFM_IV_CTR_MODE | + AES_TFM_3IV | AES_TFM_ENC_HASH; + + for (i = 0; i < SIZE_IN_WORDS(ivsize); i++) + iv_state[i] = cpu_to_le32(iv[i]); +} + +static int mtk_aes_gcm_dma(struct mtk_cryp *cryp, 
struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) +{ + bool src_aligned, dst_aligned; + + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; + else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + if (aes->total > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, aes->total); + } + + mtk_aes_gcm_info_init(cryp, aes, len); + + return mtk_aes_map(cryp, aes); +} + +/* Todo: GMAC */ +static int mtk_aes_gcm_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(aes->ctx); + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + u32 len = req->assoclen + req->cryptlen; + + mtk_aes_set_mode(aes, rctx); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + u32 tag[4]; + /* Compute total process length. */ + aes->total = len + gctx->authsize; + /* Compute text length. 
*/ + gctx->textlen = req->cryptlen; + /* Hardware will append authenticated tag to output buffer */ + scatterwalk_map_and_copy(tag, req->dst, len, gctx->authsize, 1); + } else { + aes->total = len; + gctx->textlen = req->cryptlen - gctx->authsize; + } + aes->resume = mtk_aes_complete; + + return mtk_aes_gcm_dma(cryp, aes, req->src, req->dst, len); +} + +static int mtk_aes_gcm_crypt(struct aead_request *req, u64 mode) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + + rctx->mode = AES_FLAGS_GCM | mode; + + return mtk_aes_handle_queue(ctx->cryp, !!(mode & AES_FLAGS_ENCRYPT), + &req->base); +} + +static void mtk_gcm_setkey_done(struct crypto_async_request *req, int err) +{ + struct mtk_aes_gcm_setkey_result *result = req->data; + + if (err == -EINPROGRESS) + return; + + result->err = err; + complete(&result->completion); +} + +/* + * Because of the hardware limitation, we need to pre-calculate key(H) + * for the GHASH operation. The result of the encryption operation + * need to be stored in the transform state buffer. 
+ */ +static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, + u32 keylen) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + struct crypto_skcipher *ctr = gctx->ctr; + struct { + u32 hash[4]; + u8 iv[8]; + + struct mtk_aes_gcm_setkey_result result; + + struct scatterlist sg[1]; + struct skcipher_request req; + } *data; + const u32 *aes_key; + u32 *key_state, *hash_state; + int err, i; + + if (keylen != AES_KEYSIZE_256 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_128) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + key_state = ctx->tfm.state; + aes_key = (u32 *)key; + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + ctx->tfm.state[i] = cpu_to_le32(aes_key[i]); + + /* Same as crypto_gcm_setkey() from crypto/gcm.c */ + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + init_completion(&data->result.completion); + sg_init_one(data->sg, &data->hash, AES_BLOCK_SIZE); + skcipher_request_set_tfm(&data->req, ctr); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + mtk_gcm_setkey_done, &data->result); + skcipher_request_set_crypt(&data->req, data->sg, data->sg, + AES_BLOCK_SIZE, data->iv); + + err = crypto_skcipher_encrypt(&data->req); + if (err == -EINPROGRESS || err == -EBUSY) { + err = wait_for_completion_interruptible( + &data->result.completion); + if (!err) + err = data->result.err; + } + if (err) + goto out; + + hash_state = key_state + ctx->keylen; + + for (i = 0; i < 4; i++) + hash_state[i] 
= cpu_to_be32(data->hash[i]); +out: + kzfree(data); + return err; +} + +static int mtk_aes_gcm_setauthsize(struct crypto_aead *aead, + u32 authsize) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + + /* Same as crypto_gcm_authsize() from crypto/gcm.c */ + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + + gctx->authsize = authsize; + return 0; +} + +static int mtk_aes_gcm_encrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, AES_FLAGS_ENCRYPT); +} + +static int mtk_aes_gcm_decrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, 0); +} + +static int mtk_aes_gcm_init(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + ctx->ctr = crypto_alloc_skcipher("ctr(aes)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->ctr)) { + pr_err("Error allocating ctr(aes)\n"); + return PTR_ERR(ctx->ctr); + } + + crypto_aead_set_reqsize(aead, sizeof(struct mtk_aes_reqctx)); + ctx->base.start = mtk_aes_gcm_start; + return 0; +} + +static void mtk_aes_gcm_exit(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + + crypto_free_skcipher(ctx->ctr); +} + +static struct aead_alg aes_gcm_alg = { + .setkey = mtk_aes_gcm_setkey, + .setauthsize = mtk_aes_gcm_setauthsize, + .encrypt = mtk_aes_gcm_encrypt, + .decrypt = mtk_aes_gcm_decrypt, + .init = mtk_aes_gcm_init, + .exit = mtk_aes_gcm_exit, + .ivsize = 12, + .maxauthsize = AES_BLOCK_SIZE, + + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_gcm_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}; + static void mtk_aes_enc_task(unsigned long 
data) { struct mtk_cryp *cryp = (struct mtk_cryp *)data; @@ -851,6 +1206,8 @@ static void mtk_aes_unregister_algs(void) { int i; + crypto_unregister_aead(&aes_gcm_alg); + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) crypto_unregister_alg(&aes_algs[i]); } @@ -865,6 +1222,10 @@ static int mtk_aes_register_algs(void) goto err_aes_algs; } + err = crypto_register_aead(&aes_gcm_alg); + if (err) + goto err_aes_algs; + return 0; err_aes_algs: diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h index 7cd5f98d25a5..ed6d8717f7f4 100644 --- a/drivers/crypto/mediatek/mtk-platform.h +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -13,8 +13,10 @@ #define __MTK_PLATFORM_H_ #include +#include #include #include +#include #include #include #include From 7dede913fc2ab9c0d3bff3a49e26fa9e858b0c13 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Fri, 20 Jan 2017 16:35:33 +0800 Subject: [PATCH 065/142] crypto: vmx - disable preemption to enable vsx in aes_ctr.c Some preemptible check warnings were reported from enable_kernel_vsx(). This patch disables preemption in aes_ctr.c before enabling vsx, and they are now consistent with other files in the same directory. 
Signed-off-by: Li Zhong Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_ctr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 38ed10d761d0..7cf6d31c1123 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -80,11 +80,13 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, int ret; struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); return ret; @@ -99,11 +101,13 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, u8 *dst = walk->dst.virt.addr; unsigned int nbytes = walk->nbytes; + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); crypto_xor(keystream, src, nbytes); memcpy(dst, keystream, nbytes); @@ -132,6 +136,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, @@ -143,6 +148,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, walk.iv); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); /* We need to update IV mostly for last bytes/round */ inc = (nbytes & AES_BLOCK_MASK) / AES_BLOCK_SIZE; From 379d972b81151b811ab769db5ab8da9c71bbfb00 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 26 Jan 2017 16:33:00 +0100 Subject: [PATCH 066/142] crypto: doc - Fix hash export state information The documentation states that crypto_ahash_reqsize() provides the size of the state structure 
used by crypto_ahash_export(). But it's actually crypto_ahash_statesize() which provides this size. Signed-off-by: Rabin Vincent Signed-off-by: Herbert Xu --- Documentation/crypto/api-digest.rst | 2 +- include/crypto/hash.h | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Documentation/crypto/api-digest.rst b/Documentation/crypto/api-digest.rst index 07356fa99200..7a1e670d6ce1 100644 --- a/Documentation/crypto/api-digest.rst +++ b/Documentation/crypto/api-digest.rst @@ -14,7 +14,7 @@ Asynchronous Message Digest API :doc: Asynchronous Message Digest API .. kernel-doc:: include/crypto/hash.h - :functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import + :functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_statesize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import Asynchronous Hash Request Handle -------------------------------- diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 216a2b876147..b5727bcd2336 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -329,6 +329,16 @@ static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) return crypto_hash_alg_common(tfm)->digestsize; } +/** + * crypto_ahash_statesize() - obtain size of the ahash state + * @tfm: cipher handle + * + * Return the size of the ahash state. With the crypto_ahash_export() + * function, the caller can export the state into a buffer whose size is + * defined with this function. 
+ * + * Return: size of the ahash state + */ static inline unsigned int crypto_ahash_statesize(struct crypto_ahash *tfm) { return crypto_hash_alg_common(tfm)->statesize; @@ -369,11 +379,7 @@ static inline struct crypto_ahash *crypto_ahash_reqtfm( * crypto_ahash_reqsize() - obtain size of the request data structure * @tfm: cipher handle * - * Return the size of the ahash state size. With the crypto_ahash_export - * function, the caller can export the state into a buffer whose size is - * defined with this function. - * - * Return: size of the ahash state + * Return: size of the request data */ static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) { @@ -453,7 +459,7 @@ int crypto_ahash_digest(struct ahash_request *req); * * This function exports the hash state of the ahash_request handle into the * caller-allocated output buffer out which must have sufficient size (e.g. by - * calling crypto_ahash_reqsize). + * calling crypto_ahash_statesize()). * * Return: 0 if the export was successful; < 0 if an error occurred */ From 8340c7fd28d1fc0b559c84be8f0e7413d2716fc2 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:46 +0100 Subject: [PATCH 067/142] crypto: atmel-sha - create function to get an Atmel SHA device This is a transitional patch: it creates the atmel_sha_find_dev() function, which will be used in further patches to share the source code responsible for finding an Atmel SHA device.
Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 97e34799e077..33a36e667547 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -260,11 +260,8 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) } } -static int atmel_sha_init(struct ahash_request *req) +static struct atmel_sha_dev *atmel_sha_find_dev(struct atmel_sha_ctx *tctx) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); - struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); struct atmel_sha_dev *dd = NULL; struct atmel_sha_dev *tmp; @@ -281,6 +278,16 @@ static int atmel_sha_init(struct ahash_request *req) spin_unlock_bh(&atmel_sha.lock); + return dd; +} + +static int atmel_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct atmel_sha_dev *dd = atmel_sha_find_dev(tctx); + ctx->dd = dd; ctx->flags = 0; From a29af939b24dc98c11e1e8a77be7669c4e4f5719 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:47 +0100 Subject: [PATCH 068/142] crypto: atmel-sha - update request queue management to make it more generic This patch is a transitional patch. It splits the atmel_sha_handle_queue() function. Now atmel_sha_handle_queue() only manages the request queue and calls a new .start() hook from the atmel_sha_ctx structure. This hook allows implementing different kinds of requests still handled by a single queue.
Also when the req parameter of atmel_sha_handle_queue() refers to the very same request as the one returned by crypto_dequeue_request(), the queue management now gives a chance to this crypto request to be handled synchronously, hence reducing latencies. The .start() hook returns 0 if the crypto request was handled synchronously and -EINPROGRESS if the crypto request still need to be handled asynchronously. Besides, the new .is_async member of the atmel_sha_dev structure helps tagging this asynchronous state. Indeed, the req->base.complete() callback should not be called if the crypto request is handled synchronously. Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 74 +++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 33a36e667547..2dbed8bb8d26 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -105,8 +105,11 @@ struct atmel_sha_reqctx { u8 buffer[SHA_BUFFER_LEN + SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); }; +typedef int (*atmel_sha_fn_t)(struct atmel_sha_dev *); + struct atmel_sha_ctx { struct atmel_sha_dev *dd; + atmel_sha_fn_t start; unsigned long flags; }; @@ -134,6 +137,7 @@ struct atmel_sha_dev { unsigned long flags; struct crypto_queue queue; struct ahash_request *req; + bool is_async; struct atmel_sha_dma dma_lch_in; @@ -163,6 +167,24 @@ static inline void atmel_sha_write(struct atmel_sha_dev *dd, writel_relaxed(value, dd->io_base + offset); } +static inline int atmel_sha_complete(struct atmel_sha_dev *dd, int err) +{ + struct ahash_request *req = dd->req; + + dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | + SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY); + + clk_disable(dd->iclk); + + if (dd->is_async && req->base.complete) + req->base.complete(&req->base, err); + + /* handle new request */ + tasklet_schedule(&dd->queue_task); + + return err; +} + static size_t 
atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) { size_t count; @@ -474,6 +496,8 @@ static void atmel_sha_dma_callback(void *data) { struct atmel_sha_dev *dd = data; + dd->is_async = true; + /* dma_lch_in - completed - wait DATRDY */ atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); } @@ -509,7 +533,7 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); } if (!in_desc) - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); in_desc->callback = atmel_sha_dma_callback; in_desc->callback_param = dd; @@ -566,7 +590,7 @@ static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd, if (dma_mapping_error(dd->dev, ctx->dma_addr)) { dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + ctx->block_size); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } ctx->flags &= ~SHA_FLAGS_SG; @@ -657,7 +681,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (dma_mapping_error(dd->dev, ctx->dma_addr)) { dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + ctx->block_size); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } if (length == 0) { @@ -671,7 +695,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; @@ -685,7 +709,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; @@ -843,16 +867,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err) } /* atomic operation is not needed here */ - dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | - SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY); - - clk_disable(dd->iclk); - - if (req->base.complete) - 
req->base.complete(&req->base, err); - - /* handle new request */ - tasklet_schedule(&dd->queue_task); + (void)atmel_sha_complete(dd, err); } static int atmel_sha_hw_init(struct atmel_sha_dev *dd) @@ -893,8 +908,9 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, struct ahash_request *req) { struct crypto_async_request *async_req, *backlog; - struct atmel_sha_reqctx *ctx; + struct atmel_sha_ctx *ctx; unsigned long flags; + bool start_async; int err = 0, ret = 0; spin_lock_irqsave(&dd->lock, flags); @@ -919,9 +935,22 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, if (backlog) backlog->complete(backlog, -EINPROGRESS); - req = ahash_request_cast(async_req); - dd->req = req; - ctx = ahash_request_ctx(req); + ctx = crypto_tfm_ctx(async_req->tfm); + + dd->req = ahash_request_cast(async_req); + start_async = (dd->req != req); + dd->is_async = start_async; + + /* WARNING: ctx->start() MAY change dd->is_async. */ + err = ctx->start(dd); + return (start_async) ? ret : err; +} + +static int atmel_sha_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + int err; dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", ctx->op, req->nbytes); @@ -947,7 +976,7 @@ err1: dev_dbg(dd->dev, "exit, err: %d\n", err); - return ret; + return err; } static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op) @@ -1043,8 +1072,11 @@ static int atmel_sha_import(struct ahash_request *req, const void *in) static int atmel_sha_cra_init(struct crypto_tfm *tfm) { + struct atmel_sha_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct atmel_sha_reqctx)); + ctx->start = atmel_sha_start; return 0; } @@ -1188,6 +1220,8 @@ static void atmel_sha_done_task(unsigned long data) struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; int err = 0; + dd->is_async = true; + if (SHA_FLAGS_CPU & dd->flags) { if (SHA_FLAGS_OUTPUT_READY & 
dd->flags) { dd->flags &= ~SHA_FLAGS_OUTPUT_READY; From b5ce82a7b4938f278fc6da28ce00da34e7a0773c Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:48 +0100 Subject: [PATCH 069/142] crypto: atmel-sha - make atmel_sha_done_task more generic This patch is a transitional patch. It updates atmel_sha_done_task() to make it more generic. Indeed, it adds a new .resume() member in the atmel_sha_dev structure. This hook is called from atmel_sha_done_task() to resume processing an asynchronous request. Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 2dbed8bb8d26..643d79a05dda 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -138,6 +138,7 @@ struct atmel_sha_dev { struct crypto_queue queue; struct ahash_request *req; bool is_async; + atmel_sha_fn_t resume; struct atmel_sha_dma dma_lch_in; @@ -946,6 +947,8 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, return (start_async) ? 
ret : err; } +static int atmel_sha_done(struct atmel_sha_dev *dd); + static int atmel_sha_start(struct atmel_sha_dev *dd) { struct ahash_request *req = dd->req; @@ -960,6 +963,7 @@ static int atmel_sha_start(struct atmel_sha_dev *dd) if (err) goto err1; + dd->resume = atmel_sha_done; if (ctx->op == SHA_OP_UPDATE) { err = atmel_sha_update_req(dd); if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) @@ -1215,13 +1219,10 @@ static void atmel_sha_queue_task(unsigned long data) atmel_sha_handle_queue(dd, NULL); } -static void atmel_sha_done_task(unsigned long data) +static int atmel_sha_done(struct atmel_sha_dev *dd) { - struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; int err = 0; - dd->is_async = true; - if (SHA_FLAGS_CPU & dd->flags) { if (SHA_FLAGS_OUTPUT_READY & dd->flags) { dd->flags &= ~SHA_FLAGS_OUTPUT_READY; @@ -1245,11 +1246,21 @@ static void atmel_sha_done_task(unsigned long data) goto finish; } } - return; + return err; finish: /* finish curent request */ atmel_sha_finish_req(dd->req, err); + + return err; +} + +static void atmel_sha_done_task(unsigned long data) +{ + struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; + + dd->is_async = true; + (void)dd->resume(dd); } static irqreturn_t atmel_sha_irq(int irq, void *dev_id) From f07cebad63b28562d030eee8c762833eca50e46e Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:49 +0100 Subject: [PATCH 070/142] crypto: atmel-sha - redefine SHA_FLAGS_SHA* flags to match SHA_MR_ALGO_SHA* This patch modifies the SHA_FLAGS_SHA* flags: those algo flags are now organized as values of a single bitfield instead of individual bits. This allows to reduce the number of bits needed to encode all possible values. 
Also the new values match the SHA_MR_ALGO_SHA* values hence the algorithm bitfield of the SHA_MR register could simply be set with: mr = (mr & ~SHA_FLAGS_ALGO_MASK) | (ctx->flags & SHA_FLAGS_ALGO_MASK) Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha-regs.h | 1 + drivers/crypto/atmel-sha.c | 45 +++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h index e08897109cab..deb0b0b15096 100644 --- a/drivers/crypto/atmel-sha-regs.h +++ b/drivers/crypto/atmel-sha-regs.h @@ -19,6 +19,7 @@ #define SHA_MR_PROCDLY (1 << 4) #define SHA_MR_UIHV (1 << 5) #define SHA_MR_UIEHV (1 << 6) +#define SHA_MR_ALGO_MASK GENMASK(10, 8) #define SHA_MR_ALGO_SHA1 (0 << 8) #define SHA_MR_ALGO_SHA256 (1 << 8) #define SHA_MR_ALGO_SHA384 (2 << 8) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 643d79a05dda..b29a4e5bc404 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -51,14 +51,16 @@ #define SHA_FLAGS_CPU BIT(5) #define SHA_FLAGS_DMA_READY BIT(6) +/* bits[10:8] are reserved. 
*/ +#define SHA_FLAGS_ALGO_MASK SHA_MR_ALGO_MASK +#define SHA_FLAGS_SHA1 SHA_MR_ALGO_SHA1 +#define SHA_FLAGS_SHA256 SHA_MR_ALGO_SHA256 +#define SHA_FLAGS_SHA384 SHA_MR_ALGO_SHA384 +#define SHA_FLAGS_SHA512 SHA_MR_ALGO_SHA512 +#define SHA_FLAGS_SHA224 SHA_MR_ALGO_SHA224 + #define SHA_FLAGS_FINUP BIT(16) #define SHA_FLAGS_SG BIT(17) -#define SHA_FLAGS_ALGO_MASK GENMASK(22, 18) -#define SHA_FLAGS_SHA1 BIT(18) -#define SHA_FLAGS_SHA224 BIT(19) -#define SHA_FLAGS_SHA256 BIT(20) -#define SHA_FLAGS_SHA384 BIT(21) -#define SHA_FLAGS_SHA512 BIT(22) #define SHA_FLAGS_ERROR BIT(23) #define SHA_FLAGS_PAD BIT(24) #define SHA_FLAGS_RESTORE BIT(25) @@ -264,7 +266,9 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) bits[1] = cpu_to_be64(size[0] << 3); bits[0] = cpu_to_be64(size[1] << 3 | size[0] >> 61); - if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA384: + case SHA_FLAGS_SHA512: index = ctx->bufcnt & 0x7f; padlen = (index < 112) ? (112 - index) : ((128+112) - index); *(ctx->buffer + ctx->bufcnt) = 0x80; @@ -272,7 +276,9 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); ctx->bufcnt += padlen + 16; ctx->flags |= SHA_FLAGS_PAD; - } else { + break; + + default: index = ctx->bufcnt & 0x3f; padlen = (index < 56) ? 
(56 - index) : ((64+56) - index); *(ctx->buffer + ctx->bufcnt) = 0x80; @@ -280,6 +286,7 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); ctx->bufcnt += padlen + 8; ctx->flags |= SHA_FLAGS_PAD; + break; } } @@ -828,16 +835,28 @@ static void atmel_sha_copy_ready_hash(struct ahash_request *req) if (!req->result) return; - if (ctx->flags & SHA_FLAGS_SHA1) + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + default: + case SHA_FLAGS_SHA1: memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA224) + break; + + case SHA_FLAGS_SHA224: memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA256) + break; + + case SHA_FLAGS_SHA256: memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA384) + break; + + case SHA_FLAGS_SHA384: memcpy(req->result, ctx->digest, SHA384_DIGEST_SIZE); - else + break; + + case SHA_FLAGS_SHA512: memcpy(req->result, ctx->digest, SHA512_DIGEST_SIZE); + break; + } } static int atmel_sha_finish(struct ahash_request *req) From 9064ed92695b4d9d20e4d5d72fe72465eaa1c162 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:50 +0100 Subject: [PATCH 071/142] crypto: atmel-sha - add atmel_sha_wait_for_data_ready() This patch simply defines a helper function to test the 'Data Ready' flag of the Status Register. It also gives a chance for the crypto request to be processed synchronously if this 'Data Ready' flag is already set when polling the Status Register. Indeed, running synchronously avoids the latency of the 'Data Ready' interrupt. When the 'Data Ready' flag has not been set yet, we enable the associated interrupt and resume processing the crypto request asynchronously from the 'done' task just as before.
Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index b29a4e5bc404..be0d72cf4352 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -434,6 +434,19 @@ static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma) atmel_sha_write(dd, SHA_MR, valmr); } +static inline int atmel_sha_wait_for_data_ready(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume) +{ + u32 isr = atmel_sha_read(dd, SHA_ISR); + + if (unlikely(isr & SHA_INT_DATARDY)) + return resume(dd); + + dd->resume = resume; + atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); + return -EINPROGRESS; +} + static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf, size_t length, int final) { From 563c47df79747412bf5d0fdb8fd24089d7316c2b Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:51 +0100 Subject: [PATCH 072/142] crypto: atmel-sha - add SHA_MR_MODE_IDATAR0 This patch defines an alias macro to SHA_MR_MODE_PDC, which is not suited for DMA usage. Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha-regs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h index deb0b0b15096..8d62d31eda08 100644 --- a/drivers/crypto/atmel-sha-regs.h +++ b/drivers/crypto/atmel-sha-regs.h @@ -16,6 +16,7 @@ #define SHA_MR_MODE_MANUAL 0x0 #define SHA_MR_MODE_AUTO 0x1 #define SHA_MR_MODE_PDC 0x2 +#define SHA_MR_MODE_IDATAR0 0x2 #define SHA_MR_PROCDLY (1 << 4) #define SHA_MR_UIHV (1 << 5) #define SHA_MR_UIEHV (1 << 6) From eec12f66b02c3812252103d5efcb80754b04012c Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:52 +0100 Subject: [PATCH 073/142] crypto: atmel-sha - add atmel_sha_cpu_start() This patch adds a simple function to perform data transfer with PIO, hence handled by the CPU. 
Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 90 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index be0d72cf4352..58d9ca8ac0f2 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -64,6 +64,8 @@ #define SHA_FLAGS_ERROR BIT(23) #define SHA_FLAGS_PAD BIT(24) #define SHA_FLAGS_RESTORE BIT(25) +#define SHA_FLAGS_IDATAR0 BIT(26) +#define SHA_FLAGS_WAIT_DATARDY BIT(27) #define SHA_OP_UPDATE 1 #define SHA_OP_FINAL 2 @@ -141,6 +143,7 @@ struct atmel_sha_dev { struct ahash_request *req; bool is_async; atmel_sha_fn_t resume; + atmel_sha_fn_t cpu_transfer_complete; struct atmel_sha_dma dma_lch_in; @@ -1317,6 +1320,93 @@ static irqreturn_t atmel_sha_irq(int irq, void *dev_id) return IRQ_NONE; } + +/* CPU transfer functions */ + +static int atmel_sha_cpu_transfer(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + const u32 *words = (const u32 *)ctx->buffer; + size_t i, num_words; + u32 isr, din, din_inc; + + din_inc = (ctx->flags & SHA_FLAGS_IDATAR0) ? 0 : 1; + for (;;) { + /* Write data into the Input Data Registers. */ + num_words = DIV_ROUND_UP(ctx->bufcnt, sizeof(u32)); + for (i = 0, din = 0; i < num_words; ++i, din += din_inc) + atmel_sha_write(dd, SHA_REG_DIN(din), words[i]); + + ctx->offset += ctx->bufcnt; + ctx->total -= ctx->bufcnt; + + if (!ctx->total) + break; + + /* + * Prepare next block: + * Fill ctx->buffer now with the next data to be written into + * IDATARx: it gives time for the SHA hardware to process + * the current data so the SHA_INT_DATARDY flag might be set + * in SHA_ISR when polling this register at the beginning of + * the next loop. 
+ */ + ctx->bufcnt = min_t(size_t, ctx->block_size, ctx->total); + scatterwalk_map_and_copy(ctx->buffer, ctx->sg, + ctx->offset, ctx->bufcnt, 0); + + /* Wait for hardware to be ready again. */ + isr = atmel_sha_read(dd, SHA_ISR); + if (!(isr & SHA_INT_DATARDY)) { + /* Not ready yet. */ + dd->resume = atmel_sha_cpu_transfer; + atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); + return -EINPROGRESS; + } + } + + if (unlikely(!(ctx->flags & SHA_FLAGS_WAIT_DATARDY))) + return dd->cpu_transfer_complete(dd); + + return atmel_sha_wait_for_data_ready(dd, dd->cpu_transfer_complete); +} + +static int atmel_sha_cpu_start(struct atmel_sha_dev *dd, + struct scatterlist *sg, + unsigned int len, + bool idatar0_only, + bool wait_data_ready, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + + if (!len) + return resume(dd); + + ctx->flags &= ~(SHA_FLAGS_IDATAR0 | SHA_FLAGS_WAIT_DATARDY); + + if (idatar0_only) + ctx->flags |= SHA_FLAGS_IDATAR0; + + if (wait_data_ready) + ctx->flags |= SHA_FLAGS_WAIT_DATARDY; + + ctx->sg = sg; + ctx->total = len; + ctx->offset = 0; + + /* Prepare the first block to be written. */ + ctx->bufcnt = min_t(size_t, ctx->block_size, ctx->total); + scatterwalk_map_and_copy(ctx->buffer, ctx->sg, + ctx->offset, ctx->bufcnt, 0); + + dd->cpu_transfer_complete = resume; + return atmel_sha_cpu_transfer(dd); +} + + static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd) { int i; From 69303cf0f1dcfb1ace2956e1c0a1aa3a5222ce4c Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:53 +0100 Subject: [PATCH 074/142] crypto: atmel-sha - add simple DMA transfers This patch adds a simple function to perform data transfer with the DMA controller. 
Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 116 +++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 58d9ca8ac0f2..a4fc60b67099 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -123,6 +123,9 @@ struct atmel_sha_ctx { struct atmel_sha_dma { struct dma_chan *chan; struct dma_slave_config dma_conf; + struct scatterlist *sg; + int nents; + unsigned int last_sg_length; }; struct atmel_sha_dev { @@ -1321,6 +1324,119 @@ static irqreturn_t atmel_sha_irq(int irq, void *dev_id) } +/* DMA transfer functions */ + +static bool atmel_sha_dma_check_aligned(struct atmel_sha_dev *dd, + struct scatterlist *sg, + size_t len) +{ + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + int nents; + + for (nents = 0; sg; sg = sg_next(sg), ++nents) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return false; + + /* + * This is the last sg, the only one that is allowed to + * have an unaligned length. + */ + if (len <= sg->length) { + dma->nents = nents + 1; + dma->last_sg_length = sg->length; + sg->length = ALIGN(len, sizeof(u32)); + return true; + } + + /* All other sg lengths MUST be aligned to the block size. 
*/ + if (!IS_ALIGNED(sg->length, bs)) + return false; + + len -= sg->length; + } + + return false; +} + +static void atmel_sha_dma_callback2(void *data) +{ + struct atmel_sha_dev *dd = data; + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct scatterlist *sg; + int nents; + + dmaengine_terminate_all(dma->chan); + dma_unmap_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); + + sg = dma->sg; + for (nents = 0; nents < dma->nents - 1; ++nents) + sg = sg_next(sg); + sg->length = dma->last_sg_length; + + dd->is_async = true; + (void)atmel_sha_wait_for_data_ready(dd, dd->resume); +} + +static int atmel_sha_dma_start(struct atmel_sha_dev *dd, + struct scatterlist *src, + size_t len, + atmel_sha_fn_t resume) +{ + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct dma_slave_config *config = &dma->dma_conf; + struct dma_chan *chan = dma->chan; + struct dma_async_tx_descriptor *desc; + dma_cookie_t cookie; + unsigned int sg_len; + int err; + + dd->resume = resume; + + /* + * dma->nents has already been initialized by + * atmel_sha_dma_check_aligned(). 
+ */ + dma->sg = src; + sg_len = dma_map_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); + if (!sg_len) { + err = -ENOMEM; + goto exit; + } + + config->src_maxburst = 16; + config->dst_maxburst = 16; + err = dmaengine_slave_config(chan, config); + if (err) + goto unmap_sg; + + desc = dmaengine_prep_slave_sg(chan, dma->sg, sg_len, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc) { + err = -ENOMEM; + goto unmap_sg; + } + + desc->callback = atmel_sha_dma_callback2; + desc->callback_param = dd; + cookie = dmaengine_submit(desc); + err = dma_submit_error(cookie); + if (err) + goto unmap_sg; + + dma_async_issue_pending(chan); + + return -EINPROGRESS; + +unmap_sg: + dma_unmap_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); +exit: + return atmel_sha_complete(dd, err); +} + + /* CPU transfer functions */ static int atmel_sha_cpu_transfer(struct atmel_sha_dev *dd) From 81d8750b2b59cd4e0fe4bf4fcf64e5348686c1f0 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:54 +0100 Subject: [PATCH 075/142] crypto: atmel-sha - add support to hmac(shaX) This patch adds support to the hmac(shaX) algorithms. 
Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha-regs.h | 4 + drivers/crypto/atmel-sha.c | 598 +++++++++++++++++++++++++++++++- 2 files changed, 601 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h index 8d62d31eda08..1b9f3d33079e 100644 --- a/drivers/crypto/atmel-sha-regs.h +++ b/drivers/crypto/atmel-sha-regs.h @@ -26,6 +26,7 @@ #define SHA_MR_ALGO_SHA384 (2 << 8) #define SHA_MR_ALGO_SHA512 (3 << 8) #define SHA_MR_ALGO_SHA224 (4 << 8) +#define SHA_MR_HMAC (1 << 11) #define SHA_MR_DUALBUFF (1 << 16) #define SHA_IER 0x10 @@ -42,6 +43,9 @@ #define SHA_ISR_URAT_MR (0x2 << 12) #define SHA_ISR_URAT_WO (0x5 << 12) +#define SHA_MSR 0x20 +#define SHA_BCR 0x30 + #define SHA_HW_VERSION 0xFC #define SHA_TPR 0x108 diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index a4fc60b67099..78c3c02e4483 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -51,13 +51,20 @@ #define SHA_FLAGS_CPU BIT(5) #define SHA_FLAGS_DMA_READY BIT(6) -/* bits[10:8] are reserved. */ +/* bits[11:8] are reserved. 
*/ #define SHA_FLAGS_ALGO_MASK SHA_MR_ALGO_MASK #define SHA_FLAGS_SHA1 SHA_MR_ALGO_SHA1 #define SHA_FLAGS_SHA256 SHA_MR_ALGO_SHA256 #define SHA_FLAGS_SHA384 SHA_MR_ALGO_SHA384 #define SHA_FLAGS_SHA512 SHA_MR_ALGO_SHA512 #define SHA_FLAGS_SHA224 SHA_MR_ALGO_SHA224 +#define SHA_FLAGS_HMAC SHA_MR_HMAC +#define SHA_FLAGS_HMAC_SHA1 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA1) +#define SHA_FLAGS_HMAC_SHA256 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA256) +#define SHA_FLAGS_HMAC_SHA384 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA384) +#define SHA_FLAGS_HMAC_SHA512 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA512) +#define SHA_FLAGS_HMAC_SHA224 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA224) +#define SHA_FLAGS_MODE_MASK (SHA_FLAGS_HMAC | SHA_FLAGS_ALGO_MASK) #define SHA_FLAGS_FINUP BIT(16) #define SHA_FLAGS_SG BIT(17) @@ -67,8 +74,10 @@ #define SHA_FLAGS_IDATAR0 BIT(26) #define SHA_FLAGS_WAIT_DATARDY BIT(27) +#define SHA_OP_INIT 0 #define SHA_OP_UPDATE 1 #define SHA_OP_FINAL 2 +#define SHA_OP_DIGEST 3 #define SHA_BUFFER_LEN (PAGE_SIZE / 16) @@ -80,6 +89,7 @@ struct atmel_sha_caps { bool has_sha224; bool has_sha_384_512; bool has_uihv; + bool has_hmac; }; struct atmel_sha_dev; @@ -105,6 +115,7 @@ struct atmel_sha_reqctx { unsigned int total; /* total request */ size_t block_size; + size_t hash_size; u8 buffer[SHA_BUFFER_LEN + SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); }; @@ -152,6 +163,8 @@ struct atmel_sha_dev { struct atmel_sha_caps caps; + struct scatterlist tmp; + u32 hw_version; }; @@ -1522,11 +1535,579 @@ static int atmel_sha_cpu_start(struct atmel_sha_dev *dd, return atmel_sha_cpu_transfer(dd); } +static int atmel_sha_cpu_hash(struct atmel_sha_dev *dd, + const void *data, unsigned int datalen, + bool auto_padding, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + u32 msglen = (auto_padding) ? 
datalen : 0; + u32 mr = SHA_MR_MODE_AUTO; + + if (!(IS_ALIGNED(datalen, ctx->block_size) || auto_padding)) + return atmel_sha_complete(dd, -EINVAL); + + mr |= (ctx->flags & SHA_FLAGS_ALGO_MASK); + atmel_sha_write(dd, SHA_MR, mr); + atmel_sha_write(dd, SHA_MSR, msglen); + atmel_sha_write(dd, SHA_BCR, msglen); + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + sg_init_one(&dd->tmp, data, datalen); + return atmel_sha_cpu_start(dd, &dd->tmp, datalen, false, true, resume); +} + + +/* hmac functions */ + +struct atmel_sha_hmac_key { + bool valid; + unsigned int keylen; + u8 buffer[SHA512_BLOCK_SIZE]; + u8 *keydup; +}; + +static inline void atmel_sha_hmac_key_init(struct atmel_sha_hmac_key *hkey) +{ + memset(hkey, 0, sizeof(*hkey)); +} + +static inline void atmel_sha_hmac_key_release(struct atmel_sha_hmac_key *hkey) +{ + kfree(hkey->keydup); + memset(hkey, 0, sizeof(*hkey)); +} + +static inline int atmel_sha_hmac_key_set(struct atmel_sha_hmac_key *hkey, + const u8 *key, + unsigned int keylen) +{ + atmel_sha_hmac_key_release(hkey); + + if (keylen > sizeof(hkey->buffer)) { + hkey->keydup = kmemdup(key, keylen, GFP_KERNEL); + if (!hkey->keydup) + return -ENOMEM; + + } else { + memcpy(hkey->buffer, key, keylen); + } + + hkey->valid = true; + hkey->keylen = keylen; + return 0; +} + +static inline bool atmel_sha_hmac_key_get(const struct atmel_sha_hmac_key *hkey, + const u8 **key, + unsigned int *keylen) +{ + if (!hkey->valid) + return false; + + *keylen = hkey->keylen; + *key = (hkey->keydup) ? 
hkey->keydup : hkey->buffer; + return true; +} + + +struct atmel_sha_hmac_ctx { + struct atmel_sha_ctx base; + + struct atmel_sha_hmac_key hkey; + u32 ipad[SHA512_BLOCK_SIZE / sizeof(u32)]; + u32 opad[SHA512_BLOCK_SIZE / sizeof(u32)]; + atmel_sha_fn_t resume; +}; + +static int atmel_sha_hmac_setup(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume); +static int atmel_sha_hmac_prehash_key(struct atmel_sha_dev *dd, + const u8 *key, unsigned int keylen); +static int atmel_sha_hmac_prehash_key_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_compute_ipad_hash(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_compute_opad_hash(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_setup_done(struct atmel_sha_dev *dd); + +static int atmel_sha_hmac_init_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_final(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_final_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_digest2(struct atmel_sha_dev *dd); + +static int atmel_sha_hmac_setup(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + unsigned int keylen; + const u8 *key; + size_t bs; + + hmac->resume = resume; + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA1: + ctx->block_size = SHA1_BLOCK_SIZE; + ctx->hash_size = SHA1_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA224: + ctx->block_size = SHA224_BLOCK_SIZE; + ctx->hash_size = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA256: + ctx->block_size = SHA256_BLOCK_SIZE; + ctx->hash_size = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA384: + ctx->block_size = SHA384_BLOCK_SIZE; + ctx->hash_size = SHA512_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA512: + ctx->block_size = SHA512_BLOCK_SIZE; + ctx->hash_size = SHA512_DIGEST_SIZE; + break; + + default: + return 
atmel_sha_complete(dd, -EINVAL); + } + bs = ctx->block_size; + + if (likely(!atmel_sha_hmac_key_get(&hmac->hkey, &key, &keylen))) + return resume(dd); + + /* Compute K' from K. */ + if (unlikely(keylen > bs)) + return atmel_sha_hmac_prehash_key(dd, key, keylen); + + /* Prepare ipad. */ + memcpy((u8 *)hmac->ipad, key, keylen); + memset((u8 *)hmac->ipad + keylen, 0, bs - keylen); + return atmel_sha_hmac_compute_ipad_hash(dd); +} + +static int atmel_sha_hmac_prehash_key(struct atmel_sha_dev *dd, + const u8 *key, unsigned int keylen) +{ + return atmel_sha_cpu_hash(dd, key, keylen, true, + atmel_sha_hmac_prehash_key_done); +} + +static int atmel_sha_hmac_prehash_key_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t ds = crypto_ahash_digestsize(tfm); + size_t bs = ctx->block_size; + size_t i, num_words = ds / sizeof(u32); + + /* Prepare ipad. 
*/ + for (i = 0; i < num_words; ++i) + hmac->ipad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + memset((u8 *)hmac->ipad + ds, 0, bs - ds); + return atmel_sha_hmac_compute_ipad_hash(dd); +} + +static int atmel_sha_hmac_compute_ipad_hash(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + size_t i, num_words = bs / sizeof(u32); + + memcpy(hmac->opad, hmac->ipad, bs); + for (i = 0; i < num_words; ++i) { + hmac->ipad[i] ^= 0x36363636; + hmac->opad[i] ^= 0x5c5c5c5c; + } + + return atmel_sha_cpu_hash(dd, hmac->ipad, bs, false, + atmel_sha_hmac_compute_opad_hash); +} + +static int atmel_sha_hmac_compute_opad_hash(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + + for (i = 0; i < num_words; ++i) + hmac->ipad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + return atmel_sha_cpu_hash(dd, hmac->opad, bs, false, + atmel_sha_hmac_setup_done); +} + +static int atmel_sha_hmac_setup_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + + for (i = 0; i < num_words; ++i) + hmac->opad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + atmel_sha_hmac_key_release(&hmac->hkey); + return hmac->resume(dd); +} + +static int atmel_sha_hmac_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct 
atmel_sha_reqctx *ctx = ahash_request_ctx(req); + int err; + + err = atmel_sha_hw_init(dd); + if (err) + return atmel_sha_complete(dd, err); + + switch (ctx->op) { + case SHA_OP_INIT: + err = atmel_sha_hmac_setup(dd, atmel_sha_hmac_init_done); + break; + + case SHA_OP_UPDATE: + dd->resume = atmel_sha_done; + err = atmel_sha_update_req(dd); + break; + + case SHA_OP_FINAL: + dd->resume = atmel_sha_hmac_final; + err = atmel_sha_final_req(dd); + break; + + case SHA_OP_DIGEST: + err = atmel_sha_hmac_setup(dd, atmel_sha_hmac_digest2); + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + + return err; +} + +static int atmel_sha_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + + if (atmel_sha_hmac_key_set(&hmac->hkey, key, keylen)) { + crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + return 0; +} + +static int atmel_sha_hmac_init(struct ahash_request *req) +{ + int err; + + err = atmel_sha_init(req); + if (err) + return err; + + return atmel_sha_enqueue(req, SHA_OP_INIT); +} + +static int atmel_sha_hmac_init_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + + ctx->bufcnt = 0; + ctx->digcnt[0] = bs; + ctx->digcnt[1] = 0; + ctx->flags |= SHA_FLAGS_RESTORE; + memcpy(ctx->digest, hmac->ipad, hs); + return atmel_sha_complete(dd, 0); +} + +static int atmel_sha_hmac_final(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + u32 *digest = (u32 *)ctx->digest; + size_t ds = crypto_ahash_digestsize(tfm); + 
size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + size_t i, num_words; + u32 mr; + + /* Save d = SHA((K' + ipad) | msg). */ + num_words = ds / sizeof(u32); + for (i = 0; i < num_words; ++i) + digest[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + + /* Restore context to finish computing SHA((K' + opad) | d). */ + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + num_words = hs / sizeof(u32); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + mr = SHA_MR_MODE_AUTO | SHA_MR_UIHV; + mr |= (ctx->flags & SHA_FLAGS_ALGO_MASK); + atmel_sha_write(dd, SHA_MR, mr); + atmel_sha_write(dd, SHA_MSR, bs + ds); + atmel_sha_write(dd, SHA_BCR, ds); + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + sg_init_one(&dd->tmp, digest, ds); + return atmel_sha_cpu_start(dd, &dd->tmp, ds, false, true, + atmel_sha_hmac_final_done); +} + +static int atmel_sha_hmac_final_done(struct atmel_sha_dev *dd) +{ + /* + * req->result might not be sizeof(u32) aligned, so copy the + * digest into ctx->digest[] before memcpy() the data into + * req->result. + */ + atmel_sha_copy_hash(dd->req); + atmel_sha_copy_ready_hash(dd->req); + return atmel_sha_complete(dd, 0); +} + +static int atmel_sha_hmac_digest(struct ahash_request *req) +{ + int err; + + err = atmel_sha_init(req); + if (err) + return err; + + return atmel_sha_enqueue(req, SHA_OP_DIGEST); +} + +static int atmel_sha_hmac_digest2(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + bool use_dma = false; + u32 mr; + + /* Special case for empty message. */ + if (!req->nbytes) + return atmel_sha_complete(dd, -EINVAL); // TODO: + + /* Check DMA threshold and alignment. 
*/ + if (req->nbytes > ATMEL_SHA_DMA_THRESHOLD && + atmel_sha_dma_check_aligned(dd, req->src, req->nbytes)) + use_dma = true; + + /* Write both initial hash values to compute a HMAC. */ + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->ipad[i]); + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIEHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + /* Write the Mode, Message Size, Bytes Count then Control Registers. */ + mr = (SHA_MR_HMAC | SHA_MR_DUALBUFF); + mr |= ctx->flags & SHA_FLAGS_ALGO_MASK; + if (use_dma) + mr |= SHA_MR_MODE_IDATAR0; + else + mr |= SHA_MR_MODE_AUTO; + atmel_sha_write(dd, SHA_MR, mr); + + atmel_sha_write(dd, SHA_MSR, req->nbytes); + atmel_sha_write(dd, SHA_BCR, req->nbytes); + + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + /* Process data. */ + if (use_dma) + return atmel_sha_dma_start(dd, req->src, req->nbytes, + atmel_sha_hmac_final_done); + + return atmel_sha_cpu_start(dd, req->src, req->nbytes, false, true, + atmel_sha_hmac_final_done); +} + +static int atmel_sha_hmac_cra_init(struct crypto_tfm *tfm) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_tfm_ctx(tfm); + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct atmel_sha_reqctx)); + hmac->base.start = atmel_sha_hmac_start; + atmel_sha_hmac_key_init(&hmac->hkey); + + return 0; +} + +static void atmel_sha_hmac_cra_exit(struct crypto_tfm *tfm) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_tfm_ctx(tfm); + + atmel_sha_hmac_key_release(&hmac->hkey); +} + +static struct ahash_alg sha_hmac_algs[] = { +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name 
= "atmel-hmac-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "atmel-hmac-sha224", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "atmel-hmac-sha256", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, + .statesize = sizeof(struct 
atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "atmel-hmac-sha384", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "atmel-hmac-sha512", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +}; static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd) { int i; + if (dd->caps.has_hmac) + for (i = 0; i < ARRAY_SIZE(sha_hmac_algs); i++) + crypto_unregister_ahash(&sha_hmac_algs[i]); + for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++) crypto_unregister_ahash(&sha_1_256_algs[i]); @@ -1563,8 +2144,21 @@ static int atmel_sha_register_algs(struct atmel_sha_dev *dd) } } + if (dd->caps.has_hmac) { + for (i = 0; i < ARRAY_SIZE(sha_hmac_algs); i++) { + err = crypto_register_ahash(&sha_hmac_algs[i]); + if (err) + goto err_sha_hmac_algs; + } + } + return 0; + /*i = ARRAY_SIZE(sha_hmac_algs);*/ +err_sha_hmac_algs: + for (j = 0; j < i; j++) + crypto_unregister_ahash(&sha_hmac_algs[j]); + i = ARRAY_SIZE(sha_384_512_algs); err_sha_384_512_algs: for (j = 0; j < i; j++) crypto_unregister_ahash(&sha_384_512_algs[j]); @@ -1634,6 +2228,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) 
dd->caps.has_sha224 = 0; dd->caps.has_sha_384_512 = 0; dd->caps.has_uihv = 0; + dd->caps.has_hmac = 0; /* keep only major version number */ switch (dd->hw_version & 0xff0) { @@ -1643,6 +2238,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) dd->caps.has_sha224 = 1; dd->caps.has_sha_384_512 = 1; dd->caps.has_uihv = 1; + dd->caps.has_hmac = 1; break; case 0x420: dd->caps.has_dma = 1; From a1f613f167a36610d238b66f5e49bfdb1d04aa89 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:55 +0100 Subject: [PATCH 076/142] crypto: atmel-aes - fix atmel_aes_handle_queue() This patch fixes the value returned by atmel_aes_handle_queue(), which could have been wrong previously when the crypto request was started synchronously but became asynchronous during the ctx->start() call. Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 0e3d0d655b96..9fd2f63b8bc0 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -879,6 +879,7 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, struct crypto_async_request *areq, *backlog; struct atmel_aes_base_ctx *ctx; unsigned long flags; + bool start_async; int err, ret = 0; spin_lock_irqsave(&dd->lock, flags); @@ -904,10 +905,12 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, dd->areq = areq; dd->ctx = ctx; - dd->is_async = (areq != new_areq); + start_async = (areq != new_areq); + dd->is_async = start_async; + /* WARNING: ctx->start() MAY change dd->is_async. */ err = ctx->start(dd); - return (dd->is_async) ? ret : err; + return (start_async) ? 
ret : err; } From 89a82ef87e012061989fcaf7dd51d706ff2090e3 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:56 +0100 Subject: [PATCH 077/142] crypto: atmel-authenc - add support to authenc(hmac(shaX), Y(aes)) modes This patchs allows to combine the AES and SHA hardware accelerators on some Atmel SoCs. Doing so, AES blocks are only written to/read from the AES hardware. Those blocks are also transferred from the AES to the SHA accelerator internally, without additionnal accesses to the system busses. Hence, the AES and SHA accelerators work in parallel to process all the data blocks, instead of serializing the process by (de)crypting those blocks first then authenticating them after like the generic crypto/authenc.c driver does. Of course, both the AES and SHA hardware accelerators need to be available before we can start to process the data blocks. Hence we use their crypto request queue to synchronize both drivers. Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 12 + drivers/crypto/atmel-aes-regs.h | 16 ++ drivers/crypto/atmel-aes.c | 448 +++++++++++++++++++++++++++++++- drivers/crypto/atmel-authenc.h | 64 +++++ drivers/crypto/atmel-sha-regs.h | 14 + drivers/crypto/atmel-sha.c | 344 +++++++++++++++++++++++- 6 files changed, 883 insertions(+), 15 deletions(-) create mode 100644 drivers/crypto/atmel-authenc.h diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index bf7da55cffe6..74824612d3e9 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -415,6 +415,18 @@ config CRYPTO_DEV_BFIN_CRC Newer Blackfin processors have CRC hardware. Select this if you want to use the Blackfin CRC module. 
+config CRYPTO_DEV_ATMEL_AUTHENC + tristate "Support for Atmel IPSEC/SSL hw accelerator" + depends on (ARCH_AT91 && HAS_DMA) || COMPILE_TEST + select CRYPTO_AUTHENC + select CRYPTO_DEV_ATMEL_AES + select CRYPTO_DEV_ATMEL_SHA + help + Some Atmel processors can combine the AES and SHA hw accelerators + to enhance support of IPSEC/SSL. + Select this if you want to use the Atmel modules for + authenc(hmac(shaX),Y(cbc)) algorithms. + config CRYPTO_DEV_ATMEL_AES tristate "Support for Atmel AES hw accelerator" depends on HAS_DMA diff --git a/drivers/crypto/atmel-aes-regs.h b/drivers/crypto/atmel-aes-regs.h index 0ec04407b533..7694679802b3 100644 --- a/drivers/crypto/atmel-aes-regs.h +++ b/drivers/crypto/atmel-aes-regs.h @@ -68,6 +68,22 @@ #define AES_CTRR 0x98 #define AES_GCMHR(x) (0x9c + ((x) * 0x04)) +#define AES_EMR 0xb0 +#define AES_EMR_APEN BIT(0) /* Auto Padding Enable */ +#define AES_EMR_APM BIT(1) /* Auto Padding Mode */ +#define AES_EMR_APM_IPSEC 0x0 +#define AES_EMR_APM_SSL BIT(1) +#define AES_EMR_PLIPEN BIT(4) /* PLIP Enable */ +#define AES_EMR_PLIPD BIT(5) /* PLIP Decipher */ +#define AES_EMR_PADLEN_MASK (0xFu << 8) +#define AES_EMR_PADLEN_OFFSET 8 +#define AES_EMR_PADLEN(padlen) (((padlen) << AES_EMR_PADLEN_OFFSET) &\ + AES_EMR_PADLEN_MASK) +#define AES_EMR_NHEAD_MASK (0xFu << 16) +#define AES_EMR_NHEAD_OFFSET 16 +#define AES_EMR_NHEAD(nhead) (((nhead) << AES_EMR_NHEAD_OFFSET) &\ + AES_EMR_NHEAD_MASK) + #define AES_TWR(x) (0xc0 + ((x) * 0x04)) #define AES_ALPHAR(x) (0xd0 + ((x) * 0x04)) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 9fd2f63b8bc0..29e20c37f3a6 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -41,6 +41,7 @@ #include #include #include "atmel-aes-regs.h" +#include "atmel-authenc.h" #define ATMEL_AES_PRIORITY 300 @@ -78,6 +79,7 @@ #define AES_FLAGS_INIT BIT(2) #define AES_FLAGS_BUSY BIT(3) #define AES_FLAGS_DUMP_REG BIT(4) +#define AES_FLAGS_OWN_SHA BIT(5) #define AES_FLAGS_PERSISTENT 
(AES_FLAGS_INIT | AES_FLAGS_BUSY) @@ -92,6 +94,7 @@ struct atmel_aes_caps { bool has_ctr32; bool has_gcm; bool has_xts; + bool has_authenc; u32 max_burst_size; }; @@ -144,10 +147,31 @@ struct atmel_aes_xts_ctx { u32 key2[AES_KEYSIZE_256 / sizeof(u32)]; }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +struct atmel_aes_authenc_ctx { + struct atmel_aes_base_ctx base; + struct atmel_sha_authenc_ctx *auth; +}; +#endif + struct atmel_aes_reqctx { unsigned long mode; }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +struct atmel_aes_authenc_reqctx { + struct atmel_aes_reqctx base; + + struct scatterlist src[2]; + struct scatterlist dst[2]; + size_t textlen; + u32 digest[SHA512_DIGEST_SIZE / sizeof(u32)]; + + /* auth_req MUST be place last. */ + struct ahash_request auth_req; +}; +#endif + struct atmel_aes_dma { struct dma_chan *chan; struct scatterlist *sg; @@ -291,6 +315,9 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz) snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2); break; + case AES_EMR: + return "EMR"; + case AES_TWR(0): case AES_TWR(1): case AES_TWR(2): @@ -463,8 +490,16 @@ static inline bool atmel_aes_is_encrypt(const struct atmel_aes_dev *dd) return (dd->flags & AES_FLAGS_ENCRYPT); } +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +static void atmel_aes_authenc_complete(struct atmel_aes_dev *dd, int err); +#endif + static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) { +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + atmel_aes_authenc_complete(dd, err); +#endif + clk_disable(dd->iclk); dd->flags &= ~AES_FLAGS_BUSY; @@ -1931,6 +1966,384 @@ static struct crypto_alg aes_xts_alg = { } }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +/* authenc aead functions */ + +static int atmel_aes_authenc_start(struct atmel_aes_dev *dd); +static int atmel_aes_authenc_init(struct atmel_aes_dev *dd, int err, + bool is_async); +static int atmel_aes_authenc_transfer(struct atmel_aes_dev *dd, int err, + bool is_async); +static int 
atmel_aes_authenc_digest(struct atmel_aes_dev *dd); +static int atmel_aes_authenc_final(struct atmel_aes_dev *dd, int err, + bool is_async); + +static void atmel_aes_authenc_complete(struct atmel_aes_dev *dd, int err) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + if (err && (dd->flags & AES_FLAGS_OWN_SHA)) + atmel_sha_authenc_abort(&rctx->auth_req); + dd->flags &= ~AES_FLAGS_OWN_SHA; +} + +static int atmel_aes_authenc_start(struct atmel_aes_dev *dd) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + int err; + + atmel_aes_set_mode(dd, &rctx->base); + + err = atmel_aes_hw_init(dd); + if (err) + return atmel_aes_complete(dd, err); + + return atmel_sha_authenc_schedule(&rctx->auth_req, ctx->auth, + atmel_aes_authenc_init, dd); +} + +static int atmel_aes_authenc_init(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + if (is_async) + dd->is_async = true; + if (err) + return atmel_aes_complete(dd, err); + + /* If here, we've got the ownership of the SHA device. */ + dd->flags |= AES_FLAGS_OWN_SHA; + + /* Configure the SHA device. 
*/ + return atmel_sha_authenc_init(&rctx->auth_req, + req->src, req->assoclen, + rctx->textlen, + atmel_aes_authenc_transfer, dd); +} + +static int atmel_aes_authenc_transfer(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + bool enc = atmel_aes_is_encrypt(dd); + struct scatterlist *src, *dst; + u32 iv[AES_BLOCK_SIZE / sizeof(u32)]; + u32 emr; + + if (is_async) + dd->is_async = true; + if (err) + return atmel_aes_complete(dd, err); + + /* Prepare src and dst scatter-lists to transfer cipher/plain texts. */ + src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen); + dst = src; + + if (req->src != req->dst) + dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen); + + /* Configure the AES device. */ + memcpy(iv, req->iv, sizeof(iv)); + + /* + * Here we always set the 2nd parameter of atmel_aes_write_ctrl() to + * 'true' even if the data transfer is actually performed by the CPU (so + * not by the DMA) because we must force the AES_MR_SMOD bitfield to the + * value AES_MR_SMOD_IDATAR0. Indeed, both AES_MR_SMOD and SHA_MR_SMOD + * must be set to *_MR_SMOD_IDATAR0. + */ + atmel_aes_write_ctrl(dd, true, iv); + emr = AES_EMR_PLIPEN; + if (!enc) + emr |= AES_EMR_PLIPD; + atmel_aes_write(dd, AES_EMR, emr); + + /* Transfer data. */ + return atmel_aes_dma_start(dd, src, dst, rctx->textlen, + atmel_aes_authenc_digest); +} + +static int atmel_aes_authenc_digest(struct atmel_aes_dev *dd) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + /* atmel_sha_authenc_final() releases the SHA device. 
*/ + dd->flags &= ~AES_FLAGS_OWN_SHA; + return atmel_sha_authenc_final(&rctx->auth_req, + rctx->digest, sizeof(rctx->digest), + atmel_aes_authenc_final, dd); +} + +static int atmel_aes_authenc_final(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + bool enc = atmel_aes_is_encrypt(dd); + u32 idigest[SHA512_DIGEST_SIZE / sizeof(u32)], *odigest = rctx->digest; + u32 offs, authsize; + + if (is_async) + dd->is_async = true; + if (err) + goto complete; + + offs = req->assoclen + rctx->textlen; + authsize = crypto_aead_authsize(tfm); + if (enc) { + scatterwalk_map_and_copy(odigest, req->dst, offs, authsize, 1); + } else { + scatterwalk_map_and_copy(idigest, req->src, offs, authsize, 0); + if (crypto_memneq(idigest, odigest, authsize)) + err = -EBADMSG; + } + +complete: + return atmel_aes_complete(dd, err); +} + +static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + struct crypto_authenc_keys keys; + u32 flags; + int err; + + if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) + goto badkey; + + if (keys.enckeylen > sizeof(ctx->base.key)) + goto badkey; + + /* Save auth key. */ + flags = crypto_aead_get_flags(tfm); + err = atmel_sha_authenc_setkey(ctx->auth, + keys.authkey, keys.authkeylen, + &flags); + crypto_aead_set_flags(tfm, flags & CRYPTO_TFM_RES_MASK); + if (err) { + memzero_explicit(&keys, sizeof(keys)); + return err; + } + + /* Save enc key. 
*/ + ctx->base.keylen = keys.enckeylen; + memcpy(ctx->base.key, keys.enckey, keys.enckeylen); + + memzero_explicit(&keys, sizeof(keys)); + return 0; + +badkey: + crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + memzero_explicit(&keys, sizeof(keys)); + return -EINVAL; +} + +static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm, + unsigned long auth_mode) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize(); + + ctx->auth = atmel_sha_authenc_spawn(auth_mode); + if (IS_ERR(ctx->auth)) + return PTR_ERR(ctx->auth); + + crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) + + auth_reqsize)); + ctx->base.start = atmel_aes_authenc_start; + + return 0; +} + +static int atmel_aes_authenc_hmac_sha1_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA1); +} + +static int atmel_aes_authenc_hmac_sha224_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA224); +} + +static int atmel_aes_authenc_hmac_sha256_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA256); +} + +static int atmel_aes_authenc_hmac_sha384_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA384); +} + +static int atmel_aes_authenc_hmac_sha512_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA512); +} + +static void atmel_aes_authenc_exit_tfm(struct crypto_aead *tfm) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + + atmel_sha_authenc_free(ctx->auth); +} + +static int atmel_aes_authenc_crypt(struct aead_request *req, + unsigned long mode) +{ + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm); + u32 authsize = crypto_aead_authsize(tfm); + bool enc = (mode
& AES_FLAGS_ENCRYPT); + struct atmel_aes_dev *dd; + + /* Compute text length. */ + if (!enc && req->cryptlen < authsize) + return -EINVAL; + rctx->textlen = req->cryptlen - (enc ? 0 : authsize); + + /* + * Currently, empty messages are not supported yet: + * the SHA auto-padding can be used only on non-empty messages. + * Hence a special case needs to be implemented for empty message. + */ + if (!rctx->textlen && !req->assoclen) + return -EINVAL; + + rctx->base.mode = mode; + ctx->block_size = AES_BLOCK_SIZE; + + dd = atmel_aes_find_dev(ctx); + if (!dd) + return -ENODEV; + + return atmel_aes_handle_queue(dd, &req->base); +} + +static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req) +{ + return atmel_aes_authenc_crypt(req, AES_FLAGS_CBC | AES_FLAGS_ENCRYPT); +} + +static int atmel_aes_authenc_cbc_aes_decrypt(struct aead_request *req) +{ + return atmel_aes_authenc_crypt(req, AES_FLAGS_CBC); +} + +static struct aead_alg aes_authenc_algs[] = { +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha1_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha1-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha224_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = 
"atmel-authenc-hmac-sha224-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha256_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha256-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha384_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha384-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha512_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha512-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = 
CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +}; +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ /* Probe functions */ @@ -2040,6 +2453,12 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) { int i; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (dd->caps.has_authenc) + for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++) + crypto_unregister_aead(&aes_authenc_algs[i]); +#endif + if (dd->caps.has_xts) crypto_unregister_alg(&aes_xts_alg); @@ -2081,8 +2500,25 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd) goto err_aes_xts_alg; } +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (dd->caps.has_authenc) { + for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++) { + err = crypto_register_aead(&aes_authenc_algs[i]); + if (err) + goto err_aes_authenc_alg; + } + } +#endif + return 0; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + /* i = ARRAY_SIZE(aes_authenc_algs); */ +err_aes_authenc_alg: + for (j = 0; j < i; j++) + crypto_unregister_aead(&aes_authenc_algs[j]); + crypto_unregister_alg(&aes_xts_alg); +#endif err_aes_xts_alg: crypto_unregister_aead(&aes_gcm_alg); err_aes_gcm_alg: @@ -2103,6 +2539,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; dd->caps.has_xts = 0; + dd->caps.has_authenc = 0; dd->caps.max_burst_size = 1; /* keep only major version number */ @@ -2113,6 +2550,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.has_xts = 1; + dd->caps.has_authenc = 1; dd->caps.max_burst_size = 4; break; case 0x200: @@ -2271,6 +2709,13 @@ static int atmel_aes_probe(struct platform_device *pdev) atmel_aes_get_cap(aes_dd); +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (aes_dd->caps.has_authenc && !atmel_sha_authenc_is_ready()) { + err = -EPROBE_DEFER; + goto iclk_unprepare; + } +#endif + err = atmel_aes_buff_init(aes_dd); if 
(err) goto err_aes_buff; @@ -2307,7 +2752,8 @@ res_err: tasklet_kill(&aes_dd->done_task); tasklet_kill(&aes_dd->queue_task); aes_dd_err: - dev_err(dev, "initialization failed.\n"); + if (err != -EPROBE_DEFER) + dev_err(dev, "initialization failed.\n"); return err; } diff --git a/drivers/crypto/atmel-authenc.h b/drivers/crypto/atmel-authenc.h new file mode 100644 index 000000000000..2a60d1224143 --- /dev/null +++ b/drivers/crypto/atmel-authenc.h @@ -0,0 +1,64 @@ +/* + * API for Atmel Secure Protocol Layers Improved Performances (SPLIP) + * + * Copyright (C) 2016 Atmel Corporation + * + * Author: Cyrille Pitchen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * This driver is based on drivers/mtd/spi-nor/fsl-quadspi.c from Freescale. 
+ */ + +#ifndef __ATMEL_AUTHENC_H__ +#define __ATMEL_AUTHENC_H__ + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + +#include +#include +#include +#include "atmel-sha-regs.h" + +struct atmel_aes_dev; +typedef int (*atmel_aes_authenc_fn_t)(struct atmel_aes_dev *, int, bool); + +struct atmel_sha_authenc_ctx; + +bool atmel_sha_authenc_is_ready(void); +unsigned int atmel_sha_authenc_get_reqsize(void); + +struct atmel_sha_authenc_ctx *atmel_sha_authenc_spawn(unsigned long mode); +void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth); +int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth, + const u8 *key, unsigned int keylen, + u32 *flags); + +int atmel_sha_authenc_schedule(struct ahash_request *req, + struct atmel_sha_authenc_ctx *auth, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +int atmel_sha_authenc_init(struct ahash_request *req, + struct scatterlist *assoc, unsigned int assoclen, + unsigned int textlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +int atmel_sha_authenc_final(struct ahash_request *req, + u32 *digest, unsigned int digestlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +void atmel_sha_authenc_abort(struct ahash_request *req); + +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ + +#endif /* __ATMEL_AUTHENC_H__ */ diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h index 1b9f3d33079e..1b0eba4a2706 100644 --- a/drivers/crypto/atmel-sha-regs.h +++ b/drivers/crypto/atmel-sha-regs.h @@ -29,6 +29,20 @@ #define SHA_MR_HMAC (1 << 11) #define SHA_MR_DUALBUFF (1 << 16) +#define SHA_FLAGS_ALGO_MASK SHA_MR_ALGO_MASK +#define SHA_FLAGS_SHA1 SHA_MR_ALGO_SHA1 +#define SHA_FLAGS_SHA256 SHA_MR_ALGO_SHA256 +#define SHA_FLAGS_SHA384 SHA_MR_ALGO_SHA384 +#define SHA_FLAGS_SHA512 SHA_MR_ALGO_SHA512 +#define SHA_FLAGS_SHA224 SHA_MR_ALGO_SHA224 +#define SHA_FLAGS_HMAC SHA_MR_HMAC +#define SHA_FLAGS_HMAC_SHA1 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA1) +#define SHA_FLAGS_HMAC_SHA256 (SHA_FLAGS_HMAC | 
SHA_FLAGS_SHA256) +#define SHA_FLAGS_HMAC_SHA384 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA384) +#define SHA_FLAGS_HMAC_SHA512 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA512) +#define SHA_FLAGS_HMAC_SHA224 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA224) +#define SHA_FLAGS_MODE_MASK (SHA_FLAGS_HMAC | SHA_FLAGS_ALGO_MASK) + #define SHA_IER 0x10 #define SHA_IDR 0x14 #define SHA_IMR 0x18 diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 78c3c02e4483..cc5294dbead4 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -41,6 +41,7 @@ #include #include #include "atmel-sha-regs.h" +#include "atmel-authenc.h" /* SHA flags */ #define SHA_FLAGS_BUSY BIT(0) @@ -52,19 +53,6 @@ #define SHA_FLAGS_DMA_READY BIT(6) /* bits[11:8] are reserved. */ -#define SHA_FLAGS_ALGO_MASK SHA_MR_ALGO_MASK -#define SHA_FLAGS_SHA1 SHA_MR_ALGO_SHA1 -#define SHA_FLAGS_SHA256 SHA_MR_ALGO_SHA256 -#define SHA_FLAGS_SHA384 SHA_MR_ALGO_SHA384 -#define SHA_FLAGS_SHA512 SHA_MR_ALGO_SHA512 -#define SHA_FLAGS_SHA224 SHA_MR_ALGO_SHA224 -#define SHA_FLAGS_HMAC SHA_MR_HMAC -#define SHA_FLAGS_HMAC_SHA1 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA1) -#define SHA_FLAGS_HMAC_SHA256 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA256) -#define SHA_FLAGS_HMAC_SHA384 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA384) -#define SHA_FLAGS_HMAC_SHA512 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA512) -#define SHA_FLAGS_HMAC_SHA224 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA224) -#define SHA_FLAGS_MODE_MASK (SHA_FLAGS_HMAC | SHA_FLAGS_ALGO_MASK) #define SHA_FLAGS_FINUP BIT(16) #define SHA_FLAGS_SG BIT(17) @@ -156,6 +144,7 @@ struct atmel_sha_dev { struct crypto_queue queue; struct ahash_request *req; bool is_async; + bool force_complete; atmel_sha_fn_t resume; atmel_sha_fn_t cpu_transfer_complete; @@ -198,7 +187,7 @@ static inline int atmel_sha_complete(struct atmel_sha_dev *dd, int err) clk_disable(dd->iclk); - if (dd->is_async && req->base.complete) + if ((dd->is_async || dd->force_complete) && req->base.complete) req->base.complete(&req->base, err); /* handle new request */ @@ 
-992,6 +981,7 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, dd->req = ahash_request_cast(async_req); start_async = (dd->req != req); dd->is_async = start_async; + dd->force_complete = false; /* WARNING: ctx->start() MAY change dd->is_async. */ err = ctx->start(dd); @@ -2100,6 +2090,332 @@ static struct ahash_alg sha_hmac_algs[] = { }, }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +/* authenc functions */ + +static int atmel_sha_authenc_init2(struct atmel_sha_dev *dd); +static int atmel_sha_authenc_init_done(struct atmel_sha_dev *dd); +static int atmel_sha_authenc_final_done(struct atmel_sha_dev *dd); + + +struct atmel_sha_authenc_ctx { + struct crypto_ahash *tfm; +}; + +struct atmel_sha_authenc_reqctx { + struct atmel_sha_reqctx base; + + atmel_aes_authenc_fn_t cb; + struct atmel_aes_dev *aes_dev; + + /* _init() parameters. */ + struct scatterlist *assoc; + u32 assoclen; + u32 textlen; + + /* _final() parameters. */ + u32 *digest; + unsigned int digestlen; +}; + +static void atmel_sha_authenc_complete(struct crypto_async_request *areq, + int err) +{ + struct ahash_request *req = areq->data; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + + authctx->cb(authctx->aes_dev, err, authctx->base.dd->is_async); +} + +static int atmel_sha_authenc_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + int err; + + /* + * Force atmel_sha_complete() to call req->base.complete(), ie + * atmel_sha_authenc_complete(), which in turn calls authctx->cb(). 
+ */ + dd->force_complete = true; + + err = atmel_sha_hw_init(dd); + return authctx->cb(authctx->aes_dev, err, dd->is_async); +} + +bool atmel_sha_authenc_is_ready(void) +{ + struct atmel_sha_ctx dummy; + + dummy.dd = NULL; + return (atmel_sha_find_dev(&dummy) != NULL); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_is_ready); + +unsigned int atmel_sha_authenc_get_reqsize(void) +{ + return sizeof(struct atmel_sha_authenc_reqctx); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_get_reqsize); + +struct atmel_sha_authenc_ctx *atmel_sha_authenc_spawn(unsigned long mode) +{ + struct atmel_sha_authenc_ctx *auth; + struct crypto_ahash *tfm; + struct atmel_sha_ctx *tctx; + const char *name; + int err = -EINVAL; + + switch (mode & SHA_FLAGS_MODE_MASK) { + case SHA_FLAGS_HMAC_SHA1: + name = "atmel-hmac-sha1"; + break; + + case SHA_FLAGS_HMAC_SHA224: + name = "atmel-hmac-sha224"; + break; + + case SHA_FLAGS_HMAC_SHA256: + name = "atmel-hmac-sha256"; + break; + + case SHA_FLAGS_HMAC_SHA384: + name = "atmel-hmac-sha384"; + break; + + case SHA_FLAGS_HMAC_SHA512: + name = "atmel-hmac-sha512"; + break; + + default: + goto error; + } + + tfm = crypto_alloc_ahash(name, + CRYPTO_ALG_TYPE_AHASH, + CRYPTO_ALG_TYPE_AHASH_MASK); + if (IS_ERR(tfm)) { + err = PTR_ERR(tfm); + goto error; + } + tctx = crypto_ahash_ctx(tfm); + tctx->start = atmel_sha_authenc_start; + tctx->flags = mode; + + auth = kzalloc(sizeof(*auth), GFP_KERNEL); + if (!auth) { + err = -ENOMEM; + goto err_free_ahash; + } + auth->tfm = tfm; + + return auth; + +err_free_ahash: + crypto_free_ahash(tfm); +error: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_spawn); + +void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth) +{ + if (auth) + crypto_free_ahash(auth->tfm); + kfree(auth); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_free); + +int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth, + const u8 *key, unsigned int keylen, + u32 *flags) +{ + struct crypto_ahash *tfm = auth->tfm; + int err; + + 
crypto_ahash_clear_flags(tfm, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(tfm, *flags & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(tfm, key, keylen); + *flags = crypto_ahash_get_flags(tfm); + + return err; +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_setkey); + +int atmel_sha_authenc_schedule(struct ahash_request *req, + struct atmel_sha_authenc_ctx *auth, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = auth->tfm; + struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct atmel_sha_dev *dd; + + /* Reset request context (MUST be done first). */ + memset(authctx, 0, sizeof(*authctx)); + + /* Get SHA device. */ + dd = atmel_sha_find_dev(tctx); + if (!dd) + return cb(aes_dev, -ENODEV, false); + + /* Init request context. */ + ctx->dd = dd; + ctx->buflen = SHA_BUFFER_LEN; + authctx->cb = cb; + authctx->aes_dev = aes_dev; + ahash_request_set_tfm(req, tfm); + ahash_request_set_callback(req, 0, atmel_sha_authenc_complete, req); + + return atmel_sha_handle_queue(dd, req); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_schedule); + +int atmel_sha_authenc_init(struct ahash_request *req, + struct scatterlist *assoc, unsigned int assoclen, + unsigned int textlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_dev *dd = ctx->dd; + + if (unlikely(!IS_ALIGNED(assoclen, sizeof(u32)))) + return atmel_sha_complete(dd, -EINVAL); + + authctx->cb = cb; + authctx->aes_dev = aes_dev; + authctx->assoc = assoc; + authctx->assoclen = assoclen; + authctx->textlen = textlen; + + ctx->flags = hmac->base.flags; + return atmel_sha_hmac_setup(dd, 
atmel_sha_authenc_init2); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_init); + +static int atmel_sha_authenc_init2(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + u32 mr, msg_size; + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->ipad[i]); + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIEHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + mr = (SHA_MR_MODE_IDATAR0 | + SHA_MR_HMAC | + SHA_MR_DUALBUFF); + mr |= ctx->flags & SHA_FLAGS_ALGO_MASK; + atmel_sha_write(dd, SHA_MR, mr); + + msg_size = authctx->assoclen + authctx->textlen; + atmel_sha_write(dd, SHA_MSR, msg_size); + atmel_sha_write(dd, SHA_BCR, msg_size); + + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + /* Process assoc data. 
*/ + return atmel_sha_cpu_start(dd, authctx->assoc, authctx->assoclen, + true, false, + atmel_sha_authenc_init_done); +} + +static int atmel_sha_authenc_init_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + + return authctx->cb(authctx->aes_dev, 0, dd->is_async); +} + +int atmel_sha_authenc_final(struct ahash_request *req, + u32 *digest, unsigned int digestlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct atmel_sha_dev *dd = ctx->dd; + + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA1: + authctx->digestlen = SHA1_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA224: + authctx->digestlen = SHA224_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA256: + authctx->digestlen = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA384: + authctx->digestlen = SHA384_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA512: + authctx->digestlen = SHA512_DIGEST_SIZE; + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + if (authctx->digestlen > digestlen) + authctx->digestlen = digestlen; + + authctx->cb = cb; + authctx->aes_dev = aes_dev; + authctx->digest = digest; + return atmel_sha_wait_for_data_ready(dd, + atmel_sha_authenc_final_done); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_final); + +static int atmel_sha_authenc_final_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + size_t i, num_words = authctx->digestlen / sizeof(u32); + + for (i = 0; i < num_words; ++i) + authctx->digest[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + + return atmel_sha_complete(dd, 0); +} + +void atmel_sha_authenc_abort(struct ahash_request *req) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = 
&authctx->base; + struct atmel_sha_dev *dd = ctx->dd; + + /* Prevent atmel_sha_complete() from calling req->base.complete(). */ + dd->is_async = false; + dd->force_complete = false; + (void)atmel_sha_complete(dd, 0); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_abort); + +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ + + static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd) { int i; From 0569fc46f09b46dc313f27c45283a7b62d96390a Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 26 Jan 2017 17:07:57 +0100 Subject: [PATCH 078/142] crypto: atmel-sha - add verbose debug facilities to print hw register names When VERBOSE_DEBUG is defined and SHA_FLAGS_DUMP_REG flag is set in dd->flags, this patch prints the register names and values when performing IO accesses. Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 110 ++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index cc5294dbead4..22d0c0c118da 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -51,6 +51,7 @@ #define SHA_FLAGS_INIT BIT(4) #define SHA_FLAGS_CPU BIT(5) #define SHA_FLAGS_DMA_READY BIT(6) +#define SHA_FLAGS_DUMP_REG BIT(7) /* bits[11:8] are reserved. 
*/ @@ -167,14 +168,118 @@ static struct atmel_sha_drv atmel_sha = { .lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock), }; +#ifdef VERBOSE_DEBUG +static const char *atmel_sha_reg_name(u32 offset, char *tmp, size_t sz, bool wr) +{ + switch (offset) { + case SHA_CR: + return "CR"; + + case SHA_MR: + return "MR"; + + case SHA_IER: + return "IER"; + + case SHA_IDR: + return "IDR"; + + case SHA_IMR: + return "IMR"; + + case SHA_ISR: + return "ISR"; + + case SHA_MSR: + return "MSR"; + + case SHA_BCR: + return "BCR"; + + case SHA_REG_DIN(0): + case SHA_REG_DIN(1): + case SHA_REG_DIN(2): + case SHA_REG_DIN(3): + case SHA_REG_DIN(4): + case SHA_REG_DIN(5): + case SHA_REG_DIN(6): + case SHA_REG_DIN(7): + case SHA_REG_DIN(8): + case SHA_REG_DIN(9): + case SHA_REG_DIN(10): + case SHA_REG_DIN(11): + case SHA_REG_DIN(12): + case SHA_REG_DIN(13): + case SHA_REG_DIN(14): + case SHA_REG_DIN(15): + snprintf(tmp, sz, "IDATAR[%u]", (offset - SHA_REG_DIN(0)) >> 2); + break; + + case SHA_REG_DIGEST(0): + case SHA_REG_DIGEST(1): + case SHA_REG_DIGEST(2): + case SHA_REG_DIGEST(3): + case SHA_REG_DIGEST(4): + case SHA_REG_DIGEST(5): + case SHA_REG_DIGEST(6): + case SHA_REG_DIGEST(7): + case SHA_REG_DIGEST(8): + case SHA_REG_DIGEST(9): + case SHA_REG_DIGEST(10): + case SHA_REG_DIGEST(11): + case SHA_REG_DIGEST(12): + case SHA_REG_DIGEST(13): + case SHA_REG_DIGEST(14): + case SHA_REG_DIGEST(15): + if (wr) + snprintf(tmp, sz, "IDATAR[%u]", + 16u + ((offset - SHA_REG_DIGEST(0)) >> 2)); + else + snprintf(tmp, sz, "ODATAR[%u]", + (offset - SHA_REG_DIGEST(0)) >> 2); + break; + + case SHA_HW_VERSION: + return "HWVER"; + + default: + snprintf(tmp, sz, "0x%02x", offset); + break; + } + + return tmp; +} + +#endif /* VERBOSE_DEBUG */ + static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset) { - return readl_relaxed(dd->io_base + offset); + u32 value = readl_relaxed(dd->io_base + offset); + +#ifdef VERBOSE_DEBUG + if (dd->flags & SHA_FLAGS_DUMP_REG) { + char tmp[16]; + + dev_vdbg(dd->dev, 
"read 0x%08x from %s\n", value, + atmel_sha_reg_name(offset, tmp, sizeof(tmp), false)); + } +#endif /* VERBOSE_DEBUG */ + + return value; } static inline void atmel_sha_write(struct atmel_sha_dev *dd, u32 offset, u32 value) { +#ifdef VERBOSE_DEBUG + if (dd->flags & SHA_FLAGS_DUMP_REG) { + char tmp[16]; + + dev_vdbg(dd->dev, "write 0x%08x into %s\n", value, + atmel_sha_reg_name(offset, tmp, sizeof(tmp), true)); + } +#endif /* VERBOSE_DEBUG */ + writel_relaxed(value, dd->io_base + offset); } @@ -183,7 +288,8 @@ static inline int atmel_sha_complete(struct atmel_sha_dev *dd, int err) struct ahash_request *req = dd->req; dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | - SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY); + SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY | + SHA_FLAGS_DUMP_REG); clk_disable(dd->iclk); From 8a13449fceb03f5423664f373254c5e19b853d46 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:05 +0530 Subject: [PATCH 079/142] crypto: chcr - Change flow IDs Change assign flowc id to each outgoing request.Firmware use flowc id to schedule each request onto HW. FW reply may miss without this change. Reviewed-by: Hariprasad Shenai Signed-off-by: Atul Gupta Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 18 ++++++++++-------- drivers/crypto/chelsio/chcr_algo.h | 9 +++++---- drivers/crypto/chelsio/chcr_core.h | 1 + drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 8 ++++++++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index b4b78b37f8a6..6c2dea3e3193 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -542,10 +542,11 @@ static inline void create_wreq(struct chcr_context *ctx, (calc_tx_flits_ofld(skb) * 8), 16))); chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); chcr_req->wreq.rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid, - is_iv ? 
iv_loc : IV_NOP); + FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid, + is_iv ? iv_loc : IV_NOP, ctx->tx_channel_id); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id); + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id, + qid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), 16) - ((sizeof(chcr_req->wreq)) >> 4))); @@ -606,7 +607,7 @@ static struct sk_buff chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); memset(chcr_req, 0, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(ivsize + req->nbytes); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -782,6 +783,7 @@ static int chcr_device_init(struct chcr_context *ctx) spin_lock(&ctx->dev->lock_chcr_dev); ctx->tx_channel_id = rxq_idx; ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; + ctx->dev->rx_channel_id = 0; spin_unlock(&ctx->dev->lock_chcr_dev); } out: @@ -874,7 +876,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, memset(chcr_req, 0, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0); + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 0); chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1425,7 +1427,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, * to the hardware spec */ chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, (ivsize ? 
(assoclen + 1) : 0)); chcr_req->sec_cpl.pldlen = htonl(assoclen + ivsize + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( @@ -1601,7 +1603,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, unsigned int ivsize = AES_BLOCK_SIZE; unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; - unsigned int c_id = chcrctx->dev->tx_channel_id; + unsigned int c_id = chcrctx->dev->rx_channel_id; unsigned int ccm_xtra; unsigned char tag_offset = 0, auth_offset = 0; unsigned char hmac_ctrl = get_hmac(crypto_aead_authsize(tfm)); @@ -1877,7 +1879,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize; chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( - ctx->dev->tx_channel_id, 2, (ivsize ? + ctx->dev->rx_channel_id, 2, (ivsize ? (req->assoclen + 1) : 0)); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + ivsize + crypt_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h index 3c7c51f7bedf..ba38bae7ce80 100644 --- a/drivers/crypto/chelsio/chcr_algo.h +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -185,20 +185,21 @@ FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \ FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len))) -#define FILL_WR_RX_Q_ID(cid, qid, wr_iv) \ +#define FILL_WR_RX_Q_ID(cid, qid, wr_iv, fid) \ htonl( \ FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \ FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \ FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \ - FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv))) + FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)) | \ + FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(fid)) -#define FILL_ULPTX_CMD_DEST(cid) \ +#define FILL_ULPTX_CMD_DEST(cid, qid) \ htonl(ULPTX_CMD_V(ULP_TX_PKT) | \ ULP_TXPKT_DEST_V(0) | \ ULP_TXPKT_DATAMODIFY_V(0) | \ ULP_TXPKT_CHANNELID_V((cid)) | \ ULP_TXPKT_RO_V(1) | \ - ULP_TXPKT_FID_V(0)) + 
ULP_TXPKT_FID_V(qid)) #define KEYCTX_ALIGN_PAD(bs) ({unsigned int _bs = (bs);\ _bs == SHA1_DIGEST_SIZE ? 12 : 0; }) diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index c7088a4e0a49..79da22b5cdc9 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -75,6 +75,7 @@ struct chcr_dev { spinlock_t lock_chcr_dev; struct uld_ctx *u_ctx; unsigned char tx_channel_id; + unsigned char rx_channel_id; }; struct uld_ctx { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 8d9e4b7a8e84..ccc05f874419 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -3385,6 +3385,14 @@ struct fw_crypto_lookaside_wr { #define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \ (((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M) +#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_S 15 +#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_FQIDX_S) +#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_FQIDX_S) & \ + FW_CRYPTO_LOOKASIDE_WR_FQIDX_M) + #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10 #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3 #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \ From ee3bd84f55d670961f36df332be299f3386b7690 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:07 +0530 Subject: [PATCH 080/142] crypto: chcr - fix itnull.cocci warnings The first argument to list_for_each_entry cannot be NULL. 
Generated by: scripts/coccinelle/iterators/itnull.cocci Signed-off-by: Julia Lawall Signed-off-by: Fengguang Wu Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 1c65f07e1cc9..2bfd61ae5ad5 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -61,7 +61,7 @@ int assign_chcr_device(struct chcr_dev **dev) */ mutex_lock(&dev_mutex); /* TODO ? */ list_for_each_entry(u_ctx, &uld_ctx_list, entry) - if (u_ctx && u_ctx->dev) { + if (u_ctx->dev) { *dev = u_ctx->dev; ret = 0; break; From 8356ea515ba1396d6a24dd1e80f101ee9a20ff3c Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:08 +0530 Subject: [PATCH 081/142] crypto: chcr - Use cipher instead of Block Cipher in gcm setkey One block of encryption can be done with aes-generic; there is no need for cbc(aes). This patch replaces cbc(aes-generic) with aes-generic.
Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 6c2dea3e3193..d3359439bcd3 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -2189,8 +2189,7 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, struct chcr_context *ctx = crypto_aead_ctx(aead); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_gcm_ctx *gctx = GCM_CTX(aeadctx); - struct blkcipher_desc h_desc; - struct scatterlist src[1]; + struct crypto_cipher *cipher; unsigned int ck_size; int ret = 0, key_ctx_size = 0; @@ -2223,27 +2222,26 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, CHCR_KEYCTX_MAC_KEY_SIZE_128, 0, 0, key_ctx_size >> 4); - /* Calculate the H = CIPH(K, 0 repeated 16 times) using sync aes - * blkcipher It will go on key context + /* Calculate the H = CIPH(K, 0 repeated 16 times). 
+ * It will go in key context */ - h_desc.tfm = crypto_alloc_blkcipher("cbc(aes-generic)", 0, 0); - if (IS_ERR(h_desc.tfm)) { + cipher = crypto_alloc_cipher("aes-generic", 0, 0); + if (IS_ERR(cipher)) { aeadctx->enckey_len = 0; ret = -ENOMEM; goto out; } - h_desc.flags = 0; - ret = crypto_blkcipher_setkey(h_desc.tfm, key, keylen); + + ret = crypto_cipher_setkey(cipher, key, keylen); if (ret) { aeadctx->enckey_len = 0; goto out1; } memset(gctx->ghash_h, 0, AEAD_H_SIZE); - sg_init_one(&src[0], gctx->ghash_h, AEAD_H_SIZE); - ret = crypto_blkcipher_encrypt(&h_desc, &src[0], &src[0], AEAD_H_SIZE); + crypto_cipher_encrypt_one(cipher, gctx->ghash_h, gctx->ghash_h); out1: - crypto_free_blkcipher(h_desc.tfm); + crypto_free_cipher(cipher); out: return ret; } From 44e9f79916160f9a69542712e8529ee8da15c410 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:09 +0530 Subject: [PATCH 082/142] crypto: chcr - Change cra_flags for cipher algos Change cipher algos flags to CRYPTO_ALG_TYPE_ABLKCIPHER. 
Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index d3359439bcd3..21fc04c6f272 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -171,7 +171,7 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, } break; - case CRYPTO_ALG_TYPE_BLKCIPHER: + case CRYPTO_ALG_TYPE_ABLKCIPHER: ctx_req.req.ablk_req = (struct ablkcipher_request *)req; ctx_req.ctx.ablk_ctx = ablkcipher_request_ctx(ctx_req.req.ablk_req); @@ -2492,7 +2492,7 @@ static struct chcr_alg_template driver_algs[] = { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct chcr_context) @@ -2519,7 +2519,7 @@ static struct chcr_alg_template driver_algs[] = { .cra_name = "xts(aes)", .cra_driver_name = "xts-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct chcr_context) + From 8f06601501995eee85e2449430af5d5f0b0480a4 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:10 +0530 Subject: [PATCH 083/142] crypto: chcr - Change algo priority Update priorities to 3000 Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 7ec0a8f12475..81cfd0ba132e 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -48,7 +48,7 @@ * giving the processed data */ -#define 
CHCR_CRA_PRIORITY 300 +#define CHCR_CRA_PRIORITY 3000 #define CHCR_AES_MAX_KEY_LEN (2 * (AES_MAX_KEY_SIZE)) /* consider xts */ #define CHCR_MAX_CRYPTO_IV_LEN 16 /* AES IV len */ From d2826056cb5e4b4ddc90227b509473f71e15f011 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:11 +0530 Subject: [PATCH 084/142] crypto: chcr - Fix wrong typecasting Typecast the pointer with correct structure. Signed-off-by: Atul Gupta Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 2bfd61ae5ad5..c28e018e0773 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -151,18 +151,17 @@ int chcr_uld_rx_handler(void *handle, const __be64 *rsp, { struct uld_ctx *u_ctx = (struct uld_ctx *)handle; struct chcr_dev *dev = u_ctx->dev; - const struct cpl_act_establish *rpl = (struct cpl_act_establish - *)rsp; + const struct cpl_fw6_pld *rpl = (struct cpl_fw6_pld *)rsp; - if (rpl->ot.opcode != CPL_FW6_PLD) { + if (rpl->opcode != CPL_FW6_PLD) { pr_err("Unsupported opcode\n"); return 0; } if (!pgl) - work_handlers[rpl->ot.opcode](dev, (unsigned char *)&rsp[1]); + work_handlers[rpl->opcode](dev, (unsigned char *)&rsp[1]); else - work_handlers[rpl->ot.opcode](dev, pgl->va); + work_handlers[rpl->opcode](dev, pgl->va); return 0; } From 5ba042c094f9438d9441ab5f8ba75f0d121df0a1 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Fri, 27 Jan 2017 16:09:12 +0530 Subject: [PATCH 085/142] crypto: chcr - Fix Smatch Complaint Initialise variable after null check. 
Reported-by: Dan Carpenter Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 drivers/crypto/chelsio/chcr_algo.c diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c old mode 100644 new mode 100755 index 21fc04c6f272..41bc7f4f58cd --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -2456,13 +2456,14 @@ static int chcr_aead_op(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_context *ctx = crypto_aead_ctx(tfm); - struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct uld_ctx *u_ctx; struct sk_buff *skb; - if (ctx && !ctx->dev) { + if (!ctx->dev) { pr_err("chcr : %s : No crypto device.\n", __func__); return -ENXIO; } + u_ctx = ULD_CTX(ctx); if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], ctx->tx_channel_id)) { if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) From 1465fb13d3599e465b3b202f8ebbb5e1ca9b1970 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:31 +0000 Subject: [PATCH 086/142] crypto: arm/aes-ce - remove cra_alignmask Remove the unnecessary alignmask: it is much more efficient to deal with the misalignment in the core algorithm than relying on the crypto API to copy the data to a suitably aligned buffer. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-core.S | 84 +++++++++++++++++------------------ arch/arm/crypto/aes-ce-glue.c | 15 +++---- 2 files changed, 47 insertions(+), 52 deletions(-) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 987aa632c9f0..ba8e6a32fdc9 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -169,19 +169,19 @@ ENTRY(ce_aes_ecb_encrypt) .Lecbencloop3x: subs r4, r4, #3 bmi .Lecbenc1x - vld1.8 {q0-q1}, [r1, :64]! - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! + vld1.8 {q2}, [r1]! 
bl aes_encrypt_3x - vst1.8 {q0-q1}, [r0, :64]! - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! + vst1.8 {q2}, [r0]! b .Lecbencloop3x .Lecbenc1x: adds r4, r4, #3 beq .Lecbencout .Lecbencloop: - vld1.8 {q0}, [r1, :64]! + vld1.8 {q0}, [r1]! bl aes_encrypt - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! subs r4, r4, #1 bne .Lecbencloop .Lecbencout: @@ -195,19 +195,19 @@ ENTRY(ce_aes_ecb_decrypt) .Lecbdecloop3x: subs r4, r4, #3 bmi .Lecbdec1x - vld1.8 {q0-q1}, [r1, :64]! - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! + vld1.8 {q2}, [r1]! bl aes_decrypt_3x - vst1.8 {q0-q1}, [r0, :64]! - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! + vst1.8 {q2}, [r0]! b .Lecbdecloop3x .Lecbdec1x: adds r4, r4, #3 beq .Lecbdecout .Lecbdecloop: - vld1.8 {q0}, [r1, :64]! + vld1.8 {q0}, [r1]! bl aes_decrypt - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! subs r4, r4, #1 bne .Lecbdecloop .Lecbdecout: @@ -226,10 +226,10 @@ ENTRY(ce_aes_cbc_encrypt) vld1.8 {q0}, [r5] prepare_key r2, r3 .Lcbcencloop: - vld1.8 {q1}, [r1, :64]! @ get next pt block + vld1.8 {q1}, [r1]! @ get next pt block veor q0, q0, q1 @ ..and xor with iv bl aes_encrypt - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! subs r4, r4, #1 bne .Lcbcencloop vst1.8 {q0}, [r5] @@ -244,8 +244,8 @@ ENTRY(ce_aes_cbc_decrypt) .Lcbcdecloop3x: subs r4, r4, #3 bmi .Lcbcdec1x - vld1.8 {q0-q1}, [r1, :64]! - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! + vld1.8 {q2}, [r1]! vmov q3, q0 vmov q4, q1 vmov q5, q2 @@ -254,19 +254,19 @@ ENTRY(ce_aes_cbc_decrypt) veor q1, q1, q3 veor q2, q2, q4 vmov q6, q5 - vst1.8 {q0-q1}, [r0, :64]! - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! + vst1.8 {q2}, [r0]! b .Lcbcdecloop3x .Lcbcdec1x: adds r4, r4, #3 beq .Lcbcdecout vmov q15, q14 @ preserve last round key .Lcbcdecloop: - vld1.8 {q0}, [r1, :64]! @ get next ct block + vld1.8 {q0}, [r1]! @ get next ct block veor q14, q15, q6 @ combine prev ct with last key vmov q6, q0 bl aes_decrypt - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! 
subs r4, r4, #1 bne .Lcbcdecloop .Lcbcdecout: @@ -300,15 +300,15 @@ ENTRY(ce_aes_ctr_encrypt) rev ip, r6 add r6, r6, #1 vmov s11, ip - vld1.8 {q3-q4}, [r1, :64]! - vld1.8 {q5}, [r1, :64]! + vld1.8 {q3-q4}, [r1]! + vld1.8 {q5}, [r1]! bl aes_encrypt_3x veor q0, q0, q3 veor q1, q1, q4 veor q2, q2, q5 rev ip, r6 - vst1.8 {q0-q1}, [r0, :64]! - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! + vst1.8 {q2}, [r0]! vmov s27, ip b .Lctrloop3x .Lctr1x: @@ -318,10 +318,10 @@ ENTRY(ce_aes_ctr_encrypt) vmov q0, q6 bl aes_encrypt subs r4, r4, #1 - bmi .Lctrhalfblock @ blocks < 0 means 1/2 block - vld1.8 {q3}, [r1, :64]! + bmi .Lctrtailblock @ blocks < 0 means tail block + vld1.8 {q3}, [r1]! veor q3, q0, q3 - vst1.8 {q3}, [r0, :64]! + vst1.8 {q3}, [r0]! adds r6, r6, #1 @ increment BE ctr rev ip, r6 @@ -333,10 +333,8 @@ ENTRY(ce_aes_ctr_encrypt) vst1.8 {q6}, [r5] pop {r4-r6, pc} -.Lctrhalfblock: - vld1.8 {d1}, [r1, :64] - veor d0, d0, d1 - vst1.8 {d0}, [r0, :64] +.Lctrtailblock: + vst1.8 {q0}, [r0, :64] @ return just the key stream pop {r4-r6, pc} .Lctrcarry: @@ -405,8 +403,8 @@ ENTRY(ce_aes_xts_encrypt) .Lxtsenc3x: subs r4, r4, #3 bmi .Lxtsenc1x - vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks + vld1.8 {q2}, [r1]! next_tweak q4, q3, q7, q6 veor q0, q0, q3 next_tweak q5, q4, q7, q6 @@ -416,8 +414,8 @@ ENTRY(ce_aes_xts_encrypt) veor q0, q0, q3 veor q1, q1, q4 veor q2, q2, q5 - vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks + vst1.8 {q2}, [r0]! vmov q3, q5 teq r4, #0 beq .Lxtsencout @@ -426,11 +424,11 @@ ENTRY(ce_aes_xts_encrypt) adds r4, r4, #3 beq .Lxtsencout .Lxtsencloop: - vld1.8 {q0}, [r1, :64]! + vld1.8 {q0}, [r1]! veor q0, q0, q3 bl aes_encrypt veor q0, q0, q3 - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! 
subs r4, r4, #1 beq .Lxtsencout next_tweak q3, q3, q7, q6 @@ -456,8 +454,8 @@ ENTRY(ce_aes_xts_decrypt) .Lxtsdec3x: subs r4, r4, #3 bmi .Lxtsdec1x - vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks + vld1.8 {q2}, [r1]! next_tweak q4, q3, q7, q6 veor q0, q0, q3 next_tweak q5, q4, q7, q6 @@ -467,8 +465,8 @@ ENTRY(ce_aes_xts_decrypt) veor q0, q0, q3 veor q1, q1, q4 veor q2, q2, q5 - vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks + vst1.8 {q2}, [r0]! vmov q3, q5 teq r4, #0 beq .Lxtsdecout @@ -477,12 +475,12 @@ ENTRY(ce_aes_xts_decrypt) adds r4, r4, #3 beq .Lxtsdecout .Lxtsdecloop: - vld1.8 {q0}, [r1, :64]! + vld1.8 {q0}, [r1]! veor q0, q0, q3 add ip, r2, #32 @ 3rd round key bl aes_decrypt veor q0, q0, q3 - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! subs r4, r4, #1 beq .Lxtsdecout next_tweak q3, q3, q7, q6 diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 8857531915bf..883b84d828c5 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -278,14 +278,15 @@ static int ctr_encrypt(struct skcipher_request *req) u8 *tsrc = walk.src.virt.addr; /* - * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need - * to tell aes_ctr_encrypt() to only read half a block. + * Tell aes_ctr_encrypt() to process a tail block. */ - blocks = (nbytes <= 8) ? 
-1 : 1; + blocks = -1; - ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, + ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, num_rounds(ctx), blocks, walk.iv); - memcpy(tdst, tail, nbytes); + if (tdst != tsrc) + memcpy(tdst, tsrc, nbytes); + crypto_xor(tdst, tail, nbytes); err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); @@ -345,7 +346,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -361,7 +361,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -378,7 +377,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -396,7 +394,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = 2 * AES_MIN_KEY_SIZE, From 4a70b52620357a680ac71d5b63ceb533eb6216cb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:32 +0000 Subject: [PATCH 087/142] crypto: arm/chacha20 - remove cra_alignmask Remove the unnecessary alignmask: it is much more efficient to deal with the misalignment in the core algorithm than relying on the crypto API to copy the data to a suitably aligned buffer. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/chacha20-neon-glue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c index 592f75ae4fa1..59a7be08e80c 100644 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -94,7 +94,6 @@ static struct skcipher_alg alg = { .base.cra_priority = 300, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_alignmask = 1, .base.cra_module = THIS_MODULE, .min_keysize = CHACHA20_KEY_SIZE, From 8f4102dbd9b6a050491a966a74f030e65e29d33d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:33 +0000 Subject: [PATCH 088/142] crypto: arm64/aes-ce-ccm - remove cra_alignmask Remove the unnecessary alignmask: it is much more efficient to deal with the misalignment in the core algorithm than relying on the crypto API to copy the data to a suitably aligned buffer. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-ce-ccm-glue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index cc5515dac74a..6a7dbc7c83a6 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -258,7 +258,6 @@ static struct aead_alg ccm_aes_alg = { .cra_priority = 300, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .ivsize = AES_BLOCK_SIZE, From ccc5d51ef968d0d7634d36afbaf0286126e12f09 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:34 +0000 Subject: [PATCH 089/142] crypto: arm64/aes-blk - remove cra_alignmask Remove the unnecessary alignmask: it is much more efficient to deal with the misalignment in the core algorithm than relying on the crypto API to copy the data to a suitably aligned buffer. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 16 ++++++---------- arch/arm64/crypto/aes-modes.S | 8 +++----- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5164aaf82c6a..8ee1fb7aaa4f 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -215,14 +215,15 @@ static int ctr_encrypt(struct skcipher_request *req) u8 *tsrc = walk.src.virt.addr; /* - * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need - * to tell aes_ctr_encrypt() to only read half a block. + * Tell aes_ctr_encrypt() to process a tail block. */ - blocks = (nbytes <= 8) ? -1 : 1; + blocks = -1; - aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, + aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - memcpy(tdst, tail, nbytes); + if (tdst != tsrc) + memcpy(tdst, tsrc, nbytes); + crypto_xor(tdst, tail, nbytes); err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); @@ -282,7 +283,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -298,7 +298,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -315,7 +314,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -332,7 +330,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_priority = PRIO - 1, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = 
THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -350,7 +347,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = 2 * AES_MIN_KEY_SIZE, diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 838dad5c209f..92b982a8b112 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -337,7 +337,7 @@ AES_ENTRY(aes_ctr_encrypt) .Lctrcarrydone: subs w4, w4, #1 - bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ + bmi .Lctrtailblock /* blocks <0 means tail block */ ld1 {v3.16b}, [x1], #16 eor v3.16b, v0.16b, v3.16b st1 {v3.16b}, [x0], #16 @@ -348,10 +348,8 @@ AES_ENTRY(aes_ctr_encrypt) FRAME_POP ret -.Lctrhalfblock: - ld1 {v3.8b}, [x1] - eor v3.8b, v0.8b, v3.8b - st1 {v3.8b}, [x0] +.Lctrtailblock: + st1 {v0.16b}, [x0] FRAME_POP ret From 4d1108fd747f88a6555e644073ee5629bad610b9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:35 +0000 Subject: [PATCH 090/142] crypto: arm64/chacha20 - remove cra_alignmask Remove the unnecessary alignmask: it is much more efficient to deal with the misalignment in the core algorithm than relying on the crypto API to copy the data to a suitably aligned buffer. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/chacha20-neon-glue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c index a7f2337d46cf..a7cd575ea223 100644 --- a/arch/arm64/crypto/chacha20-neon-glue.c +++ b/arch/arm64/crypto/chacha20-neon-glue.c @@ -93,7 +93,6 @@ static struct skcipher_alg alg = { .base.cra_priority = 300, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_alignmask = 1, .base.cra_module = THIS_MODULE, .min_keysize = CHACHA20_KEY_SIZE, From 262ea4f670b792d0985090b1187b1f1ce2c2c648 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:36 +0000 Subject: [PATCH 091/142] crypto: arm64/aes - avoid literals for cross-module symbol references Using simple adrp/add pairs to refer to the AES lookup tables exposed by the generic AES driver (which could be loaded far away from this driver when KASLR is in effect) was unreliable at module load time before commit 41c066f2c4d4 ("arm64: assembler: make adr_l work in modules under KASLR"), which is why the AES code used literals instead. So now we can get rid of the literals, and switch to the adr_l macro. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-cipher-core.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S index 37590ab8121a..cd58c61e6677 100644 --- a/arch/arm64/crypto/aes-cipher-core.S +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -89,8 +89,8 @@ CPU_BE( rev w8, w8 ) eor w7, w7, w11 eor w8, w8, w12 - ldr tt, =\ttab - ldr lt, =\ltab + adr_l tt, \ttab + adr_l lt, \ltab tbnz rounds, #1, 1f @@ -111,9 +111,6 @@ CPU_BE( rev w8, w8 ) stp w5, w6, [out] stp w7, w8, [out, #8] ret - - .align 4 - .ltorg .endm .align 5 From c458c4ada0e3e3c898a56d0640d2ef70c9f702e3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:37 +0000 Subject: [PATCH 092/142] crypto: arm64/aes - performance tweak Shuffle some instructions around in the __hround macro to shave off 0.1 cycles per byte on Cortex-A57. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-cipher-core.S | 56 +++++++++++------------------ 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S index cd58c61e6677..f2f9cc519309 100644 --- a/arch/arm64/crypto/aes-cipher-core.S +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -20,46 +20,32 @@ tt .req x4 lt .req x2 + .macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift + ubfx \reg0, \in0, #\shift, #8 + .if \enc + ubfx \reg1, \in1e, #\shift, #8 + .else + ubfx \reg1, \in1d, #\shift, #8 + .endif + ldr \reg0, [tt, \reg0, uxtw #2] + ldr \reg1, [tt, \reg1, uxtw #2] + .endm + .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc ldp \out0, \out1, [rk], #8 - ubfx w13, \in0, #0, #8 - ubfx w14, \in1, #8, #8 - ldr w13, [tt, w13, uxtw #2] - ldr w14, [tt, w14, uxtw #2] - - .if \enc - ubfx w17, \in1, #0, #8 - ubfx w18, \in2, #8, #8 - .else - ubfx w17, \in3, #0, #8 - ubfx w18, \in0, #8, #8 - .endif - ldr w17, [tt, w17, uxtw #2] 
- ldr w18, [tt, w18, uxtw #2] - - ubfx w15, \in2, #16, #8 - ubfx w16, \in3, #24, #8 - ldr w15, [tt, w15, uxtw #2] - ldr w16, [tt, w16, uxtw #2] - - .if \enc - ubfx \t0, \in3, #16, #8 - ubfx \t1, \in0, #24, #8 - .else - ubfx \t0, \in1, #16, #8 - ubfx \t1, \in2, #24, #8 - .endif - ldr \t0, [tt, \t0, uxtw #2] - ldr \t1, [tt, \t1, uxtw #2] + __pair \enc, w13, w14, \in0, \in1, \in3, 0 + __pair \enc, w15, w16, \in1, \in2, \in0, 8 + __pair \enc, w17, w18, \in2, \in3, \in1, 16 + __pair \enc, \t0, \t1, \in3, \in0, \in2, 24 eor \out0, \out0, w13 - eor \out1, \out1, w17 - eor \out0, \out0, w14, ror #24 - eor \out1, \out1, w18, ror #24 - eor \out0, \out0, w15, ror #16 - eor \out1, \out1, \t0, ror #16 - eor \out0, \out0, w16, ror #8 + eor \out1, \out1, w14 + eor \out0, \out0, w15, ror #24 + eor \out1, \out1, w16, ror #24 + eor \out0, \out0, w17, ror #16 + eor \out1, \out1, w18, ror #16 + eor \out0, \out0, \t0, ror #8 eor \out1, \out1, \t1, ror #8 .endm From 4edd7d015b95abcedde591a0c45965305d7cd524 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:38 +0000 Subject: [PATCH 093/142] crypto: arm64/aes-neon-blk - tweak performance for low end cores The non-bitsliced AES implementation using the NEON is highly sensitive to micro-architectural details, and, as it turns out, the Cortex-A53 on the Raspberry Pi 3 is a core that can benefit from this code, given that its scalar AES performance is abysmal (32.9 cycles per byte). The new bitsliced AES code manages 19.8 cycles per byte on this core, but can only operate on 8 blocks at a time, which is not supported by all chaining modes. With a bit of tweaking, we can get the plain NEON code to run at 22.0 cycles per byte, making it useful for sequential modes like CBC encryption. 
(Like bitsliced NEON, the plain NEON implementation does not use any lookup tables, which makes it easy on the D-cache, and invulnerable to cache timing attacks) So tweak the plain NEON AES code to use tbl instructions rather than shl/sri pairs, and to avoid the need to reload permutation vectors or other constants from memory in every round. Also, improve the decryption performance by switching to 16x8 pmul instructions for performing the multiplications in GF(2^8). To allow the ECB and CBC encrypt routines to be reused by the bitsliced NEON code in a subsequent patch, export them from the module. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 2 + arch/arm64/crypto/aes-neon.S | 247 +++++++++++++++-------------------- 2 files changed, 108 insertions(+), 141 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 8ee1fb7aaa4f..055bc3f61138 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -409,5 +409,7 @@ unregister_simds: module_cpu_feature_match(AES, aes_init); #else module_init(aes_init); +EXPORT_SYMBOL(neon_aes_ecb_encrypt); +EXPORT_SYMBOL(neon_aes_cbc_encrypt); #endif module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 85f07ead7c5c..f1e3aa2732f9 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -1,7 +1,7 @@ /* * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON * - * Copyright (C) 2013 Linaro Ltd + * Copyright (C) 2013 - 2017 Linaro Ltd.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,17 +17,25 @@ /* multiply by polynomial 'x' in GF(2^8) */ .macro mul_by_x, out, in, temp, const sshr \temp, \in, #7 - add \out, \in, \in + shl \out, \in, #1 and \temp, \temp, \const eor \out, \out, \temp .endm + /* multiply by polynomial 'x^2' in GF(2^8) */ + .macro mul_by_x2, out, in, temp, const + ushr \temp, \in, #6 + shl \out, \in, #2 + pmul \temp, \temp, \const + eor \out, \out, \temp + .endm + /* preload the entire Sbox */ .macro prepare, sbox, shiftrows, temp adr \temp, \sbox - movi v12.16b, #0x40 + movi v12.16b, #0x1b ldr q13, \shiftrows - movi v14.16b, #0x1b + ldr q14, .Lror32by8 ld1 {v16.16b-v19.16b}, [\temp], #64 ld1 {v20.16b-v23.16b}, [\temp], #64 ld1 {v24.16b-v27.16b}, [\temp], #64 @@ -50,37 +58,31 @@ /* apply SubBytes transformation using the the preloaded Sbox */ .macro sub_bytes, in - sub v9.16b, \in\().16b, v12.16b + sub v9.16b, \in\().16b, v15.16b tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b - sub v10.16b, v9.16b, v12.16b + sub v10.16b, v9.16b, v15.16b tbx \in\().16b, {v20.16b-v23.16b}, v9.16b - sub v11.16b, v10.16b, v12.16b + sub v11.16b, v10.16b, v15.16b tbx \in\().16b, {v24.16b-v27.16b}, v10.16b tbx \in\().16b, {v28.16b-v31.16b}, v11.16b .endm /* apply MixColumns transformation */ - .macro mix_columns, in - mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b - rev32 v8.8h, \in\().8h - eor \in\().16b, v10.16b, \in\().16b - shl v9.4s, v8.4s, #24 - shl v11.4s, \in\().4s, #24 - sri v9.4s, v8.4s, #8 - sri v11.4s, \in\().4s, #8 - eor v9.16b, v9.16b, v8.16b - eor v10.16b, v10.16b, v9.16b - eor \in\().16b, v10.16b, v11.16b - .endm - + .macro mix_columns, in, enc + .if \enc == 0 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ - .macro inv_mix_columns, in - mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b - mul_by_x v11.16b, v11.16b, v10.16b, v14.16b - eor \in\().16b, \in\().16b, v11.16b - rev32 v11.8h, 
v11.8h - eor \in\().16b, \in\().16b, v11.16b - mix_columns \in + mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b + eor \in\().16b, \in\().16b, v8.16b + rev32 v8.8h, v8.8h + eor \in\().16b, \in\().16b, v8.16b + .endif + + mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b + rev32 v8.8h, \in\().8h + eor v8.16b, v8.16b, v9.16b + eor \in\().16b, \in\().16b, v8.16b + tbl \in\().16b, {\in\().16b}, v14.16b + eor \in\().16b, \in\().16b, v8.16b .endm .macro do_block, enc, in, rounds, rk, rkp, i @@ -88,16 +90,13 @@ add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + movi v15.16b, #0x40 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 + ld1 {v15.4s}, [\rkp], #16 beq 2222f - .if \enc == 1 - mix_columns \in - .else - inv_mix_columns \in - .endif + mix_columns \in, \enc b 1111b 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ .endm @@ -116,139 +115,114 @@ */ .macro sub_bytes_2x, in0, in1 - sub v8.16b, \in0\().16b, v12.16b - sub v9.16b, \in1\().16b, v12.16b + sub v8.16b, \in0\().16b, v15.16b tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + sub v9.16b, \in1\().16b, v15.16b tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b - sub v10.16b, v8.16b, v12.16b - sub v11.16b, v9.16b, v12.16b + sub v10.16b, v8.16b, v15.16b tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + sub v11.16b, v9.16b, v15.16b tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b - sub v8.16b, v10.16b, v12.16b - sub v9.16b, v11.16b, v12.16b + sub v8.16b, v10.16b, v15.16b tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b + sub v9.16b, v11.16b, v15.16b tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b .endm .macro sub_bytes_4x, in0, in1, in2, in3 - sub v8.16b, \in0\().16b, v12.16b + sub v8.16b, \in0\().16b, v15.16b tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b - sub v9.16b, \in1\().16b, v12.16b + sub v9.16b, \in1\().16b, v15.16b tbl \in1\().16b, 
{v16.16b-v19.16b}, \in1\().16b - sub v10.16b, \in2\().16b, v12.16b + sub v10.16b, \in2\().16b, v15.16b tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b - sub v11.16b, \in3\().16b, v12.16b + sub v11.16b, \in3\().16b, v15.16b tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b - sub v8.16b, v8.16b, v12.16b + sub v8.16b, v8.16b, v15.16b tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b - sub v9.16b, v9.16b, v12.16b + sub v9.16b, v9.16b, v15.16b tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b - sub v10.16b, v10.16b, v12.16b + sub v10.16b, v10.16b, v15.16b tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b - sub v11.16b, v11.16b, v12.16b + sub v11.16b, v11.16b, v15.16b tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b - sub v8.16b, v8.16b, v12.16b + sub v8.16b, v8.16b, v15.16b tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b - sub v9.16b, v9.16b, v12.16b + sub v9.16b, v9.16b, v15.16b tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b - sub v10.16b, v10.16b, v12.16b + sub v10.16b, v10.16b, v15.16b tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b - sub v11.16b, v11.16b, v12.16b + sub v11.16b, v11.16b, v15.16b tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b .endm .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const - sshr \tmp0\().16b, \in0\().16b, #7 - add \out0\().16b, \in0\().16b, \in0\().16b - sshr \tmp1\().16b, \in1\().16b, #7 + sshr \tmp0\().16b, \in0\().16b, #7 + shl \out0\().16b, \in0\().16b, #1 + sshr \tmp1\().16b, \in1\().16b, #7 and \tmp0\().16b, \tmp0\().16b, \const\().16b - add \out1\().16b, \in1\().16b, \in1\().16b + shl \out1\().16b, \in1\().16b, #1 and \tmp1\().16b, \tmp1\().16b, \const\().16b eor \out0\().16b, \out0\().16b, \tmp0\().16b eor \out1\().16b, \out1\().16b, \tmp1\().16b .endm - .macro mix_columns_2x, in0, in1 - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const + 
ushr \tmp0\().16b, \in0\().16b, #6 + shl \out0\().16b, \in0\().16b, #2 + ushr \tmp1\().16b, \in1\().16b, #6 + pmul \tmp0\().16b, \tmp0\().16b, \const\().16b + shl \out1\().16b, \in1\().16b, #2 + pmul \tmp1\().16b, \tmp1\().16b, \const\().16b + eor \out0\().16b, \out0\().16b, \tmp0\().16b + eor \out1\().16b, \out1\().16b, \tmp1\().16b + .endm + + .macro mix_columns_2x, in0, in1, enc + .if \enc == 0 + /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ + mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12 + eor \in0\().16b, \in0\().16b, v8.16b + rev32 v8.8h, v8.8h + eor \in1\().16b, \in1\().16b, v9.16b + rev32 v9.8h, v9.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + .endif + + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 rev32 v10.8h, \in0\().8h rev32 v11.8h, \in1\().8h - eor \in0\().16b, v8.16b, \in0\().16b - eor \in1\().16b, v9.16b, \in1\().16b - shl v12.4s, v10.4s, #24 - shl v13.4s, v11.4s, #24 - eor v8.16b, v8.16b, v10.16b - sri v12.4s, v10.4s, #8 - shl v10.4s, \in0\().4s, #24 - eor v9.16b, v9.16b, v11.16b - sri v13.4s, v11.4s, #8 - shl v11.4s, \in1\().4s, #24 - sri v10.4s, \in0\().4s, #8 - eor \in0\().16b, v8.16b, v12.16b - sri v11.4s, \in1\().4s, #8 - eor \in1\().16b, v9.16b, v13.16b - eor \in0\().16b, v10.16b, \in0\().16b - eor \in1\().16b, v11.16b, \in1\().16b + eor v10.16b, v10.16b, v8.16b + eor v11.16b, v11.16b, v9.16b + eor \in0\().16b, \in0\().16b, v10.16b + eor \in1\().16b, \in1\().16b, v11.16b + tbl \in0\().16b, {\in0\().16b}, v14.16b + tbl \in1\().16b, {\in1\().16b}, v14.16b + eor \in0\().16b, \in0\().16b, v10.16b + eor \in1\().16b, \in1\().16b, v11.16b .endm - .macro inv_mix_cols_2x, in0, in1 - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 - mul_by_x_2x v8, v9, v8, v9, v10, v11, v14 - eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b - rev32 v8.8h, v8.8h - rev32 v9.8h, v9.8h - eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b - mix_columns_2x \in0, \in1 - .endm - 
- .macro inv_mix_cols_4x, in0, in1, in2, in3 - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 - mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14 - mul_by_x_2x v8, v9, v8, v9, v12, v13, v14 - mul_by_x_2x v10, v11, v10, v11, v12, v13, v14 - eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b - eor \in2\().16b, \in2\().16b, v10.16b - eor \in3\().16b, \in3\().16b, v11.16b - rev32 v8.8h, v8.8h - rev32 v9.8h, v9.8h - rev32 v10.8h, v10.8h - rev32 v11.8h, v11.8h - eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b - eor \in2\().16b, \in2\().16b, v10.16b - eor \in3\().16b, \in3\().16b, v11.16b - mix_columns_2x \in0, \in1 - mix_columns_2x \in2, \in3 - .endm - - .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i + .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - sub_bytes_2x \in0, \in1 + movi v15.16b, #0x40 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.4s}, [\rkp], #16 + sub_bytes_2x \in0, \in1 subs \i, \i, #1 + ld1 {v15.4s}, [\rkp], #16 beq 2222f - .if \enc == 1 - mix_columns_2x \in0, \in1 - ldr q13, .LForward_ShiftRows - .else - inv_mix_cols_2x \in0, \in1 - ldr q13, .LReverse_ShiftRows - .endif - movi v12.16b, #0x40 + mix_columns_2x \in0, \in1, \enc b 1111b 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ @@ -262,23 +236,17 @@ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ - sub_bytes_4x \in0, \in1, \in2, \in3 + movi v15.16b, #0x40 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows 
*/ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.4s}, [\rkp], #16 + sub_bytes_4x \in0, \in1, \in2, \in3 subs \i, \i, #1 + ld1 {v15.4s}, [\rkp], #16 beq 2222f - .if \enc == 1 - mix_columns_2x \in0, \in1 - mix_columns_2x \in2, \in3 - ldr q13, .LForward_ShiftRows - .else - inv_mix_cols_4x \in0, \in1, \in2, \in3 - ldr q13, .LReverse_ShiftRows - .endif - movi v12.16b, #0x40 + mix_columns_2x \in0, \in1, \enc + mix_columns_2x \in2, \in3, \enc b 1111b 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ @@ -305,19 +273,7 @@ #include "aes-modes.S" .text - .align 4 -.LForward_ShiftRows: -CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 ) -CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb ) -CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 ) -CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 ) - -.LReverse_ShiftRows: -CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb ) -CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 ) -CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 ) -CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) - + .align 6 .LForward_Sbox: .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 @@ -385,3 +341,12 @@ CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + +.LForward_ShiftRows: + .octa 0x0b06010c07020d08030e09040f0a0500 + +.LReverse_ShiftRows: + .octa 0x0306090c0f0205080b0e0104070a0d00 + +.Lror32by8: + .octa 0x0c0f0e0d080b0a090407060500030201 From 12fcd92305880504c9827c99ea128fecf1c99f0d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 28 Jan 2017 23:25:39 +0000 Subject: [PATCH 094/142] crypto: arm64/aes - replace scalar fallback with plain NEON fallback The new bitsliced NEON implementation of AES uses a fallback in two 
places: CBC encryption (which is strictly sequential, whereas this driver can only operate efficiently on 8 blocks at a time), and the XTS tweak generation, which involves encrypting a single AES block with a different key schedule. The plain (i.e., non-bitsliced) NEON code is more suitable as a fallback, given that it is faster than scalar on low end cores (which is what the NEON implementations target, since high end cores have dedicated instructions for AES), and shows similar behavior in terms of D-cache footprint and sensitivity to cache timing attacks. So switch the fallback handling to the plain NEON driver. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 2 +- arch/arm64/crypto/aes-neonbs-glue.c | 42 ++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 5de75c3dcbd4..bed7feddfeed 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -86,7 +86,7 @@ config CRYPTO_AES_ARM64_BS tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER - select CRYPTO_AES_ARM64 + select CRYPTO_AES_ARM64_NEON_BLK select CRYPTO_SIMD endif diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 323dd76ae5f0..863e436ecf89 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -10,7 +10,6 @@ #include #include -#include #include #include #include @@ -42,7 +41,12 @@ asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +/* borrowed from aes-neon-blk.ko */ +asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int blocks, int first); +asmlinkage 
void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int blocks, u8 iv[], + int first); struct aesbs_ctx { u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; @@ -140,16 +144,28 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return 0; } -static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - __aes_arm64_encrypt(ctx->enc, dst, src, ctx->key.rounds); -} - static int cbc_encrypt(struct skcipher_request *req) { - return crypto_cbc_encrypt_walk(req, cbc_encrypt_one); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err, first = 1; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + /* fall back to the non-bitsliced NEON implementation */ + neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->enc, ctx->key.rounds, blocks, walk.iv, + first); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + first = 0; + } + kernel_neon_end(); + return err; } static int cbc_decrypt(struct skcipher_request *req) @@ -254,9 +270,11 @@ static int __xts_crypt(struct skcipher_request *req, err = skcipher_walk_virt(&walk, req, true); - __aes_arm64_encrypt(ctx->twkey, walk.iv, walk.iv, ctx->key.rounds); - kernel_neon_begin(); + + neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, + ctx->key.rounds, 1, 1); + while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; From 88a3f582bea9e1da0346ea412950bbbdc3125cc1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 2 Feb 2017 11:38:55 +0000 Subject: [PATCH 095/142] crypto: arm64/aes - don't use IV buffer to return final keystream block The arm64 bit sliced AES core code uses the IV buffer to pass the final keystream block back to 
the glue code if the input is not a multiple of the block size, so that the asm code does not have to deal with anything except 16 byte blocks. This is done under the assumption that the outgoing IV is meaningless anyway in this case, given that chaining is no longer possible under these circumstances. However, as it turns out, the CCM driver does expect the IV to retain a value that is equal to the original IV except for the counter value, and even interprets byte zero as a length indicator, which may result in memory corruption if the IV is overwritten with something else. So use a separate buffer to return the final keystream block. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neonbs-core.S | 37 ++++++++++++++++++----------- arch/arm64/crypto/aes-neonbs-glue.c | 9 +++---- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index 8d0cdaa2768d..ca0472500433 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -853,13 +853,15 @@ ENDPROC(aesbs_xts_decrypt) /* * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 iv[], bool final) + * int rounds, int blocks, u8 iv[], u8 final[]) */ ENTRY(aesbs_ctr_encrypt) stp x29, x30, [sp, #-16]! 
mov x29, sp - add x4, x4, x6 // do one extra block if final + cmp x6, #0 + cset x10, ne + add x4, x4, x10 // do one extra block if final ldp x7, x8, [x5] ld1 {v0.16b}, [x5] @@ -874,19 +876,26 @@ CPU_LE( rev x8, x8 ) csel x4, x4, xzr, pl csel x9, x9, xzr, le + tbnz x9, #1, 0f next_ctr v1 + tbnz x9, #2, 0f next_ctr v2 + tbnz x9, #3, 0f next_ctr v3 + tbnz x9, #4, 0f next_ctr v4 + tbnz x9, #5, 0f next_ctr v5 + tbnz x9, #6, 0f next_ctr v6 + tbnz x9, #7, 0f next_ctr v7 0: mov bskey, x2 mov rounds, x3 bl aesbs_encrypt8 - lsr x9, x9, x6 // disregard the extra block + lsr x9, x9, x10 // disregard the extra block tbnz x9, #0, 0f ld1 {v8.16b}, [x1], #16 @@ -928,36 +937,36 @@ CPU_LE( rev x8, x8 ) eor v5.16b, v5.16b, v15.16b st1 {v5.16b}, [x0], #16 - next_ctr v0 +8: next_ctr v0 cbnz x4, 99b 0: st1 {v0.16b}, [x5] -8: ldp x29, x30, [sp], #16 + ldp x29, x30, [sp], #16 ret /* - * If we are handling the tail of the input (x6 == 1), return the - * final keystream block back to the caller via the IV buffer. + * If we are handling the tail of the input (x6 != NULL), return the + * final keystream block back to the caller. 
*/ 1: cbz x6, 8b - st1 {v1.16b}, [x5] + st1 {v1.16b}, [x6] b 8b 2: cbz x6, 8b - st1 {v4.16b}, [x5] + st1 {v4.16b}, [x6] b 8b 3: cbz x6, 8b - st1 {v6.16b}, [x5] + st1 {v6.16b}, [x6] b 8b 4: cbz x6, 8b - st1 {v3.16b}, [x5] + st1 {v3.16b}, [x6] b 8b 5: cbz x6, 8b - st1 {v7.16b}, [x5] + st1 {v7.16b}, [x6] b 8b 6: cbz x6, 8b - st1 {v2.16b}, [x5] + st1 {v2.16b}, [x6] b 8b 7: cbz x6, 8b - st1 {v5.16b}, [x5] + st1 {v5.16b}, [x6] b 8b ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 863e436ecf89..db2501d93550 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[], bool final); + int rounds, int blocks, u8 iv[], u8 final[]); asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); @@ -201,6 +201,7 @@ static int ctr_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; + u8 buf[AES_BLOCK_SIZE]; int err; err = skcipher_walk_virt(&walk, req, true); @@ -208,12 +209,12 @@ static int ctr_encrypt(struct skcipher_request *req) kernel_neon_begin(); while (walk.nbytes > 0) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - bool final = (walk.total % AES_BLOCK_SIZE) != 0; + u8 *final = (walk.total % AES_BLOCK_SIZE) ? 
buf : NULL; if (walk.nbytes < walk.total) { blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - final = false; + final = NULL; } aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, @@ -225,7 +226,7 @@ static int ctr_encrypt(struct skcipher_request *req) if (dst != src) memcpy(dst, src, walk.total % AES_BLOCK_SIZE); - crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE); + crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE); err = skcipher_walk_done(&walk, 0); break; From 1a20b96612656b3ff2d6967c3111bec0e21904a8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 2 Feb 2017 11:38:56 +0000 Subject: [PATCH 096/142] crypto: arm/aes - don't use IV buffer to return final keystream block The ARM bit sliced AES core code uses the IV buffer to pass the final keystream block back to the glue code if the input is not a multiple of the block size, so that the asm code does not have to deal with anything except 16 byte blocks. This is done under the assumption that the outgoing IV is meaningless anyway in this case, given that chaining is no longer possible under these circumstances. However, as it turns out, the CCM driver does expect the IV to retain a value that is equal to the original IV except for the counter value, and even interprets byte zero as a length indicator, which may result in memory corruption if the IV is overwritten with something else. So use a separate buffer to return the final keystream block. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-core.S | 16 +++++++++------- arch/arm/crypto/aes-neonbs-glue.c | 9 +++++---- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S index 12da247164d1..2b625c6d4712 100644 --- a/arch/arm/crypto/aes-neonbs-core.S +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -779,14 +779,15 @@ ENDPROC(aesbs_cbc_decrypt) /* * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 ctr[], bool final) + * int rounds, int blocks, u8 ctr[], u8 final[]) */ ENTRY(aesbs_ctr_encrypt) mov ip, sp push {r4-r10, lr} ldm ip, {r5-r7} // load args 4-6 - add r5, r5, r7 // one extra block if final == 1 + teq r7, #0 + addne r5, r5, #1 // one extra block if final != 0 vld1.8 {q0}, [r6] // load counter vrev32.8 q1, q0 @@ -865,19 +866,20 @@ ENTRY(aesbs_ctr_encrypt) veor q2, q2, q14 vst1.8 {q2}, [r0]! teq r4, #0 // skip last block if 'final' - W(bne) 4f + W(bne) 5f 3: veor q5, q5, q15 vst1.8 {q5}, [r0]! 
- next_ctr q0 +4: next_ctr q0 subs r5, r5, #8 bgt 99b - vmov q5, q0 - -4: vst1.8 {q5}, [r6] + vst1.8 {q0}, [r6] pop {r4-r10, pc} + +5: vst1.8 {q5}, [r4] + b 4b ENDPROC(aesbs_ctr_encrypt) .macro next_tweak, out, in, const, tmp diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index e262f99a44d3..2920b96dbd36 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -35,7 +35,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 ctr[], bool final); + int rounds, int blocks, u8 ctr[], u8 final[]); asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); @@ -186,6 +186,7 @@ static int ctr_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; + u8 buf[AES_BLOCK_SIZE]; int err; err = skcipher_walk_virt(&walk, req, true); @@ -193,12 +194,12 @@ static int ctr_encrypt(struct skcipher_request *req) kernel_neon_begin(); while (walk.nbytes > 0) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - bool final = (walk.total % AES_BLOCK_SIZE) != 0; + u8 *final = (walk.total % AES_BLOCK_SIZE) ? 
buf : NULL; if (walk.nbytes < walk.total) { blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - final = false; + final = NULL; } aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, @@ -210,7 +211,7 @@ static int ctr_encrypt(struct skcipher_request *req) if (dst != src) memcpy(dst, src, walk.total % AES_BLOCK_SIZE); - crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE); + crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE); err = skcipher_walk_done(&walk, 0); break; From 5d3d9c8bda2c74b13185704e504cdf0aa5210723 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Feb 2017 15:35:40 +0000 Subject: [PATCH 097/142] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers The PMULL based CRC32 implementation already contains code based on the separate, optional CRC32 instructions to fall back on when operating on small quantities of data. We can expose these routines directly on systems that lack the 64x64 PMULL instructions but do implement the CRC32 ones, which makes the driver that is based solely on those CRC32 instructions redundant. So remove it. Note that this aligns arm64 with ARM, whose accelerated CRC32 driver also combines the CRC32 extension based and the PMULL based versions. 
Signed-off-by: Ard Biesheuvel Tested-by: Matthias Brugger Signed-off-by: Herbert Xu --- arch/arm64/configs/defconfig | 1 - arch/arm64/crypto/Kconfig | 9 +- arch/arm64/crypto/Makefile | 4 - arch/arm64/crypto/crc32-arm64.c | 290 ------------------------------ arch/arm64/crypto/crc32-ce-glue.c | 49 +++-- 5 files changed, 41 insertions(+), 312 deletions(-) delete mode 100644 arch/arm64/crypto/crc32-arm64.c diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 33b744d54739..6fc6f5a2a6e5 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y # CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set -CONFIG_CRYPTO_CRC32_ARM64=y diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index bed7feddfeed..d92293747d63 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -37,8 +37,8 @@ config CRYPTO_CRCT10DIF_ARM64_CE select CRYPTO_HASH config CRYPTO_CRC32_ARM64_CE - tristate "CRC32 and CRC32C digest algorithms using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC32 + tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions" + depends on CRC32 select CRYPTO_HASH config CRYPTO_AES_ARM64 @@ -71,11 +71,6 @@ config CRYPTO_AES_ARM64_NEON_BLK select CRYPTO_AES select CRYPTO_SIMD -config CRYPTO_CRC32_ARM64 - tristate "CRC32 and CRC32C using optional ARMv8 instructions" - depends on ARM64 - select CRYPTO_HASH - config CRYPTO_CHACHA20_NEON tristate "NEON accelerated ChaCha20 symmetric cipher" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index d1ae1b9cbe70..b5edc5918c28 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -55,10 +55,6 @@ AFLAGS_aes-neon.o := -DINTERLEAVE=4 CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS -obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o - -CFLAGS_crc32-arm64.o := 
-mcpu=generic+crc - $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE $(call if_changed_rule,cc_o_c) diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c deleted file mode 100644 index 6a37c3c6b11d..000000000000 --- a/arch/arm64/crypto/crc32-arm64.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions - * - * Module based on crypto/crc32c_generic.c - * - * CRC32 loop taken from Ed Nevill's Hadoop CRC patch - * http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E - * - * Using inline assembly instead of intrinsics in order to be backwards - * compatible with older compilers. - * - * Copyright (C) 2014 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -MODULE_AUTHOR("Yazen Ghannam "); -MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions"); -MODULE_LICENSE("GPL v2"); - -#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value)) -#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) -#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) -#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) -#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value)) -#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) -#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) -#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) - -static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int 
len) -{ - s64 length = len; - - while ((length -= sizeof(u64)) >= 0) { - CRC32X(crc, get_unaligned_le64(p)); - p += sizeof(u64); - } - - /* The following is more efficient than the straight loop */ - if (length & sizeof(u32)) { - CRC32W(crc, get_unaligned_le32(p)); - p += sizeof(u32); - } - if (length & sizeof(u16)) { - CRC32H(crc, get_unaligned_le16(p)); - p += sizeof(u16); - } - if (length & sizeof(u8)) - CRC32B(crc, *p); - - return crc; -} - -static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len) -{ - s64 length = len; - - while ((length -= sizeof(u64)) >= 0) { - CRC32CX(crc, get_unaligned_le64(p)); - p += sizeof(u64); - } - - /* The following is more efficient than the straight loop */ - if (length & sizeof(u32)) { - CRC32CW(crc, get_unaligned_le32(p)); - p += sizeof(u32); - } - if (length & sizeof(u16)) { - CRC32CH(crc, get_unaligned_le16(p)); - p += sizeof(u16); - } - if (length & sizeof(u8)) - CRC32CB(crc, *p); - - return crc; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(tfm); - - if (keylen != sizeof(mctx->key)) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - mctx->key = get_unaligned_le32(key); - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksumc_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - put_unaligned_le32(ctx->crc, out); - return 0; -} - -static int chksumc_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) -{ - put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out); - return 0; -} - -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) -{ - put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksumc_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksumc_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = 
crypto_shash_ctx(desc->tfm); - - return __chksum_finup(mctx->key, data, length, out); -} - -static int chksumc_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); - - return __chksumc_finup(mctx->key, data, length, out); -} - -static int crc32_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = 0; - return 0; -} - -static int crc32c_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static struct shash_alg crc32_alg = { - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-arm64-hw", - .cra_priority = 300, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_alignmask = 0, - .cra_ctxsize = sizeof(struct chksum_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_cra_init, - } -}; - -static struct shash_alg crc32c_alg = { - .digestsize = CHKSUM_DIGEST_SIZE, - .setkey = chksum_setkey, - .init = chksum_init, - .update = chksumc_update, - .final = chksumc_final, - .finup = chksumc_finup, - .digest = chksumc_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-arm64-hw", - .cra_priority = 300, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_alignmask = 0, - .cra_ctxsize = sizeof(struct chksum_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32c_cra_init, - } -}; - -static int __init crc32_mod_init(void) -{ - int err; - - err = crypto_register_shash(&crc32_alg); - - if (err) - return err; - - err = crypto_register_shash(&crc32c_alg); - - if (err) { - crypto_unregister_shash(&crc32_alg); - return err; - } - - return 0; -} - -static void __exit crc32_mod_exit(void) -{ 
- crypto_unregister_shash(&crc32_alg); - crypto_unregister_shash(&crc32c_alg); -} - -module_cpu_feature_match(CRC32, crc32_mod_init); -module_exit(crc32_mod_exit); diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c index 8594127d5e01..eccb1ae90064 100644 --- a/arch/arm64/crypto/crc32-ce-glue.c +++ b/arch/arm64/crypto/crc32-ce-glue.c @@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc) return 0; } +static int crc32_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + + *crc = crc32_armv8_le(*crc, data, length); + return 0; +} + +static int crc32c_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + + *crc = crc32c_armv8_le(*crc, data, length); + return 0; +} + static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, unsigned int length) { @@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out) static struct shash_alg crc32_pmull_algs[] = { { .setkey = crc32_pmull_setkey, .init = crc32_pmull_init, - .update = crc32_pmull_update, + .update = crc32_update, .final = crc32_pmull_final, .descsize = sizeof(u32), .digestsize = sizeof(u32), @@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { { }, { .setkey = crc32_pmull_setkey, .init = crc32_pmull_init, - .update = crc32c_pmull_update, + .update = crc32c_update, .final = crc32c_pmull_final, .descsize = sizeof(u32), .digestsize = sizeof(u32), @@ -187,14 +205,20 @@ static struct shash_alg crc32_pmull_algs[] = { { static int __init crc32_pmull_mod_init(void) { - if (elf_hwcap & HWCAP_CRC32) { - fallback_crc32 = crc32_armv8_le; - fallback_crc32c = crc32c_armv8_le; - } else { - fallback_crc32 = crc32_le; - fallback_crc32c = __crc32c_le; - } + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) { + crc32_pmull_algs[0].update = crc32_pmull_update; + crc32_pmull_algs[1].update = 
crc32c_pmull_update; + if (elf_hwcap & HWCAP_CRC32) { + fallback_crc32 = crc32_armv8_le; + fallback_crc32c = crc32c_armv8_le; + } else { + fallback_crc32 = crc32_le; + fallback_crc32c = __crc32c_le; + } + } else if (!(elf_hwcap & HWCAP_CRC32)) { + return -ENODEV; + } return crypto_register_shashes(crc32_pmull_algs, ARRAY_SIZE(crc32_pmull_algs)); } @@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void) ARRAY_SIZE(crc32_pmull_algs)); } -module_cpu_feature_match(PMULL, crc32_pmull_mod_init); +static const struct cpu_feature crc32_cpu_feature[] = { + { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { } +}; +MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature); + +module_init(crc32_pmull_mod_init); module_exit(crc32_pmull_mod_exit); MODULE_AUTHOR("Ard Biesheuvel "); From c459bd7beda0295ea67db0ce2004a49addb2f765 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 1 Feb 2017 10:45:02 -0800 Subject: [PATCH 098/142] crypto: sha512-mb - Protect sha512 mb ctx mgr access The flusher and regular multi-buffer computation via mcryptd may race with one another. Add a lock here and turn off interrupts to access the multi-buffer computation state cstate->mgr before a round of computation. This should prevent the flusher code jumping in. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-mb/sha512_mb.c | 64 ++++++++++++++++++--------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index 9c1bb6d58141..2dd3674b5a1e 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -221,7 +221,7 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit } static struct sha512_hash_ctx - *sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr) + *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate) { /* * If get_comp_job returns NULL, there are no jobs complete. 
@@ -233,11 +233,17 @@ static struct sha512_hash_ctx * Otherwise, all jobs currently being managed by the hash_ctx_mgr * still need processing. */ + struct sha512_ctx_mgr *mgr; struct sha512_hash_ctx *ctx; + unsigned long flags; + mgr = cstate->mgr; + spin_lock_irqsave(&cstate->work_lock, flags); ctx = (struct sha512_hash_ctx *) sha512_job_mgr_get_comp_job(&mgr->mgr); - return sha512_ctx_mgr_resubmit(mgr, ctx); + ctx = sha512_ctx_mgr_resubmit(mgr, ctx); + spin_unlock_irqrestore(&cstate->work_lock, flags); + return ctx; } static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) @@ -246,12 +252,17 @@ static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) } static struct sha512_hash_ctx - *sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr, + *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate, struct sha512_hash_ctx *ctx, const void *buffer, uint32_t len, int flags) { + struct sha512_ctx_mgr *mgr; + unsigned long irqflags; + + mgr = cstate->mgr; + spin_lock_irqsave(&cstate->work_lock, irqflags); if (flags & (~HASH_ENTIRE)) { /* * User should not pass anything other than FIRST, UPDATE, or @@ -351,20 +362,26 @@ static struct sha512_hash_ctx } } - return sha512_ctx_mgr_resubmit(mgr, ctx); + ctx = sha512_ctx_mgr_resubmit(mgr, ctx); + spin_unlock_irqrestore(&cstate->work_lock, irqflags); + return ctx; } -static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr) +static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate) { + struct sha512_ctx_mgr *mgr; struct sha512_hash_ctx *ctx; + unsigned long flags; + mgr = cstate->mgr; + spin_lock_irqsave(&cstate->work_lock, flags); while (1) { ctx = (struct sha512_hash_ctx *) sha512_job_mgr_flush(&mgr->mgr); /* If flush returned 0, there are no more jobs in flight. 
*/ if (!ctx) - return NULL; + break; /* * If flush returned a job, resubmit the job to finish @@ -378,8 +395,10 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr) * the sha512_ctx_mgr still need processing. Loop. */ if (ctx) - return ctx; + break; } + spin_unlock_irqrestore(&cstate->work_lock, flags); + return ctx; } static int sha512_mb_init(struct ahash_request *areq) @@ -439,11 +458,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(&rctx->areq); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, nbytes, flag); if (!sha_ctx) { if (flush) - sha_ctx = sha512_ctx_mgr_flush(cstate->mgr); + sha_ctx = sha512_ctx_mgr_flush(cstate); } kernel_fpu_end(); if (sha_ctx) @@ -471,11 +490,12 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, struct sha512_hash_ctx *sha_ctx; struct mcryptd_hash_request_ctx *req_ctx; int ret; + unsigned long flags; /* remove from work list */ - spin_lock(&cstate->work_lock); + spin_lock_irqsave(&cstate->work_lock, flags); list_del(&rctx->waiter); - spin_unlock(&cstate->work_lock); + spin_unlock_irqrestore(&cstate->work_lock, flags); if (irqs_disabled()) rctx->complete(&req->base, err); @@ -486,14 +506,14 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, } /* check to see if there are other jobs that are done */ - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); while (sha_ctx) { req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); ret = sha_finish_walk(&req_ctx, cstate, false); if (req_ctx) { - spin_lock(&cstate->work_lock); + spin_lock_irqsave(&cstate->work_lock, flags); list_del(&req_ctx->waiter); - spin_unlock(&cstate->work_lock); + spin_unlock_irqrestore(&cstate->work_lock, flags); req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) @@ -504,7 +524,7 
@@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, local_bh_enable(); } } - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); } return 0; @@ -515,6 +535,7 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, { unsigned long next_flush; unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + unsigned long flags; /* initialize tag */ rctx->tag.arrival = jiffies; /* tag the arrival time */ @@ -522,9 +543,9 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, next_flush = rctx->tag.arrival + delay; rctx->tag.expire = next_flush; - spin_lock(&cstate->work_lock); + spin_lock_irqsave(&cstate->work_lock, flags); list_add_tail(&rctx->waiter, &cstate->work_list); - spin_unlock(&cstate->work_lock); + spin_unlock_irqrestore(&cstate->work_lock, flags); mcryptd_arm_flusher(cstate, delay); } @@ -565,7 +586,7 @@ static int sha512_mb_update(struct ahash_request *areq) sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); sha512_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE); kernel_fpu_end(); @@ -628,7 +649,7 @@ static int sha512_mb_finup(struct ahash_request *areq) sha512_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, nbytes, flag); kernel_fpu_end(); @@ -677,8 +698,7 @@ static int sha512_mb_final(struct ahash_request *areq) /* flag HASH_FINAL and 0 data size */ sha512_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, - HASH_LAST); + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST); kernel_fpu_end(); /* check if anything is returned */ @@ -940,7 +960,7 @@ static unsigned long 
sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) break; kernel_fpu_begin(); sha_ctx = (struct sha512_hash_ctx *) - sha512_ctx_mgr_flush(cstate->mgr); + sha512_ctx_mgr_flush(cstate); kernel_fpu_end(); if (!sha_ctx) { pr_err("sha512_mb error: nothing got flushed for" From ec38a9376163f9f7cb671e49b7667129c7bb8f8b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 2 Feb 2017 15:58:57 +0000 Subject: [PATCH 099/142] crypto: aes-generic - drop alignment requirement The generic AES code exposes a 32-bit align mask, which forces all users of the code to use temporary buffers or take other measures to ensure the alignment requirement is adhered to, even on architectures that don't care about alignment for software algorithms such as this one. So drop the align mask, and fix the code to use get_unaligned_le32() where appropriate, which will resolve to whatever is optimal for the architecture. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/aes_generic.c | 64 ++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index 3dd101144a58..ca554d57d01e 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -54,6 +54,7 @@ #include #include #include +#include static inline u8 byte(const u32 x, const unsigned n) { @@ -1216,7 +1217,6 @@ EXPORT_SYMBOL_GPL(crypto_il_tab); int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len) { - const __le32 *key = (const __le32 *)in_key; u32 i, t, u, v, w, j; if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && @@ -1225,10 +1225,15 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key, ctx->key_length = key_len; - ctx->key_dec[key_len + 24] = ctx->key_enc[0] = le32_to_cpu(key[0]); - ctx->key_dec[key_len + 25] = ctx->key_enc[1] = le32_to_cpu(key[1]); - ctx->key_dec[key_len + 26] = ctx->key_enc[2] = le32_to_cpu(key[2]); - ctx->key_dec[key_len + 27] = 
ctx->key_enc[3] = le32_to_cpu(key[3]); + ctx->key_enc[0] = get_unaligned_le32(in_key); + ctx->key_enc[1] = get_unaligned_le32(in_key + 4); + ctx->key_enc[2] = get_unaligned_le32(in_key + 8); + ctx->key_enc[3] = get_unaligned_le32(in_key + 12); + + ctx->key_dec[key_len + 24] = ctx->key_enc[0]; + ctx->key_dec[key_len + 25] = ctx->key_enc[1]; + ctx->key_dec[key_len + 26] = ctx->key_enc[2]; + ctx->key_dec[key_len + 27] = ctx->key_enc[3]; switch (key_len) { case AES_KEYSIZE_128: @@ -1238,17 +1243,17 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key, break; case AES_KEYSIZE_192: - ctx->key_enc[4] = le32_to_cpu(key[4]); - t = ctx->key_enc[5] = le32_to_cpu(key[5]); + ctx->key_enc[4] = get_unaligned_le32(in_key + 16); + t = ctx->key_enc[5] = get_unaligned_le32(in_key + 20); for (i = 0; i < 8; ++i) loop6(i); break; case AES_KEYSIZE_256: - ctx->key_enc[4] = le32_to_cpu(key[4]); - ctx->key_enc[5] = le32_to_cpu(key[5]); - ctx->key_enc[6] = le32_to_cpu(key[6]); - t = ctx->key_enc[7] = le32_to_cpu(key[7]); + ctx->key_enc[4] = get_unaligned_le32(in_key + 16); + ctx->key_enc[5] = get_unaligned_le32(in_key + 20); + ctx->key_enc[6] = get_unaligned_le32(in_key + 24); + t = ctx->key_enc[7] = get_unaligned_le32(in_key + 28); for (i = 0; i < 6; ++i) loop8(i); loop8tophalf(i); @@ -1329,16 +1334,14 @@ EXPORT_SYMBOL_GPL(crypto_aes_set_key); static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); - const __le32 *src = (const __le32 *)in; - __le32 *dst = (__le32 *)out; u32 b0[4], b1[4]; const u32 *kp = ctx->key_enc + 4; const int key_len = ctx->key_length; - b0[0] = le32_to_cpu(src[0]) ^ ctx->key_enc[0]; - b0[1] = le32_to_cpu(src[1]) ^ ctx->key_enc[1]; - b0[2] = le32_to_cpu(src[2]) ^ ctx->key_enc[2]; - b0[3] = le32_to_cpu(src[3]) ^ ctx->key_enc[3]; + b0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in); + b0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4); + b0[2] = ctx->key_enc[2] ^ 
get_unaligned_le32(in + 8); + b0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12); if (key_len > 24) { f_nround(b1, b0, kp); @@ -1361,10 +1364,10 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) f_nround(b1, b0, kp); f_lround(b0, b1, kp); - dst[0] = cpu_to_le32(b0[0]); - dst[1] = cpu_to_le32(b0[1]); - dst[2] = cpu_to_le32(b0[2]); - dst[3] = cpu_to_le32(b0[3]); + put_unaligned_le32(b0[0], out); + put_unaligned_le32(b0[1], out + 4); + put_unaligned_le32(b0[2], out + 8); + put_unaligned_le32(b0[3], out + 12); } /* decrypt a block of text */ @@ -1401,16 +1404,14 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); - const __le32 *src = (const __le32 *)in; - __le32 *dst = (__le32 *)out; u32 b0[4], b1[4]; const int key_len = ctx->key_length; const u32 *kp = ctx->key_dec + 4; - b0[0] = le32_to_cpu(src[0]) ^ ctx->key_dec[0]; - b0[1] = le32_to_cpu(src[1]) ^ ctx->key_dec[1]; - b0[2] = le32_to_cpu(src[2]) ^ ctx->key_dec[2]; - b0[3] = le32_to_cpu(src[3]) ^ ctx->key_dec[3]; + b0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in); + b0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4); + b0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8); + b0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12); if (key_len > 24) { i_nround(b1, b0, kp); @@ -1433,10 +1434,10 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) i_nround(b1, b0, kp); i_lround(b0, b1, kp); - dst[0] = cpu_to_le32(b0[0]); - dst[1] = cpu_to_le32(b0[1]); - dst[2] = cpu_to_le32(b0[2]); - dst[3] = cpu_to_le32(b0[3]); + put_unaligned_le32(b0[0], out); + put_unaligned_le32(b0[1], out + 4); + put_unaligned_le32(b0[2], out + 8); + put_unaligned_le32(b0[3], out + 12); } static struct crypto_alg aes_alg = { @@ -1446,7 +1447,6 @@ static struct crypto_alg aes_alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, 
.cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 3, .cra_module = THIS_MODULE, .cra_u = { .cipher = { From b5e0b032b6c31c052ee0132ee70b155c22cf7b28 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 2 Feb 2017 16:37:40 +0000 Subject: [PATCH 100/142] crypto: aes - add generic time invariant AES cipher Lookup table based AES is sensitive to timing attacks, which is due to the fact that such table lookups are data dependent, and the fact that 8 KB worth of tables covers a significant number of cachelines on any architecture, resulting in an exploitable correlation between the key and the processing time for known plaintexts. For network facing algorithms such as CTR, CCM or GCM, this presents a security risk, which is why arch specific AES ports are typically time invariant, either through the use of special instructions, or by using SIMD algorithms that don't rely on table lookups. For generic code, this is difficult to achieve without losing too much performance, but we can improve the situation significantly by switching to an implementation that only needs 256 bytes of table data (the actual S-box itself), which can be prefetched at the start of each block to eliminate data dependent latencies. This code encrypts at ~25 cycles per byte on ARM Cortex-A57 (while the ordinary generic AES driver manages 18 cycles per byte on this hardware). Decryption is substantially slower. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 17 +++ crypto/Makefile | 1 + crypto/aes_ti.c | 375 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 393 insertions(+) create mode 100644 crypto/aes_ti.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 160f08e721cc..419ff5fe6939 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -895,6 +895,23 @@ config CRYPTO_AES See for more information. 
+config CRYPTO_AES_TI + tristate "Fixed time AES cipher" + select CRYPTO_ALGAPI + help + This is a generic implementation of AES that attempts to eliminate + data dependent latencies as much as possible without affecting + performance too much. It is intended for use by the generic CCM + and GCM drivers, and other CTR or CMAC/XCBC based modes that rely + solely on encryption (although decryption is supported as well, but + with a more dramatic performance hit) + + Instead of using 16 lookup tables of 1 KB each, (8 for encryption and + 8 for decryption), this implementation only uses just two S-boxes of + 256 bytes each, and attempts to eliminate data dependent latencies by + prefetching the entire table into the cache at the start of each + block. + config CRYPTO_AES_586 tristate "AES cipher algorithms (i586)" depends on (X86 || UML_X86) && !64BIT diff --git a/crypto/Makefile b/crypto/Makefile index b8f0e3eb0791..bcd834536163 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o obj-$(CONFIG_CRYPTO_AES) += aes_generic.o +obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c new file mode 100644 index 000000000000..92644fd1ac19 --- /dev/null +++ b/crypto/aes_ti.c @@ -0,0 +1,375 @@ +/* + * Scalar fixed time AES core transform + * + * Copyright (C) 2017 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include + +/* + * Emit the sbox as volatile const to prevent the compiler from doing + * constant folding on sbox references involving fixed indexes. + */ +static volatile const u8 __cacheline_aligned __aesti_sbox[] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, +}; + +static volatile const u8 __cacheline_aligned __aesti_inv_sbox[] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 
0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, +}; + +static u32 mul_by_x(u32 w) +{ + u32 x = w & 0x7f7f7f7f; + u32 y = w & 0x80808080; + + /* multiply by polynomial 'x' (0b10) in GF(2^8) */ + return (x << 1) ^ (y >> 7) * 0x1b; +} + +static u32 mul_by_x2(u32 w) +{ + u32 x = w & 0x3f3f3f3f; + u32 y = w & 0x80808080; + u32 z = w & 0x40404040; + + /* multiply by polynomial 'x^2' (0b100) in GF(2^8) */ + return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b; +} + +static u32 mix_columns(u32 x) +{ + /* + * 
Perform the following matrix multiplication in GF(2^8) + * + * | 0x2 0x3 0x1 0x1 | | x[0] | + * | 0x1 0x2 0x3 0x1 | | x[1] | + * | 0x1 0x1 0x2 0x3 | x | x[2] | + * | 0x3 0x1 0x1 0x2 | | x[3] | + */ + u32 y = mul_by_x(x) ^ ror32(x, 16); + + return y ^ ror32(x ^ y, 8); +} + +static u32 inv_mix_columns(u32 x) +{ + /* + * Perform the following matrix multiplication in GF(2^8) + * + * | 0xe 0xb 0xd 0x9 | | x[0] | + * | 0x9 0xe 0xb 0xd | | x[1] | + * | 0xd 0x9 0xe 0xb | x | x[2] | + * | 0xb 0xd 0x9 0xe | | x[3] | + * + * which can conveniently be reduced to + * + * | 0x2 0x3 0x1 0x1 | | 0x5 0x0 0x4 0x0 | | x[0] | + * | 0x1 0x2 0x3 0x1 | | 0x0 0x5 0x0 0x4 | | x[1] | + * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] | + * | 0x3 0x1 0x1 0x2 | | 0x0 0x4 0x0 0x5 | | x[3] | + */ + u32 y = mul_by_x2(x); + + return mix_columns(x ^ y ^ ror32(y, 16)); +} + +static __always_inline u32 subshift(u32 in[], int pos) +{ + return (__aesti_sbox[in[pos] & 0xff]) ^ + (__aesti_sbox[(in[(pos + 1) % 4] >> 8) & 0xff] << 8) ^ + (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^ + (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24); +} + +static __always_inline u32 inv_subshift(u32 in[], int pos) +{ + return (__aesti_inv_sbox[in[pos] & 0xff]) ^ + (__aesti_inv_sbox[(in[(pos + 3) % 4] >> 8) & 0xff] << 8) ^ + (__aesti_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^ + (__aesti_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24); +} + +static u32 subw(u32 in) +{ + return (__aesti_sbox[in & 0xff]) ^ + (__aesti_sbox[(in >> 8) & 0xff] << 8) ^ + (__aesti_sbox[(in >> 16) & 0xff] << 16) ^ + (__aesti_sbox[(in >> 24) & 0xff] << 24); +} + +static int aesti_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len) +{ + u32 kwords = key_len / sizeof(u32); + u32 rc, i, j; + + if (key_len != AES_KEYSIZE_128 && + key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) + return -EINVAL; + + ctx->key_length = key_len; + + for (i = 0; i < kwords; i++) + ctx->key_enc[i] =
get_unaligned_le32(in_key + i * sizeof(u32)); + + for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == 24) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == 32) { + if (i >= 6) + break; + rko[4] = subw(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } + } + + /* + * Generate the decryption keys for the Equivalent Inverse Cipher. + * This involves reversing the order of the round keys, and applying + * the Inverse Mix Columns transformation to all but the first and + * the last one. + */ + ctx->key_dec[0] = ctx->key_enc[key_len + 24]; + ctx->key_dec[1] = ctx->key_enc[key_len + 25]; + ctx->key_dec[2] = ctx->key_enc[key_len + 26]; + ctx->key_dec[3] = ctx->key_enc[key_len + 27]; + + for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) { + ctx->key_dec[i] = inv_mix_columns(ctx->key_enc[j]); + ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]); + ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]); + ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]); + } + + ctx->key_dec[i] = ctx->key_enc[0]; + ctx->key_dec[i + 1] = ctx->key_enc[1]; + ctx->key_dec[i + 2] = ctx->key_enc[2]; + ctx->key_dec[i + 3] = ctx->key_enc[3]; + + return 0; +} + +static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = aesti_expand_key(ctx, in_key, key_len); + if (err) + return err; + + /* + * In order to force the compiler to emit data independent Sbox lookups + * at the start of each block, xor the first round key with values at + * fixed indexes in the Sbox. 
This will need to be repeated each time + * the key is used, which will pull the entire Sbox into the D-cache + * before any data dependent Sbox lookups are performed. + */ + ctx->key_enc[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128]; + ctx->key_enc[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160]; + ctx->key_enc[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192]; + ctx->key_enc[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224]; + + ctx->key_dec[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128]; + ctx->key_dec[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160]; + ctx->key_dec[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192]; + ctx->key_dec[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224]; + + return 0; +} + +static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + const u32 *rkp = ctx->key_enc + 4; + int rounds = 6 + ctx->key_length / 4; + u32 st0[4], st1[4]; + int round; + + st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in); + st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4); + st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8); + st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12); + + st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128]; + st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160]; + st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192]; + st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224]; + + for (round = 0;; round += 2, rkp += 8) { + st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0]; + st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1]; + st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2]; + st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3]; + + if (round == rounds - 2) + break; + + st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4]; + st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5]; + st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6]; + st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7]; + } + + put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out); + put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4); + 
put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8); + put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12); +} + +static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + const u32 *rkp = ctx->key_dec + 4; + int rounds = 6 + ctx->key_length / 4; + u32 st0[4], st1[4]; + int round; + + st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in); + st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4); + st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8); + st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12); + + st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128]; + st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160]; + st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192]; + st0[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224]; + + for (round = 0;; round += 2, rkp += 8) { + st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0]; + st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1]; + st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2]; + st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3]; + + if (round == rounds - 2) + break; + + st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4]; + st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5]; + st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6]; + st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7]; + } + + put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out); + put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4); + put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8); + put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-fixed-time", + .cra_priority = 100 + 1, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + + .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, + .cra_cipher.cia_max_keysize 
= AES_MAX_KEY_SIZE, + .cra_cipher.cia_setkey = aesti_set_key, + .cra_cipher.cia_encrypt = aesti_encrypt, + .cra_cipher.cia_decrypt = aesti_decrypt +}; + +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Generic fixed time AES"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); From 092acf069813a7a07371de0d05010de80d8e613b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 14:49:35 +0000 Subject: [PATCH 101/142] crypto: testmgr - add test cases for cbcmac(aes) In preparation of splitting off the CBC-MAC transform in the CCM driver into a separate algorithm, define some test cases for the AES incarnation of cbcmac. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/testmgr.c | 7 ++++++ crypto/testmgr.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 98eb09782db8..f9c378af3907 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2513,6 +2513,13 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(tf_cbc_dec_tv_template) } } + }, { + .alg = "cbcmac(aes)", + .fips_allowed = 1, + .test = alg_test_hash, + .suite = { + .hash = __VECS(aes_cbcmac_tv_template) + } }, { .alg = "ccm(aes)", .test = alg_test_aead, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 64595f067d72..f85e51cf7dcc 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -3413,6 +3413,66 @@ static struct hash_testvec aes_cmac128_tv_template[] = { } }; +static struct hash_testvec aes_cbcmac_tv_template[] = { + { + .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" + "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a", + .digest = "\x3a\xd7\x7b\xb4\x0d\x7a\x36\x60" + "\xa8\x9e\xca\xf3\x24\x66\xef\x97", + 
.psize = 16, + .ksize = 16, + }, { + .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" + "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30", + .digest = "\x9d\x0d\xd0\x63\xfb\xcb\x24\x43" + "\xf8\xf2\x76\x03\xac\x39\xb0\x9d", + .psize = 33, + .ksize = 16, + .np = 2, + .tap = { 7, 26 }, + }, { + .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" + "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37", + .digest = "\xc0\x71\x73\xb8\xa0\x2c\x11\x7c" + "\xaf\xdc\xb2\xf8\x89\x32\xa3\x3a", + .psize = 63, + .ksize = 16, + }, { + .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" + "\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" + "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" + "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" + "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" + "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" + "\xad\x2b\x41\x7b\xe6\x6c\x37\x10" + "\x1c", + .digest = "\x6a\x4e\xdb\x21\x47\x51\xdf\x4f" + "\xa8\x4d\x4c\x10\x3b\x72\x7d\xd6", + .psize = 65, + .ksize = 32, + } +}; + static struct hash_testvec des3_ede_cmac64_tv_template[] = { /* * From NIST Special Publication 800-38B, Three Key TDEA From f15f05b0a5de667c821a9727c33bce9d1d9b26dd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 14:49:36 +0000 Subject: [PATCH 102/142] crypto: ccm - switch to separate cbcmac driver Update the generic CCM driver to defer CBC-MAC processing to a dedicated CBC-MAC ahash transform rather than open coding this 
transform (and much of the associated scatterwalk plumbing) in the CCM driver itself. This cleans up the code considerably, but more importantly, it allows the use of alternative CBC-MAC implementations that don't suffer from performance degradation due to significant setup time (e.g., the NEON based AES code needs to enable/disable the NEON, and load the S-box into 16 SIMD registers, which cannot be amortized over the entire input when using the cipher interface) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + crypto/ccm.c | 381 +++++++++++++++++++++++++++++++------------------ 2 files changed, 245 insertions(+), 137 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 419ff5fe6939..5a51b877277e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -263,6 +263,7 @@ comment "Authenticated Encryption with Associated Data" config CRYPTO_CCM tristate "CCM support" select CRYPTO_CTR + select CRYPTO_HASH select CRYPTO_AEAD help Support for Counter with CBC MAC. Required for IPsec. 
diff --git a/crypto/ccm.c b/crypto/ccm.c index 26b924d1e582..52e307807ff6 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -23,11 +24,11 @@ struct ccm_instance_ctx { struct crypto_skcipher_spawn ctr; - struct crypto_spawn cipher; + struct crypto_ahash_spawn mac; }; struct crypto_ccm_ctx { - struct crypto_cipher *cipher; + struct crypto_ahash *mac; struct crypto_skcipher *ctr; }; @@ -44,15 +45,22 @@ struct crypto_rfc4309_req_ctx { struct crypto_ccm_req_priv_ctx { u8 odata[16]; - u8 idata[16]; u8 auth_tag[16]; - u32 ilen; u32 flags; struct scatterlist src[3]; struct scatterlist dst[3]; struct skcipher_request skreq; }; +struct cbcmac_tfm_ctx { + struct crypto_cipher *child; +}; + +struct cbcmac_desc_ctx { + unsigned int len; + u8 dg[]; +}; + static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx( struct aead_request *req) { @@ -84,7 +92,7 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key, { struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_skcipher *ctr = ctx->ctr; - struct crypto_cipher *tfm = ctx->cipher; + struct crypto_ahash *mac = ctx->mac; int err = 0; crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); @@ -96,11 +104,11 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key, if (err) goto out; - crypto_cipher_clear_flags(tfm, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(tfm, crypto_aead_get_flags(aead) & + crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) & CRYPTO_TFM_REQ_MASK); - err = crypto_cipher_setkey(tfm, key, keylen); - crypto_aead_set_flags(aead, crypto_cipher_get_flags(tfm) & + err = crypto_ahash_setkey(mac, key, keylen); + crypto_aead_set_flags(aead, crypto_ahash_get_flags(mac) & CRYPTO_TFM_RES_MASK); out: @@ -167,119 +175,61 @@ static int format_adata(u8 *adata, unsigned int a) return len; } -static void compute_mac(struct crypto_cipher *tfm, u8 *data, int 
n, - struct crypto_ccm_req_priv_ctx *pctx) -{ - unsigned int bs = 16; - u8 *odata = pctx->odata; - u8 *idata = pctx->idata; - int datalen, getlen; - - datalen = n; - - /* first time in here, block may be partially filled. */ - getlen = bs - pctx->ilen; - if (datalen >= getlen) { - memcpy(idata + pctx->ilen, data, getlen); - crypto_xor(odata, idata, bs); - crypto_cipher_encrypt_one(tfm, odata, odata); - datalen -= getlen; - data += getlen; - pctx->ilen = 0; - } - - /* now encrypt rest of data */ - while (datalen >= bs) { - crypto_xor(odata, data, bs); - crypto_cipher_encrypt_one(tfm, odata, odata); - - datalen -= bs; - data += bs; - } - - /* check and see if there's leftover data that wasn't - * enough to fill a block. - */ - if (datalen) { - memcpy(idata + pctx->ilen, data, datalen); - pctx->ilen += datalen; - } -} - -static void get_data_to_compute(struct crypto_cipher *tfm, - struct crypto_ccm_req_priv_ctx *pctx, - struct scatterlist *sg, unsigned int len) -{ - struct scatter_walk walk; - u8 *data_src; - int n; - - scatterwalk_start(&walk, sg); - - while (len) { - n = scatterwalk_clamp(&walk, len); - if (!n) { - scatterwalk_start(&walk, sg_next(walk.sg)); - n = scatterwalk_clamp(&walk, len); - } - data_src = scatterwalk_map(&walk); - - compute_mac(tfm, data_src, n, pctx); - len -= n; - - scatterwalk_unmap(data_src); - scatterwalk_advance(&walk, n); - scatterwalk_done(&walk, 0, len); - if (len) - crypto_yield(pctx->flags); - } - - /* any leftover needs padding and then encrypted */ - if (pctx->ilen) { - int padlen; - u8 *odata = pctx->odata; - u8 *idata = pctx->idata; - - padlen = 16 - pctx->ilen; - memset(idata + pctx->ilen, 0, padlen); - crypto_xor(odata, idata, 16); - crypto_cipher_encrypt_one(tfm, odata, odata); - pctx->ilen = 0; - } -} - static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain, unsigned int cryptlen) { + struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); struct crypto_aead *aead = crypto_aead_reqtfm(req); 
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); - struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); - struct crypto_cipher *cipher = ctx->cipher; + AHASH_REQUEST_ON_STACK(ahreq, ctx->mac); unsigned int assoclen = req->assoclen; - u8 *odata = pctx->odata; - u8 *idata = pctx->idata; - int err; + struct scatterlist sg[3]; + u8 odata[16]; + u8 idata[16]; + int ilen, err; /* format control data for input */ err = format_input(odata, req, cryptlen); if (err) goto out; - /* encrypt first block to use as start in computing mac */ - crypto_cipher_encrypt_one(cipher, odata, odata); + sg_init_table(sg, 3); + sg_set_buf(&sg[0], odata, 16); /* format associated data and compute into mac */ if (assoclen) { - pctx->ilen = format_adata(idata, assoclen); - get_data_to_compute(cipher, pctx, req->src, req->assoclen); + ilen = format_adata(idata, assoclen); + sg_set_buf(&sg[1], idata, ilen); + sg_chain(sg, 3, req->src); } else { - pctx->ilen = 0; + ilen = 0; + sg_chain(sg, 2, req->src); } - /* compute plaintext into mac */ - if (cryptlen) - get_data_to_compute(cipher, pctx, plain, cryptlen); + ahash_request_set_tfm(ahreq, ctx->mac); + ahash_request_set_callback(ahreq, pctx->flags, NULL, NULL); + ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16); + err = crypto_ahash_init(ahreq); + if (err) + goto out; + err = crypto_ahash_update(ahreq); + if (err) + goto out; + /* we need to pad the MAC input to a round multiple of the block size */ + ilen = 16 - (assoclen + ilen) % 16; + if (ilen < 16) { + memset(idata, 0, ilen); + sg_init_table(sg, 2); + sg_set_buf(&sg[0], idata, ilen); + if (plain) + sg_chain(sg, 2, plain); + plain = sg; + cryptlen += ilen; + } + + ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen); + err = crypto_ahash_finup(ahreq); out: return err; } @@ -453,21 +403,21 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) struct aead_instance *inst = aead_alg_instance(tfm); struct ccm_instance_ctx *ictx = aead_instance_ctx(inst); 
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm); - struct crypto_cipher *cipher; + struct crypto_ahash *mac; struct crypto_skcipher *ctr; unsigned long align; int err; - cipher = crypto_spawn_cipher(&ictx->cipher); - if (IS_ERR(cipher)) - return PTR_ERR(cipher); + mac = crypto_spawn_ahash(&ictx->mac); + if (IS_ERR(mac)) + return PTR_ERR(mac); ctr = crypto_spawn_skcipher(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) - goto err_free_cipher; + goto err_free_mac; - ctx->cipher = cipher; + ctx->mac = mac; ctx->ctr = ctr; align = crypto_aead_alignmask(tfm); @@ -479,8 +429,8 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) return 0; -err_free_cipher: - crypto_free_cipher(cipher); +err_free_mac: + crypto_free_ahash(mac); return err; } @@ -488,7 +438,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm) { struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm); - crypto_free_cipher(ctx->cipher); + crypto_free_ahash(ctx->mac); crypto_free_skcipher(ctx->ctr); } @@ -496,7 +446,7 @@ static void crypto_ccm_free(struct aead_instance *inst) { struct ccm_instance_ctx *ctx = aead_instance_ctx(inst); - crypto_drop_spawn(&ctx->cipher); + crypto_drop_ahash(&ctx->mac); crypto_drop_skcipher(&ctx->ctr); kfree(inst); } @@ -505,12 +455,13 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, struct rtattr **tb, const char *full_name, const char *ctr_name, - const char *cipher_name) + const char *mac_name) { struct crypto_attr_type *algt; struct aead_instance *inst; struct skcipher_alg *ctr; - struct crypto_alg *cipher; + struct crypto_alg *mac_alg; + struct hash_alg_common *mac; struct ccm_instance_ctx *ictx; int err; @@ -521,25 +472,26 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return -EINVAL; - cipher = crypto_alg_mod_lookup(cipher_name, CRYPTO_ALG_TYPE_CIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(cipher)) - return PTR_ERR(cipher); + mac_alg = crypto_find_alg(mac_name, 
&crypto_ahash_type, + CRYPTO_ALG_TYPE_HASH, + CRYPTO_ALG_TYPE_AHASH_MASK | + CRYPTO_ALG_ASYNC); + if (IS_ERR(mac_alg)) + return PTR_ERR(mac_alg); + mac = __crypto_hash_alg_common(mac_alg); err = -EINVAL; - if (cipher->cra_blocksize != 16) - goto out_put_cipher; + if (mac->digestsize != 16) + goto out_put_mac; inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL); err = -ENOMEM; if (!inst) - goto out_put_cipher; + goto out_put_mac; ictx = aead_instance_ctx(inst); - - err = crypto_init_spawn(&ictx->cipher, cipher, - aead_crypto_instance(inst), - CRYPTO_ALG_TYPE_MASK); + err = crypto_init_ahash_spawn(&ictx->mac, mac, + aead_crypto_instance(inst)); if (err) goto err_free_inst; @@ -548,7 +500,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, crypto_requires_sync(algt->type, algt->mask)); if (err) - goto err_drop_cipher; + goto err_drop_mac; ctr = crypto_spawn_skcipher_alg(&ictx->ctr); @@ -564,16 +516,16 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)", ctr->base.cra_driver_name, - cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_ctr; memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.base.cra_priority = (cipher->cra_priority + + inst->alg.base.cra_priority = (mac->base.cra_priority + ctr->base.cra_priority) / 2; inst->alg.base.cra_blocksize = 1; - inst->alg.base.cra_alignmask = cipher->cra_alignmask | + inst->alg.base.cra_alignmask = mac->base.cra_alignmask | ctr->base.cra_alignmask | (__alignof__(u32) - 1); inst->alg.ivsize = 16; @@ -593,23 +545,24 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, if (err) goto err_drop_ctr; -out_put_cipher: - crypto_mod_put(cipher); +out_put_mac: + crypto_mod_put(mac_alg); return err; err_drop_ctr: 
crypto_drop_skcipher(&ictx->ctr); -err_drop_cipher: - crypto_drop_spawn(&ictx->cipher); +err_drop_mac: + crypto_drop_ahash(&ictx->mac); err_free_inst: kfree(inst); - goto out_put_cipher; + goto out_put_mac; } static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb) { const char *cipher_name; char ctr_name[CRYPTO_MAX_ALG_NAME]; + char mac_name[CRYPTO_MAX_ALG_NAME]; char full_name[CRYPTO_MAX_ALG_NAME]; cipher_name = crypto_attr_alg_name(tb[1]); @@ -620,12 +573,16 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb) cipher_name) >= CRYPTO_MAX_ALG_NAME) return -ENAMETOOLONG; + if (snprintf(mac_name, CRYPTO_MAX_ALG_NAME, "cbcmac(%s)", + cipher_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) return -ENAMETOOLONG; return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name, - cipher_name); + mac_name); } static struct crypto_template crypto_ccm_tmpl = { @@ -899,14 +856,161 @@ static struct crypto_template crypto_rfc4309_tmpl = { .module = THIS_MODULE, }; +static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent, + const u8 *inkey, unsigned int keylen) +{ + struct cbcmac_tfm_ctx *ctx = crypto_shash_ctx(parent); + + return crypto_cipher_setkey(ctx->child, inkey, keylen); +} + +static int crypto_cbcmac_digest_init(struct shash_desc *pdesc) +{ + struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc); + int bs = crypto_shash_digestsize(pdesc->tfm); + + ctx->len = 0; + memset(ctx->dg, 0, bs); + + return 0; +} + +static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p, + unsigned int len) +{ + struct crypto_shash *parent = pdesc->tfm; + struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent); + struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc); + struct crypto_cipher *tfm = tctx->child; + int bs = crypto_shash_digestsize(parent); + + while (len > 0) { + unsigned int l = min(len, bs - ctx->len); + + 
crypto_xor(ctx->dg + ctx->len, p, l); + ctx->len += l; + len -= l; + p += l; + + if (ctx->len == bs) { + crypto_cipher_encrypt_one(tfm, ctx->dg, ctx->dg); + ctx->len = 0; + } + } + + return 0; +} + +static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out) +{ + struct crypto_shash *parent = pdesc->tfm; + struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent); + struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc); + struct crypto_cipher *tfm = tctx->child; + int bs = crypto_shash_digestsize(parent); + + if (ctx->len) + crypto_cipher_encrypt_one(tfm, out, ctx->dg); + else + memcpy(out, ctx->dg, bs); + + return 0; +} + +static int cbcmac_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_cipher *cipher; + struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); + + cipher = crypto_spawn_cipher(spawn); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + ctx->child = cipher; + + return 0; +} + +static void cbcmac_exit_tfm(struct crypto_tfm *tfm) +{ + struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_free_cipher(ctx->child); +} + +static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct shash_instance *inst; + struct crypto_alg *alg; + int err; + + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH); + if (err) + return err; + + alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, + CRYPTO_ALG_TYPE_MASK); + if (IS_ERR(alg)) + return PTR_ERR(alg); + + inst = shash_alloc_instance("cbcmac", alg); + err = PTR_ERR(inst); + if (IS_ERR(inst)) + goto out_put_alg; + + err = crypto_init_spawn(shash_instance_ctx(inst), alg, + shash_crypto_instance(inst), + CRYPTO_ALG_TYPE_MASK); + if (err) + goto out_free_inst; + + inst->alg.base.cra_priority = alg->cra_priority; + inst->alg.base.cra_blocksize = 1; + + inst->alg.digestsize = alg->cra_blocksize; + inst->alg.descsize = sizeof(struct cbcmac_desc_ctx) +
alg->cra_blocksize; + + inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx); + inst->alg.base.cra_init = cbcmac_init_tfm; + inst->alg.base.cra_exit = cbcmac_exit_tfm; + + inst->alg.init = crypto_cbcmac_digest_init; + inst->alg.update = crypto_cbcmac_digest_update; + inst->alg.final = crypto_cbcmac_digest_final; + inst->alg.setkey = crypto_cbcmac_digest_setkey; + + err = shash_register_instance(tmpl, inst); + +out_free_inst: + if (err) + shash_free_instance(shash_crypto_instance(inst)); + +out_put_alg: + crypto_mod_put(alg); + return err; +} + +static struct crypto_template crypto_cbcmac_tmpl = { + .name = "cbcmac", + .create = cbcmac_create, + .free = shash_free_instance, + .module = THIS_MODULE, +}; + static int __init crypto_ccm_module_init(void) { int err; - err = crypto_register_template(&crypto_ccm_base_tmpl); + err = crypto_register_template(&crypto_cbcmac_tmpl); if (err) goto out; + err = crypto_register_template(&crypto_ccm_base_tmpl); + if (err) + goto out_undo_cbcmac; + err = crypto_register_template(&crypto_ccm_tmpl); if (err) goto out_undo_base; @@ -922,6 +1026,8 @@ out_undo_ccm: crypto_unregister_template(&crypto_ccm_tmpl); out_undo_base: crypto_unregister_template(&crypto_ccm_base_tmpl); +out_undo_cbcmac: + crypto_unregister_template(&crypto_cbcmac_tmpl); goto out; } @@ -930,6 +1036,7 @@ static void __exit crypto_ccm_module_exit(void) crypto_unregister_template(&crypto_rfc4309_tmpl); crypto_unregister_template(&crypto_ccm_tmpl); crypto_unregister_template(&crypto_ccm_base_tmpl); + crypto_unregister_template(&crypto_cbcmac_tmpl); } module_init(crypto_ccm_module_init); From 4860620da7e5752d916737472c40be573aec1869 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 14:49:37 +0000 Subject: [PATCH 103/142] crypto: arm64/aes - add NEON/Crypto Extensions CBCMAC/CMAC/XCBC driver On ARMv8 implementations that do not support the Crypto Extensions, such as the Raspberry Pi 3, the CCM driver falls back to the generic table based AES
implementation to perform the MAC part of the algorithm, which is slow and not time invariant. So add a CBCMAC implementation to the shared glue code between NEON AES and Crypto Extensions AES, so that it can be used instead now that the CCM driver has been updated to look for CBCMAC implementations other than the one it supplies itself. Also, given how these algorithms mostly only differ in the way the key handling and the final encryption are implemented, expose CMAC and XCBC algorithms as well based on the same core update code. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 240 +++++++++++++++++++++++++++++++++- arch/arm64/crypto/aes-modes.S | 29 +++- 2 files changed, 267 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 055bc3f61138..bcf596b0197e 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -1,7 +1,7 @@ /* * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES * - * Copyright (C) 2013 Linaro Ltd + * Copyright (C) 2013 - 2017 Linaro Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #define aes_ctr_encrypt ce_aes_ctr_encrypt #define aes_xts_encrypt ce_aes_xts_encrypt #define aes_xts_decrypt ce_aes_xts_decrypt +#define aes_mac_update ce_aes_mac_update MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" @@ -44,11 +46,15 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #define aes_ctr_encrypt neon_aes_ctr_encrypt #define aes_xts_encrypt neon_aes_xts_encrypt #define aes_xts_decrypt neon_aes_xts_decrypt +#define aes_mac_update neon_aes_mac_update MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); MODULE_ALIAS_CRYPTO("ecb(aes)"); 
MODULE_ALIAS_CRYPTO("cbc(aes)"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); +MODULE_ALIAS_CRYPTO("cmac(aes)"); +MODULE_ALIAS_CRYPTO("xcbc(aes)"); +MODULE_ALIAS_CRYPTO("cbcmac(aes)"); #endif MODULE_AUTHOR("Ard Biesheuvel "); @@ -75,11 +81,25 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, int blocks, u8 const rk2[], u8 iv[], int first); +asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds, + int blocks, u8 dg[], int enc_before, + int enc_after); + struct crypto_aes_xts_ctx { struct crypto_aes_ctx key1; struct crypto_aes_ctx __aligned(8) key2; }; +struct mac_tfm_ctx { + struct crypto_aes_ctx key; + u8 __aligned(8) consts[]; +}; + +struct mac_desc_ctx { + unsigned int len; + u8 dg[AES_BLOCK_SIZE]; +}; + static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { @@ -357,6 +377,217 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = xts_decrypt, } }; +static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + int err; + + err = aes_expandkey(&ctx->key, in_key, key_len); + if (err) + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + + return err; +} + +static void cmac_gf128_mul_by_x(be128 *y, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + + y->a = cpu_to_be64((a << 1) | (b >> 63)); + y->b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 
0x87 : 0)); +} + +static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + be128 *consts = (be128 *)ctx->consts; + u8 *rk = (u8 *)ctx->key.key_enc; + int rounds = 6 + key_len / 4; + int err; + + err = cbcmac_setkey(tfm, in_key, key_len); + if (err) + return err; + + /* encrypt the zero vector */ + kernel_neon_begin(); + aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1); + kernel_neon_end(); + + cmac_gf128_mul_by_x(consts, consts); + cmac_gf128_mul_by_x(consts + 1, consts); + + return 0; +} + +static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key, + unsigned int key_len) +{ + static u8 const ks[3][AES_BLOCK_SIZE] = { + { [0 ... AES_BLOCK_SIZE - 1] = 0x1 }, + { [0 ... AES_BLOCK_SIZE - 1] = 0x2 }, + { [0 ... AES_BLOCK_SIZE - 1] = 0x3 }, + }; + + struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + u8 *rk = (u8 *)ctx->key.key_enc; + int rounds = 6 + key_len / 4; + u8 key[AES_BLOCK_SIZE]; + int err; + + err = cbcmac_setkey(tfm, in_key, key_len); + if (err) + return err; + + kernel_neon_begin(); + aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1); + aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0); + kernel_neon_end(); + + return cbcmac_setkey(tfm, key, sizeof(key)); +} + +static int mac_init(struct shash_desc *desc) +{ + struct mac_desc_ctx *ctx = shash_desc_ctx(desc); + + memset(ctx->dg, 0, AES_BLOCK_SIZE); + ctx->len = 0; + + return 0; +} + +static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len) +{ + struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct mac_desc_ctx *ctx = shash_desc_ctx(desc); + int rounds = 6 + tctx->key.key_length / 4; + + while (len > 0) { + unsigned int l; + + if ((ctx->len % AES_BLOCK_SIZE) == 0 && + (ctx->len + len) > AES_BLOCK_SIZE) { + + int blocks = len / AES_BLOCK_SIZE; + + len %= AES_BLOCK_SIZE; + + kernel_neon_begin(); + aes_mac_update(p, tctx->key.key_enc, rounds, blocks, + ctx->dg, 
(ctx->len != 0), (len != 0)); + kernel_neon_end(); + + p += blocks * AES_BLOCK_SIZE; + + if (!len) { + ctx->len = AES_BLOCK_SIZE; + break; + } + ctx->len = 0; + } + + l = min(len, AES_BLOCK_SIZE - ctx->len); + + if (l <= AES_BLOCK_SIZE) { + crypto_xor(ctx->dg + ctx->len, p, l); + ctx->len += l; + len -= l; + p += l; + } + } + + return 0; +} + +static int cbcmac_final(struct shash_desc *desc, u8 *out) +{ + struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct mac_desc_ctx *ctx = shash_desc_ctx(desc); + int rounds = 6 + tctx->key.key_length / 4; + + kernel_neon_begin(); + aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0); + kernel_neon_end(); + + memcpy(out, ctx->dg, AES_BLOCK_SIZE); + + return 0; +} + +static int cmac_final(struct shash_desc *desc, u8 *out) +{ + struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct mac_desc_ctx *ctx = shash_desc_ctx(desc); + int rounds = 6 + tctx->key.key_length / 4; + u8 *consts = tctx->consts; + + if (ctx->len != AES_BLOCK_SIZE) { + ctx->dg[ctx->len] ^= 0x80; + consts += AES_BLOCK_SIZE; + } + + kernel_neon_begin(); + aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1); + kernel_neon_end(); + + memcpy(out, ctx->dg, AES_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg mac_algs[] = { { + .base.cra_name = "cmac(aes)", + .base.cra_driver_name = "cmac-aes-" MODE, + .base.cra_priority = PRIO, + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) + + 2 * AES_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = AES_BLOCK_SIZE, + .init = mac_init, + .update = mac_update, + .final = cmac_final, + .setkey = cmac_setkey, + .descsize = sizeof(struct mac_desc_ctx), +}, { + .base.cra_name = "xcbc(aes)", + .base.cra_driver_name = "xcbc-aes-" MODE, + .base.cra_priority = PRIO, + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct 
mac_tfm_ctx) + + 2 * AES_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + + .digestsize = AES_BLOCK_SIZE, + .init = mac_init, + .update = mac_update, + .final = cmac_final, + .setkey = xcbc_setkey, + .descsize = sizeof(struct mac_desc_ctx), +}, { + .base.cra_name = "cbcmac(aes)", + .base.cra_driver_name = "cbcmac-aes-" MODE, + .base.cra_priority = PRIO, + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct mac_tfm_ctx), + .base.cra_module = THIS_MODULE, + + .digestsize = AES_BLOCK_SIZE, + .init = mac_init, + .update = mac_update, + .final = cbcmac_final, + .setkey = cbcmac_setkey, + .descsize = sizeof(struct mac_desc_ctx), +} }; + static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; static void aes_exit(void) @@ -367,6 +598,7 @@ static void aes_exit(void) if (aes_simd_algs[i]) simd_skcipher_free(aes_simd_algs[i]); + crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs)); crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } @@ -383,6 +615,10 @@ static int __init aes_init(void) if (err) return err; + err = crypto_register_shashes(mac_algs, ARRAY_SIZE(mac_algs)); + if (err) + goto unregister_ciphers; + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) continue; @@ -402,6 +638,9 @@ static int __init aes_init(void) unregister_simds: aes_exit(); + return err; +unregister_ciphers: + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); return err; } diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 92b982a8b112..2674d43d1384 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -1,7 +1,7 @@ /* * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES * - * Copyright (C) 2013 Linaro Ltd + * Copyright (C) 2013 - 2017 Linaro Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -525,3 
+525,30 @@ AES_ENTRY(aes_xts_decrypt) FRAME_POP ret AES_ENDPROC(aes_xts_decrypt) + + /* + * aes_mac_update(u8 const in[], u32 const rk[], int rounds, + * int blocks, u8 dg[], int enc_before, int enc_after) + */ +AES_ENTRY(aes_mac_update) + ld1 {v0.16b}, [x4] /* get dg */ + enc_prepare w2, x1, x7 + cbnz w5, .Lmacenc + +.Lmacloop: + cbz w3, .Lmacout + ld1 {v1.16b}, [x0], #16 /* get next pt block */ + eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ + + subs w3, w3, #1 + csinv x5, x6, xzr, eq + cbz w5, .Lmacout + +.Lmacenc: + encrypt_block v0, w2, x1, x7, w8 + b .Lmacloop + +.Lmacout: + st1 {v0.16b}, [x4] /* return dg */ + ret +AES_ENDPROC(aes_mac_update) From 7d6e9105026788c497f0ab32fa16c82f4ab5ff61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 23:33:23 +0100 Subject: [PATCH 104/142] crypto: improve gcc optimization flags for serpent and wp512 An ancient gcc bug (first reported in 2003) has apparently resurfaced on MIPS, where kernelci.org reports an overly large stack frame in the whirlpool hash algorithm: crypto/wp512.c:987:1: warning: the frame size of 1112 bytes is larger than 1024 bytes [-Wframe-larger-than=] With some testing in different configurations, I'm seeing large variations in stack frame size up to 1500 bytes for what should have around 300 bytes at most. I also checked the reference implementation, which is essentially the same code but also comes with some test and benchmarking infrastructure. It seems that recent compiler versions on at least arm, arm64 and powerpc have a partial fix for this problem, by enabling "-fsched-pressure", but even with that fix they suffer from the issue to a certain degree. 
Some testing on arm64 shows that the time needed to hash a given amount of data is roughly proportional to the stack frame size here, which makes sense given that the wp512 implementation is doing lots of loads for table lookups, and the problem with the overly large stack is a result of doing a lot more loads and stores for spilled registers (as seen from inspecting the object code). Disabling -fschedule-insns consistently fixes the problem for wp512; in my collection of cross-compilers, the results are consistently better or identical when comparing the stack sizes in this function, though some architectures (notably x86) have schedule-insns disabled by default. The four columns are: default: -O2 press: -O2 -fsched-pressure nopress: -O2 -fschedule-insns -fno-sched-pressure nosched: -O2 -fno-schedule-insns (disables sched-pressure) default press nopress nosched alpha-linux-gcc-4.9.3 1136 848 1136 176 am33_2.0-linux-gcc-4.9.3 2100 2076 2100 2104 arm-linux-gnueabi-gcc-4.9.3 848 848 1048 352 cris-linux-gcc-4.9.3 272 272 272 272 frv-linux-gcc-4.9.3 1128 1000 1128 280 hppa64-linux-gcc-4.9.3 1128 336 1128 184 hppa-linux-gcc-4.9.3 644 308 644 276 i386-linux-gcc-4.9.3 352 352 352 352 m32r-linux-gcc-4.9.3 720 656 720 268 microblaze-linux-gcc-4.9.3 1108 604 1108 256 mips64-linux-gcc-4.9.3 1328 592 1328 208 mips-linux-gcc-4.9.3 1096 624 1096 240 powerpc64-linux-gcc-4.9.3 1088 432 1088 160 powerpc-linux-gcc-4.9.3 1080 584 1080 224 s390-linux-gcc-4.9.3 456 456 624 360 sh3-linux-gcc-4.9.3 292 292 292 292 sparc64-linux-gcc-4.9.3 992 240 992 208 sparc-linux-gcc-4.9.3 680 592 680 312 x86_64-linux-gcc-4.9.3 224 240 272 224 xtensa-linux-gcc-4.9.3 1152 704 1152 304 aarch64-linux-gcc-7.0.0 224 224 1104 208 arm-linux-gnueabi-gcc-7.0.1 824 824 1048 352 mips-linux-gcc-7.0.0 1120 648 1120 272 x86_64-linux-gcc-7.0.1 240 240 304 240 arm-linux-gnueabi-gcc-4.4.7 840 392 arm-linux-gnueabi-gcc-4.5.4 784 728 784 320 arm-linux-gnueabi-gcc-4.6.4 736 728 736 304 arm-linux-gnueabi-gcc-4.7.4 944 784 
944 352 arm-linux-gnueabi-gcc-4.8.5 464 464 760 352 arm-linux-gnueabi-gcc-4.9.3 848 848 1048 352 arm-linux-gnueabi-gcc-5.3.1 824 824 1064 336 arm-linux-gnueabi-gcc-6.1.1 808 808 1056 344 arm-linux-gnueabi-gcc-7.0.1 824 824 1048 352 Trying the same test for serpent-generic, the picture is a bit different, and while -fno-schedule-insns is generally better here than the default, -fsched-pressure wins overall, so I picked that instead. default press nopress nosched alpha-linux-gcc-4.9.3 1392 864 1392 960 am33_2.0-linux-gcc-4.9.3 536 524 536 528 arm-linux-gnueabi-gcc-4.9.3 552 552 776 536 cris-linux-gcc-4.9.3 528 528 528 528 frv-linux-gcc-4.9.3 536 400 536 504 hppa64-linux-gcc-4.9.3 524 208 524 480 hppa-linux-gcc-4.9.3 768 472 768 508 i386-linux-gcc-4.9.3 564 564 564 564 m32r-linux-gcc-4.9.3 712 576 712 532 microblaze-linux-gcc-4.9.3 724 392 724 512 mips64-linux-gcc-4.9.3 720 384 720 496 mips-linux-gcc-4.9.3 728 384 728 496 powerpc64-linux-gcc-4.9.3 704 304 704 480 powerpc-linux-gcc-4.9.3 704 296 704 480 s390-linux-gcc-4.9.3 560 560 592 536 sh3-linux-gcc-4.9.3 540 540 540 540 sparc64-linux-gcc-4.9.3 544 352 544 496 sparc-linux-gcc-4.9.3 544 344 544 496 x86_64-linux-gcc-4.9.3 528 536 576 528 xtensa-linux-gcc-4.9.3 752 544 752 544 aarch64-linux-gcc-7.0.0 432 432 656 480 arm-linux-gnueabi-gcc-7.0.1 616 616 808 536 mips-linux-gcc-7.0.0 720 464 720 488 x86_64-linux-gcc-7.0.1 536 528 600 536 arm-linux-gnueabi-gcc-4.4.7 592 440 arm-linux-gnueabi-gcc-4.5.4 776 448 776 544 arm-linux-gnueabi-gcc-4.6.4 776 448 776 544 arm-linux-gnueabi-gcc-4.7.4 768 448 768 544 arm-linux-gnueabi-gcc-4.8.5 488 488 776 544 arm-linux-gnueabi-gcc-4.9.3 552 552 776 536 arm-linux-gnueabi-gcc-5.3.1 552 552 776 536 arm-linux-gnueabi-gcc-6.1.1 560 560 776 536 arm-linux-gnueabi-gcc-7.0.1 616 616 808 536 I did not do any runtime tests with serpent, so it is possible that stack frame size does not directly correlate with runtime performance here and it actually makes things worse, but it's more likely to help 
here, and the reduced stack frame size is probably enough reason to apply the patch, especially given that the crypto code is often used in deep call chains. Link: https://kernelci.org/build/id/58797d7559b5149efdf6c3a9/logs/ Link: http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11488 Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 Cc: Ralf Baechle Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- crypto/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/Makefile b/crypto/Makefile index bcd834536163..8a44057240d5 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o +CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o @@ -98,6 +99,7 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o +CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o From db91af0fbe20474cec33263e28d15f5e6b45ebc9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 5 Feb 2017 10:06:12 +0000 Subject: [PATCH 105/142] crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic Instead of unconditionally forcing 4 byte alignment for all generic chaining modes that rely on crypto_xor() or crypto_inc() (which may result in unnecessary copying of data when the underlying hardware can 
perform unaligned accesses efficiently), make those functions deal with unaligned input explicitly, but only if the Kconfig symbol HAVE_EFFICIENT_UNALIGNED_ACCESS is set. This will allow us to drop the alignmasks from the CBC, CMAC, CTR, CTS, PCBC and SEQIV drivers. For crypto_inc(), this simply involves making the 4-byte stride conditional on HAVE_EFFICIENT_UNALIGNED_ACCESS being set, given that it typically operates on 16 byte buffers. For crypto_xor(), an algorithm is implemented that simply runs through the input using the largest strides possible if unaligned accesses are allowed. If they are not, an optimal sequence of memory accesses is emitted that takes the relative alignment of the input buffers into account, e.g., if the relative misalignment of dst and src is 4 bytes, the entire xor operation will be completed using 4 byte loads and stores (modulo unaligned bits at the start and end). Note that all expressions involving misalign are simply eliminated by the compiler when HAVE_EFFICIENT_UNALIGNED_ACCESS is defined. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/algapi.c | 74 +++++++++++++++++++++++++++++------------ crypto/cbc.c | 3 -- crypto/cmac.c | 3 +- crypto/ctr.c | 2 +- crypto/cts.c | 3 -- crypto/pcbc.c | 3 -- crypto/seqiv.c | 2 -- include/crypto/algapi.h | 20 +++++++++-- 8 files changed, 73 insertions(+), 37 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 1fad2a6b3bbb..6b52e8f0b95f 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size) __be32 *b = (__be32 *)(a + size); u32 c; - for (; size >= 4; size -= 4) { - c = be32_to_cpu(*--b) + 1; - *b = cpu_to_be32(c); - if (c) - return; - } + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !((unsigned long)b & (__alignof__(*b) - 1))) + for (; size >= 4; size -= 4) { + c = be32_to_cpu(*--b) + 1; + *b = cpu_to_be32(c); + if (c) + return; + } crypto_inc_byte(a, size); } EXPORT_SYMBOL_GPL(crypto_inc); -static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size) +void __crypto_xor(u8 *dst, const u8 *src, unsigned int len) { - for (; size; size--) - *a++ ^= *b++; + int relalign = 0; + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + int size = sizeof(unsigned long); + int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1); + + relalign = d ? 1 << __ffs(d) : size; + + /* + * If we care about alignment, process as many bytes as + * needed to advance dst and src to values whose alignments + * equal their relative alignment. This will allow us to + * process the remainder of the input using optimal strides. 
+ */ + while (((unsigned long)dst & (relalign - 1)) && len > 0) { + *dst++ ^= *src++; + len--; + } + } + + while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) { + *(u64 *)dst ^= *(u64 *)src; + dst += 8; + src += 8; + len -= 8; + } + + while (len >= 4 && !(relalign & 3)) { + *(u32 *)dst ^= *(u32 *)src; + dst += 4; + src += 4; + len -= 4; + } + + while (len >= 2 && !(relalign & 1)) { + *(u16 *)dst ^= *(u16 *)src; + dst += 2; + src += 2; + len -= 2; + } + + while (len--) + *dst++ ^= *src++; } - -void crypto_xor(u8 *dst, const u8 *src, unsigned int size) -{ - u32 *a = (u32 *)dst; - u32 *b = (u32 *)src; - - for (; size >= 4; size -= 4) - *a++ ^= *b++; - - crypto_xor_byte((u8 *)a, (u8 *)b, size); -} -EXPORT_SYMBOL_GPL(crypto_xor); +EXPORT_SYMBOL_GPL(__crypto_xor); unsigned int crypto_alg_extsize(struct crypto_alg *alg) { diff --git a/crypto/cbc.c b/crypto/cbc.c index 68f751a41a84..bc160a3186dc 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.base.cra_alignmask = alg->cra_alignmask; - /* We access the data as u32s when xoring. */ - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.ivsize = alg->cra_blocksize; inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; diff --git a/crypto/cmac.c b/crypto/cmac.c index 04080dca8f0c..16301f52858c 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (err) goto out_free_inst; - /* We access the data as u32s when xoring. 
*/ - alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); + alignmask = alg->cra_alignmask; inst->alg.base.cra_alignmask = alignmask; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; diff --git a/crypto/ctr.c b/crypto/ctr.c index a9a7a44f2783..a4f4a8983169 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb) inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; inst->alg.cra_priority = alg->cra_priority; inst->alg.cra_blocksize = 1; - inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); + inst->alg.cra_alignmask = alg->cra_alignmask; inst->alg.cra_type = &crypto_blkcipher_type; inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; diff --git a/crypto/cts.c b/crypto/cts.c index a1335d6c35fb..243f591dc409 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -374,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->base.cra_blocksize; inst->alg.base.cra_alignmask = alg->base.cra_alignmask; - /* We access the data as u32s when xoring. */ - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.ivsize = alg->base.cra_blocksize; inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); diff --git a/crypto/pcbc.c b/crypto/pcbc.c index 11d248673ad4..29dd2b4a3b85 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -260,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.base.cra_alignmask = alg->cra_alignmask; - /* We access the data as u32s when xoring. 
*/ - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.ivsize = alg->cra_blocksize; inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; diff --git a/crypto/seqiv.c b/crypto/seqiv.c index c7049231861f..570b7d1aa0ca 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb) if (IS_ERR(inst)) return PTR_ERR(inst); - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - spawn = aead_instance_ctx(inst); alg = crypto_spawn_aead_alg(spawn); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 404e9558e879..ebe4ded0c55d 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -191,9 +191,25 @@ static inline unsigned int crypto_queue_len(struct crypto_queue *queue) return queue->qlen; } -/* These functions require the input/output to be aligned as u32. */ void crypto_inc(u8 *a, unsigned int size); -void crypto_xor(u8 *dst, const u8 *src, unsigned int size); +void __crypto_xor(u8 *dst, const u8 *src, unsigned int size); + +static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + __builtin_constant_p(size) && + (size % sizeof(unsigned long)) == 0) { + unsigned long *d = (unsigned long *)dst; + unsigned long *s = (unsigned long *)src; + + while (size > 0) { + *d++ ^= *s++; + size -= sizeof(unsigned long); + } + } else { + __crypto_xor(dst, src, size); + } +} int blkcipher_walk_done(struct blkcipher_desc *desc, struct blkcipher_walk *walk, int err); From ceb4afb3086ab08f0d1d9cb3da536a18ea2e5918 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Feb 2017 13:32:15 +0100 Subject: [PATCH 106/142] crypto: atmel - refine Kconfig dependencies With the new authenc support, we get a harmless Kconfig warning: warning: (CRYPTO_DEV_ATMEL_AUTHENC) selects CRYPTO_DEV_ATMEL_SHA which has unmet direct 
dependencies (CRYPTO && CRYPTO_HW && ARCH_AT91) The problem is that each of the options has slightly different dependencies, although they all seem to want the same thing: allow building for real AT91 targets that actually have the hardware, and possibly for compile testing. This makes all four options consistent: instead of depending on a particular dmaengine implementation, we depend on the ARM platform, CONFIG_COMPILE_TEST as an alternative when that is turned off. This makes the 'select' statements work correctly. Fixes: 89a82ef87e01 ("crypto: atmel-authenc - add support to authenc(hmac(shaX), Y(aes)) modes") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 74824612d3e9..f60de152a90d 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -417,7 +417,8 @@ config CRYPTO_DEV_BFIN_CRC config CRYPTO_DEV_ATMEL_AUTHENC tristate "Support for Atmel IPSEC/SSL hw accelerator" - depends on (ARCH_AT91 && HAS_DMA) || COMPILE_TEST + depends on HAS_DMA + depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_AUTHENC select CRYPTO_DEV_ATMEL_AES select CRYPTO_DEV_ATMEL_SHA @@ -430,7 +431,7 @@ config CRYPTO_DEV_ATMEL_AUTHENC config CRYPTO_DEV_ATMEL_AES tristate "Support for Atmel AES hw accelerator" depends on HAS_DMA - depends on AT_XDMAC || AT_HDMAC || COMPILE_TEST + depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_AES select CRYPTO_AEAD select CRYPTO_BLKCIPHER @@ -444,7 +445,7 @@ config CRYPTO_DEV_ATMEL_AES config CRYPTO_DEV_ATMEL_TDES tristate "Support for Atmel DES/TDES hw accelerator" - depends on ARCH_AT91 + depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_DES select CRYPTO_BLKCIPHER help @@ -457,7 +458,7 @@ config CRYPTO_DEV_ATMEL_TDES config CRYPTO_DEV_ATMEL_SHA tristate "Support for Atmel SHA hw accelerator" - depends on ARCH_AT91 + depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_HASH help Some 
Atmel processors have SHA1/SHA224/SHA256/SHA384/SHA512 From 4c147bcff52b582cd278f39261302a4659da3fef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Feb 2017 13:32:16 +0100 Subject: [PATCH 107/142] crypto: atmel - fix 64-bit build warnings When we enable COMPILE_TEST building for the Atmel sha and tdes implementations, we run into a couple of warnings about incorrect format strings, e.g. In file included from include/linux/platform_device.h:14:0, from drivers/crypto/atmel-sha.c:24: drivers/crypto/atmel-sha.c: In function 'atmel_sha_xmit_cpu': drivers/crypto/atmel-sha.c:571:19: error: format '%d' expects argument of type 'int', but argument 6 has type 'size_t {aka long unsigned int}' [-Werror=format=] In file included from include/linux/printk.h:6:0, from include/linux/kernel.h:13, from drivers/crypto/atmel-tdes.c:17: drivers/crypto/atmel-tdes.c: In function 'atmel_tdes_crypt_dma_stop': include/linux/kern_levels.h:4:18: error: format '%u' expects argument of type 'unsigned int', but argument 2 has type 'size_t {aka long unsigned int}' [-Werror=format=] These are all fixed by using the "%z" modifier for size_t data. There are also a few uses of min()/max() with incompatible types: drivers/crypto/atmel-tdes.c: In function 'atmel_tdes_crypt_start': drivers/crypto/atmel-tdes.c:528:181: error: comparison of distinct pointer types lacks a cast [-Werror] Where possible, we should use consistent types here, otherwise we can use min_t()/max_t() to get well-defined behavior without a warning. 
Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 16 ++++++++-------- drivers/crypto/atmel-tdes.c | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 22d0c0c118da..50a1dcd50c46 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -568,7 +568,7 @@ static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf, int count, len32; const u32 *buffer = (const u32 *)buf; - dev_dbg(dd->dev, "xmit_cpu: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", + dev_dbg(dd->dev, "xmit_cpu: digcnt: 0x%llx 0x%llx, length: %zd, final: %d\n", ctx->digcnt[1], ctx->digcnt[0], length, final); atmel_sha_write_ctrl(dd, 0); @@ -597,7 +597,7 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); int len32; - dev_dbg(dd->dev, "xmit_pdc: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", + dev_dbg(dd->dev, "xmit_pdc: digcnt: 0x%llx 0x%llx, length: %zd, final: %d\n", ctx->digcnt[1], ctx->digcnt[0], length1, final); len32 = DIV_ROUND_UP(length1, sizeof(u32)); @@ -644,7 +644,7 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, struct dma_async_tx_descriptor *in_desc; struct scatterlist sg[2]; - dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", + dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %zd, final: %d\n", ctx->digcnt[1], ctx->digcnt[0], length1, final); dd->dma_lch_in.dma_conf.src_maxburst = 16; @@ -723,7 +723,7 @@ static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd, ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen + ctx->block_size, DMA_TO_DEVICE); if (dma_mapping_error(dd->dev, ctx->dma_addr)) { - dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + + dev_err(dd->dev, "dma %zu bytes error\n", ctx->buflen + ctx->block_size); atmel_sha_complete(dd, -EINVAL); } 
@@ -744,7 +744,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; - dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: 0x%llx 0x%llx, final: %d\n", + dev_dbg(dd->dev, "slow: bufcnt: %zu, digcnt: 0x%llx 0x%llx, final: %d\n", ctx->bufcnt, ctx->digcnt[1], ctx->digcnt[0], final); if (final) @@ -772,7 +772,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (ctx->bufcnt || ctx->offset) return atmel_sha_update_dma_slow(dd); - dev_dbg(dd->dev, "fast: digcnt: 0x%llx 0x%llx, bufcnt: %u, total: %u\n", + dev_dbg(dd->dev, "fast: digcnt: 0x%llx 0x%llx, bufcnt: %zd, total: %u\n", ctx->digcnt[1], ctx->digcnt[0], ctx->bufcnt, ctx->total); sg = ctx->sg; @@ -814,7 +814,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen + ctx->block_size, DMA_TO_DEVICE); if (dma_mapping_error(dd->dev, ctx->dma_addr)) { - dev_err(dd->dev, "dma %u bytes error\n", + dev_err(dd->dev, "dma %zu bytes error\n", ctx->buflen + ctx->block_size); atmel_sha_complete(dd, -EINVAL); } @@ -994,7 +994,7 @@ static int atmel_sha_finish(struct ahash_request *req) if (ctx->digcnt[0] || ctx->digcnt[1]) atmel_sha_copy_ready_hash(req); - dev_dbg(dd->dev, "digcnt: 0x%llx 0x%llx, bufcnt: %d\n", ctx->digcnt[1], + dev_dbg(dd->dev, "digcnt: 0x%llx 0x%llx, bufcnt: %zd\n", ctx->digcnt[1], ctx->digcnt[0], ctx->bufcnt); return 0; diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index bf467d7be35c..b25f1b3c981f 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -150,7 +150,7 @@ static struct atmel_tdes_drv atmel_tdes = { static int atmel_tdes_sg_copy(struct scatterlist **sg, size_t *offset, void *buf, size_t buflen, size_t total, int out) { - unsigned int count, off = 0; + size_t count, off = 0; while (buflen && total) { count = min((*sg)->length - *offset, total); @@ -336,7 +336,7 @@ static int 
atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: %u\n", count); + pr_err("not all data converted: %zu\n", count); } } @@ -361,7 +361,7 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in, dd->buflen, DMA_TO_DEVICE); if (dma_mapping_error(dd->dev, dd->dma_addr_in)) { - dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); + dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); err = -EINVAL; goto err_map_in; } @@ -369,7 +369,7 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out, dd->buflen, DMA_FROM_DEVICE); if (dma_mapping_error(dd->dev, dd->dma_addr_out)) { - dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); + dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen); err = -EINVAL; goto err_map_out; } @@ -525,8 +525,8 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd) if (fast) { - count = min(dd->total, sg_dma_len(dd->in_sg)); - count = min(count, sg_dma_len(dd->out_sg)); + count = min_t(size_t, dd->total, sg_dma_len(dd->in_sg)); + count = min_t(size_t, count, sg_dma_len(dd->out_sg)); err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); if (!err) { @@ -661,7 +661,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) dd->buf_out, dd->buflen, dd->dma_size, 1); if (count != dd->dma_size) { err = -EINVAL; - pr_err("not all data converted: %u\n", count); + pr_err("not all data converted: %zu\n", count); } } } From 87f3d0887c9cc63d7b8e7b0b7e5a48dca8cc8564 Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 6 Feb 2017 14:28:46 -0800 Subject: [PATCH 108/142] hwrng: cavium - Use per device name to allow for multiple devices. Systems containing the Cavium HW RNG may have one device per NUMA node. A typical configuration is a 2-node NUMA system, which results in 2 RNG devices. 
The hwrng subsystem refuses (and rightly so) to register more than one device with the same name, so we get failure messages on these systems. Make the hwrng name unique by including the underlying device name. Also remove spaces from the name to make it possible to switch devices via the sysfs knobs. Signed-off-by: David Daney Signed-off-by: Herbert Xu --- drivers/char/hw_random/cavium-rng-vf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c index 066ae0e78d63..dd1007aecb10 100644 --- a/drivers/char/hw_random/cavium-rng-vf.c +++ b/drivers/char/hw_random/cavium-rng-vf.c @@ -57,7 +57,11 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev, return -ENOMEM; } - rng->ops.name = "cavium rng"; + rng->ops.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "cavium-rng-%s", dev_name(&pdev->dev)); + if (!rng->ops.name) + return -ENOMEM; + rng->ops.read = cavium_rng_read; rng->ops.quality = 1000; From 9e2c7d99941d000a36f68a3594cec27a1bbea274 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Tue, 7 Feb 2017 14:51:13 +0000 Subject: [PATCH 109/142] crypto: cavium - Add Support for Octeon-tx CPT Engine Enable the Physical Function driver for the Cavium Crypto Engine (CPT) found in the Octeon-tx series of SoCs. CPT is the Cryptographic Acceleration Unit. CPT includes microcoded GigaCypher symmetric engines (SEs) and asymmetric engines (AEs).
Signed-off-by: George Cherian Reviewed-by: David Daney Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/Kconfig | 16 + drivers/crypto/cavium/cpt/Makefile | 2 + drivers/crypto/cavium/cpt/cpt_common.h | 158 +++++ drivers/crypto/cavium/cpt/cpt_hw_types.h | 658 +++++++++++++++++++++ drivers/crypto/cavium/cpt/cptpf.h | 69 +++ drivers/crypto/cavium/cpt/cptpf_main.c | 708 +++++++++++++++++++++++ drivers/crypto/cavium/cpt/cptpf_mbox.c | 163 ++++++ 7 files changed, 1774 insertions(+) create mode 100644 drivers/crypto/cavium/cpt/Kconfig create mode 100644 drivers/crypto/cavium/cpt/Makefile create mode 100644 drivers/crypto/cavium/cpt/cpt_common.h create mode 100644 drivers/crypto/cavium/cpt/cpt_hw_types.h create mode 100644 drivers/crypto/cavium/cpt/cptpf.h create mode 100644 drivers/crypto/cavium/cpt/cptpf_main.c create mode 100644 drivers/crypto/cavium/cpt/cptpf_mbox.c diff --git a/drivers/crypto/cavium/cpt/Kconfig b/drivers/crypto/cavium/cpt/Kconfig new file mode 100644 index 000000000000..247f1cbbefc1 --- /dev/null +++ b/drivers/crypto/cavium/cpt/Kconfig @@ -0,0 +1,16 @@ +# +# Cavium crypto device configuration +# + +config CRYPTO_DEV_CPT + tristate + +config CAVIUM_CPT + tristate "Cavium Cryptographic Accelerator driver" + depends on ARCH_THUNDER + select CRYPTO_DEV_CPT + help + Support for Cavium CPT block found in octeon-tx series of + processors. + + To compile this as a module, choose M here. diff --git a/drivers/crypto/cavium/cpt/Makefile b/drivers/crypto/cavium/cpt/Makefile new file mode 100644 index 000000000000..fe3d454a34a7 --- /dev/null +++ b/drivers/crypto/cavium/cpt/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CAVIUM_CPT) += cptpf.o +cptpf-objs := cptpf_main.o cptpf_mbox.o diff --git a/drivers/crypto/cavium/cpt/cpt_common.h b/drivers/crypto/cavium/cpt/cpt_common.h new file mode 100644 index 000000000000..ede612f306d3 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cpt_common.h @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2016 Cavium, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPT_COMMON_H +#define __CPT_COMMON_H + +#include +#include +#include + +#include "cpt_hw_types.h" + +/* Device ID */ +#define CPT_81XX_PCI_PF_DEVICE_ID 0xa040 +#define CPT_81XX_PCI_VF_DEVICE_ID 0xa041 + +/* flags to indicate the features supported */ +#define CPT_FLAG_MSIX_ENABLED BIT(0) +#define CPT_FLAG_SRIOV_ENABLED BIT(1) +#define CPT_FLAG_VF_DRIVER BIT(2) +#define CPT_FLAG_DEVICE_READY BIT(3) + +#define cpt_msix_enabled(cpt) ((cpt)->flags & CPT_FLAG_MSIX_ENABLED) +#define cpt_sriov_enabled(cpt) ((cpt)->flags & CPT_FLAG_SRIOV_ENABLED) +#define cpt_vf_driver(cpt) ((cpt)->flags & CPT_FLAG_VF_DRIVER) +#define cpt_device_ready(cpt) ((cpt)->flags & CPT_FLAG_DEVICE_READY) + +#define CPT_MBOX_MSG_TYPE_ACK 1 +#define CPT_MBOX_MSG_TYPE_NACK 2 +#define CPT_MBOX_MSG_TIMEOUT 2000 +#define VF_STATE_DOWN 0 +#define VF_STATE_UP 1 + +/* + * CPT Registers map for 81xx + */ + +/* PF registers */ +#define CPTX_PF_CONSTANTS(a) (0x0ll + ((u64)(a) << 36)) +#define CPTX_PF_RESET(a) (0x100ll + ((u64)(a) << 36)) +#define CPTX_PF_DIAG(a) (0x120ll + ((u64)(a) << 36)) +#define CPTX_PF_BIST_STATUS(a) (0x160ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_CTL(a) (0x200ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_FLIP(a) (0x210ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_INT(a) (0x220ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_INT_W1S(a) (0x230ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_ENA_W1S(a) (0x240ll + ((u64)(a) << 36)) +#define CPTX_PF_ECC0_ENA_W1C(a) (0x250ll + ((u64)(a) << 36)) +#define CPTX_PF_MBOX_INTX(a, b) \ + (0x400ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_INT_W1SX(a, b) \ + (0x420ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_ENA_W1CX(a, b) \ + (0x440ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_MBOX_ENA_W1SX(a, b) \ + (0x460ll + 
((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXEC_INT(a) (0x500ll + 0x1000000000ll * ((a) & 0x1)) +#define CPTX_PF_EXEC_INT_W1S(a) (0x520ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_ENA_W1C(a) (0x540ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_ENA_W1S(a) (0x560ll + ((u64)(a) << 36)) +#define CPTX_PF_GX_EN(a, b) \ + (0x600ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXEC_INFO(a) (0x700ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_BUSY(a) (0x800ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_INFO0(a) (0x900ll + ((u64)(a) << 36)) +#define CPTX_PF_EXEC_INFO1(a) (0x910ll + ((u64)(a) << 36)) +#define CPTX_PF_INST_REQ_PC(a) (0x10000ll + ((u64)(a) << 36)) +#define CPTX_PF_INST_LATENCY_PC(a) \ + (0x10020ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_REQ_PC(a) (0x10040ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_LATENCY_PC(a) (0x10060ll + ((u64)(a) << 36)) +#define CPTX_PF_RD_UC_PC(a) (0x10080ll + ((u64)(a) << 36)) +#define CPTX_PF_ACTIVE_CYCLES_PC(a) (0x10100ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_CTL(a) (0x4000000ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_STATUS(a) (0x4000008ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_CLK(a) (0x4000010ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_CTL(a) (0x4000018ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_DATA(a) (0x4000020ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_BIST_STATUS(a) (0x4000028ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_REQ_TIMER(a) (0x4000030ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_MEM_CTL(a) (0x4000038ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_PERF_CTL(a) (0x4001000ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_DBG_CNTX(a, b) \ + (0x4001100ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXE_PERF_EVENT_CNT(a) (0x4001180ll + ((u64)(a) << 36)) +#define CPTX_PF_EXE_EPCI_INBX_CNT(a, b) \ + (0x4001200ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_EXE_EPCI_OUTBX_CNT(a, b) \ + (0x4001240ll + ((u64)(a) << 36) + ((b) << 3)) +#define CPTX_PF_ENGX_UCODE_BASE(a, b) \ + (0x4002000ll + ((u64)(a) << 36) + ((b) << 
3)) +#define CPTX_PF_QX_CTL(a, b) \ + (0x8000000ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_QX_GMCTL(a, b) \ + (0x8000020ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_QX_CTL2(a, b) \ + (0x8000100ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_PF_VFX_MBOXX(a, b, c) \ + (0x8001000ll + ((u64)(a) << 36) + ((b) << 20) + ((c) << 8)) + +/* VF registers */ +#define CPTX_VQX_CTL(a, b) (0x100ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_SADDR(a, b) (0x200ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_WAIT(a, b) (0x400ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_INPROG(a, b) (0x410ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE(a, b) (0x420ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ACK(a, b) (0x440ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_INT_W1S(a, b) (0x460ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_INT_W1C(a, b) (0x468ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ENA_W1S(a, b) (0x470ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DONE_ENA_W1C(a, b) (0x478ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_INT(a, b) (0x500ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_INT_W1S(a, b) (0x508ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_ENA_W1S(a, b) (0x510ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_MISC_ENA_W1C(a, b) (0x518ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VQX_DOORBELL(a, b) (0x600ll + ((u64)(a) << 36) + ((b) << 20)) +#define CPTX_VFX_PF_MBOXX(a, b, c) \ + (0x1000ll + ((u64)(a) << 36) + ((b) << 20) + ((c) << 3)) + +enum vftype { + AE_TYPES = 1, + SE_TYPES = 2, + BAD_CPT_TYPES, +}; + +/* Max CPT devices supported */ +enum cpt_mbox_opcode { + CPT_MSG_VF_UP = 1, + CPT_MSG_VF_DOWN, + CPT_MSG_READY, + CPT_MSG_QLEN, + CPT_MSG_QBIND_GRP, + CPT_MSG_VQ_PRIORITY, +}; + +/* CPT mailbox structure */ +struct cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 data;/* Data MBOX[1] */ +}; + 
+/* Register read/write APIs */ +static inline void cpt_write_csr64(u8 __iomem *hw_addr, u64 offset, + u64 val) +{ + writeq(val, hw_addr + offset); +} + +static inline u64 cpt_read_csr64(u8 __iomem *hw_addr, u64 offset) +{ + return readq(hw_addr + offset); +} +#endif /* __CPT_COMMON_H */ diff --git a/drivers/crypto/cavium/cpt/cpt_hw_types.h b/drivers/crypto/cavium/cpt/cpt_hw_types.h new file mode 100644 index 000000000000..279669494196 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cpt_hw_types.h @@ -0,0 +1,658 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPT_HW_TYPES_H +#define __CPT_HW_TYPES_H + +#include "cpt_common.h" + +/** + * Enumeration cpt_comp_e + * + * CPT Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_LAST_ENTRY = 0xFF +}; + +/** + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. 
+ * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work to SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO. + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 2. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 3. Passed to the AE/SE.
+ * + */ +union cpt_inst_s { + u64 u[8]; + struct cpt_inst_s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_0_1:16; +#else /* Word 0 - Little Endian */ + u64 reserved_0_15:16; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 res_addr; +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */ + u64 reserved_172_19:20; + u64 grp:10; + u64 tt:2; + u64 tag:32; +#else /* Word 2 - Little Endian */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_191:20; +#endif /* Word 2 - End */ + u64 wq_ptr; + u64 ei0; + u64 ei1; + u64 ei2; + u64 ei3; + } s; +}; + +/** + * Structure cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case,the CPT + * coprocessor will have also completed L2/DRAM write operations. 
+ * Word 1 + * reserved + * + */ +union cpt_res_s { + u64 u[2]; + struct cpt_res_s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +}; + +/** + * Register (NCB) cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union cptx_pf_bist_status { + u64 u; + struct cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. + * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs. 
+ */ +union cptx_pf_constants { + u64 u; + struct cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union cptx_pf_exe_bist_status { + u64 u; + struct cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. + * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. + * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. 
+ * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which if the line hits and is dirty will cause the line to be + * written back before being replaced. + * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues.
+ */ +union cptx_pf_qx_ctl { + u64 u; + struct cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. + * + */ +union cptx_vqx_saddr { + u64 u; + struct cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. 
+ * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. + * + */ +union cptx_vqx_misc_ena_w1s { + u64 u; + struct cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the + * current number of pending doorbell requests. If counter overflows + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count of + * multiple of 8.
+ */ +union cptx_vqx_doorbell { + u64 u; + struct cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in flight registers. + * cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until equals to zero. + */ +union cptx_vqx_inprog { + u64 u; + struct cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. + * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. 
+ * + */ +union cptx_vqx_misc_int { + u64 u; + struct cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied. + * + */ +union cptx_vqx_done_ack { + u64 u; + struct cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the + * instruction finishes. Write to this field are for diagnostic use only; + * instead software writes CPT()_VQ()_DONE_ACK with the number of + * decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. 
+ * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written + * but this is not typical), the interrupt coalescing timer restarts. + * Note after decrementing this interrupt equation is recomputed, + * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. + * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example by ensuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. + * + */ +union cptx_vqx_done { + u64 u; + struct cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off.
When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024 then interrupt coalescing ends. + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1. + * + */ +union cptx_vqx_done_wait { + u64 u; + struct cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Write 1 to these registers will enable the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue. + * Write 0 has no effect. Read will return the enable bit. + */ +union cptx_vqx_done_ena_w1s { + u64 u; + struct cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/** + * Register (NCB) cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. 
+ * 1 = Queue is enabled. + * 0 = Queue is disabled. + */ +union cptx_vqx_ctl { + u64 u; + struct cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; +#endif /*__CPT_HW_TYPES_H*/ diff --git a/drivers/crypto/cavium/cpt/cptpf.h b/drivers/crypto/cavium/cpt/cptpf.h new file mode 100644 index 000000000000..8a2a8e538da4 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPTPF_H +#define __CPTPF_H + +#include "cpt_common.h" + +#define CSR_DELAY 30 +#define CPT_MAX_CORE_GROUPS 8 +#define CPT_MAX_SE_CORES 10 +#define CPT_MAX_AE_CORES 6 +#define CPT_MAX_TOTAL_CORES (CPT_MAX_SE_CORES + CPT_MAX_AE_CORES) +#define CPT_MAX_VF_NUM 16 +#define CPT_PF_MSIX_VECTORS 3 +#define CPT_PF_INT_VEC_E_MBOXX(a) (0x02 + (a)) +#define CPT_UCODE_VERSION_SZ 32 +struct cpt_device; + +struct microcode { + u8 is_mc_valid; + u8 is_ae; + u8 group; + u8 num_cores; + u32 code_size; + u64 core_mask; + u8 version[CPT_UCODE_VERSION_SZ]; + /* Base info */ + dma_addr_t phys_base; + void *code; +}; + +struct cpt_vf_info { + u8 state; + u8 priority; + u8 id; + u32 qlen; +}; + +/** + * cpt device structure + */ +struct cpt_device { + u16 flags; /* Flags to hold device status bits */ + u8 num_vf_en; /* Number of VFs enabled (0...CPT_MAX_VF_NUM) */ + struct cpt_vf_info vfinfo[CPT_MAX_VF_NUM]; /* Per VF info */ + + void __iomem *reg_base; /* Register start address */ + /* MSI-X */ + u8 num_vec; + bool msix_enabled; + struct msix_entry msix_entries[CPT_PF_MSIX_VECTORS]; + bool irq_allocated[CPT_PF_MSIX_VECTORS]; + struct pci_dev *pdev; /* pci device handle */ + + struct 
microcode mcode[CPT_MAX_CORE_GROUPS]; + u8 next_mc_idx; /* next microcode index */ + u8 next_group; + u8 max_se_cores; + u8 max_ae_cores; +}; + +void cpt_mbox_intr_handler(struct cpt_device *cpt, int mbx); +#endif /* __CPTPF_H */ diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c new file mode 100644 index 000000000000..682d57a11a75 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -0,0 +1,708 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cptpf.h" + +#define DRV_NAME "thunder-cpt" +#define DRV_VERSION "1.0" + +static u32 num_vfs = 4; /* Default 4 VF enabled */ +module_param(num_vfs, uint, 0444); +MODULE_PARM_DESC(num_vfs, "Number of VFs to enable(1-16)"); + +/* + * Disable cores specified by coremask + * + * @cpt: PF device whose CSRs are accessed + * @coremask: bitmask of the cores to disable + * @type: when AE_TYPES, @coremask is shifted up by cpt->max_se_cores + * before use; otherwise it is used as given + * @grp: group whose CPTX_PF_GX_EN register the cores are cleared from + * + * The cores are first cleared from the group enable register, then + * CPTX_PF_EXEC_BUSY is polled (bounded by a retry budget, with a + * udelay(CSR_DELAY) between reads) until none of them is busy, and + * finally they are cleared in CPTX_PF_EXE_CTL. + */ +static void cpt_disable_cores(struct cpt_device *cpt, u64 coremask, + u8 type, u8 grp) +{ + u64 pf_exe_ctl; + u32 timeout = 100; + u64 grpmask = 0; + u64 busy; + struct device *dev = &cpt->pdev->dev; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + /* Disengage the cores from groups */ + grpmask = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), + (grpmask & ~coremask)); + udelay(CSR_DELAY); + /* + * Keep the busy bitmap in a u64: storing it in the u8 @grp dropped + * every bit above bit 7 before it was tested against @coremask. + */ + busy = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); + while (busy & coremask) { + dev_err(dev, "Cores still busy %llx", coremask); + busy = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXEC_BUSY(0)); + /* Give up only once the retry budget is used up */ + if (!timeout--) + break; + + udelay(CSR_DELAY); + } + + /* Disable the cores */ + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), + (pf_exe_ctl & ~coremask)); + udelay(CSR_DELAY); +} + +/* + *
Enable cores specified by coremask + */ +static void cpt_enable_cores(struct cpt_device *cpt, u64 coremask, + u8 type) +{ + u64 pf_exe_ctl; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), + (pf_exe_ctl | coremask)); + udelay(CSR_DELAY); +} + +static void cpt_configure_group(struct cpt_device *cpt, u8 grp, + u64 coremask, u8 type) +{ + u64 pf_gx_en = 0; + + if (type == AE_TYPES) + coremask = (coremask << cpt->max_se_cores); + + pf_gx_en = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), + (pf_gx_en | coremask)); + udelay(CSR_DELAY); +} + +static void cpt_disable_mbox_interrupts(struct cpt_device *cpt) +{ + /* Clear mbox(0) interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1CX(0, 0), ~0ull); +} + +static void cpt_disable_ecc_interrupts(struct cpt_device *cpt) +{ + /* Clear ecc(0) interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_ECC0_ENA_W1C(0), ~0ull); +} + +static void cpt_disable_exec_interrupts(struct cpt_device *cpt) +{ + /* Clear exec interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXEC_ENA_W1C(0), ~0ull); +} + +static void cpt_disable_all_interrupts(struct cpt_device *cpt) +{ + cpt_disable_mbox_interrupts(cpt); + cpt_disable_ecc_interrupts(cpt); + cpt_disable_exec_interrupts(cpt); +} + +static void cpt_enable_mbox_interrupts(struct cpt_device *cpt) +{ + /* Set mbox(0) interupts for all vfs */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1SX(0, 0), ~0ull); +} + +static int cpt_load_microcode(struct cpt_device *cpt, struct microcode *mcode) +{ + int ret = 0, core = 0, shift = 0; + u32 total_cores = 0; + struct device *dev = &cpt->pdev->dev; + + if (!mcode || !mcode->code) { + dev_err(dev, "Either the mcode is null or data is NULL\n"); + return -EINVAL; + } + + if (mcode->code_size == 0) { + 
dev_err(dev, "microcode size is 0\n"); + return -EINVAL; + } + + /* Assumes 0-9 are SE cores for UCODE_BASE registers and + * AE core bases follow + */ + if (mcode->is_ae) { + core = CPT_MAX_SE_CORES; /* start couting from 10 */ + total_cores = CPT_MAX_TOTAL_CORES; /* upto 15 */ + } else { + core = 0; /* start couting from 0 */ + total_cores = CPT_MAX_SE_CORES; /* upto 9 */ + } + + /* Point to microcode for each core of the group */ + for (; core < total_cores ; core++, shift++) { + if (mcode->core_mask & (1 << shift)) { + cpt_write_csr64(cpt->reg_base, + CPTX_PF_ENGX_UCODE_BASE(0, core), + (u64)mcode->phys_base); + } + } + return ret; +} + +static int do_cpt_init(struct cpt_device *cpt, struct microcode *mcode) +{ + int ret = 0; + struct device *dev = &cpt->pdev->dev; + + /* Make device not ready */ + cpt->flags &= ~CPT_FLAG_DEVICE_READY; + /* Disable All PF interrupts */ + cpt_disable_all_interrupts(cpt); + /* Calculate mcode group and coremasks */ + if (mcode->is_ae) { + if (mcode->num_cores > cpt->max_ae_cores) { + dev_err(dev, "Requested for more cores than available AE cores\n"); + ret = -EINVAL; + goto cpt_init_fail; + } + + if (cpt->next_group >= CPT_MAX_CORE_GROUPS) { + dev_err(dev, "Can't load, all eight microcode groups in use"); + return -ENFILE; + } + + mcode->group = cpt->next_group; + /* Convert requested cores to mask */ + mcode->core_mask = GENMASK(mcode->num_cores, 0); + cpt_disable_cores(cpt, mcode->core_mask, AE_TYPES, + mcode->group); + /* Load microcode for AE engines */ + ret = cpt_load_microcode(cpt, mcode); + if (ret) { + dev_err(dev, "Microcode load Failed for %s\n", + mcode->version); + goto cpt_init_fail; + } + cpt->next_group++; + /* Configure group mask for the mcode */ + cpt_configure_group(cpt, mcode->group, mcode->core_mask, + AE_TYPES); + /* Enable AE cores for the group mask */ + cpt_enable_cores(cpt, mcode->core_mask, AE_TYPES); + } else { + if (mcode->num_cores > cpt->max_se_cores) { + dev_err(dev, "Requested for more cores than 
available SE cores\n"); + ret = -EINVAL; + goto cpt_init_fail; + } + if (cpt->next_group >= CPT_MAX_CORE_GROUPS) { + dev_err(dev, "Can't load, all eight microcode groups in use"); + return -ENFILE; + } + + mcode->group = cpt->next_group; + /* Covert requested cores to mask */ + mcode->core_mask = GENMASK(mcode->num_cores, 0); + cpt_disable_cores(cpt, mcode->core_mask, SE_TYPES, + mcode->group); + /* Load microcode for SE engines */ + ret = cpt_load_microcode(cpt, mcode); + if (ret) { + dev_err(dev, "Microcode load Failed for %s\n", + mcode->version); + goto cpt_init_fail; + } + cpt->next_group++; + /* Configure group mask for the mcode */ + cpt_configure_group(cpt, mcode->group, mcode->core_mask, + SE_TYPES); + /* Enable SE cores for the group mask */ + cpt_enable_cores(cpt, mcode->core_mask, SE_TYPES); + } + + /* Enabled PF mailbox interrupts */ + cpt_enable_mbox_interrupts(cpt); + cpt->flags |= CPT_FLAG_DEVICE_READY; + + return ret; + +cpt_init_fail: + /* Enabled PF mailbox interrupts */ + cpt_enable_mbox_interrupts(cpt); + + return ret; +} + +struct ucode_header { + u8 version[CPT_UCODE_VERSION_SZ]; + u32 code_length; + u32 data_length; + u64 sram_address; +}; + +/* + * Load one microcode image and start the cores that run it + * + * @cpt: PF device; the image is stored in cpt->mcode[cpt->next_mc_idx] + * @fw: firmware file name handed to request_firmware() + * @is_ae: true for an AE image (6 cores requested), false for SE (10) + * + * The image starts with a struct ucode_header followed by the code. The + * code is copied into a coherent DMA buffer, byte-swapped and passed to + * do_cpt_init(). The firmware image is released on every path. The DMA + * buffer stays in mcode->code (also on a do_cpt_init() failure) until + * cpt_unload_microcode() frees it. + * + * Returns 0 on success or a negative errno. + */ +static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) +{ + const struct firmware *fw_entry; + struct device *dev = &cpt->pdev->dev; + struct ucode_header *ucode; + struct microcode *mcode; + int j, ret = 0; + + ret = request_firmware(&fw_entry, fw, dev); + if (ret) + return ret; + + /* The image must at least hold the header that is read below */ + if (fw_entry->size < sizeof(*ucode)) { + dev_err(dev, "Microcode image is smaller than its header\n"); + ret = -EINVAL; + goto fw_release; + } + + ucode = (struct ucode_header *)fw_entry->data; + mcode = &cpt->mcode[cpt->next_mc_idx]; + memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ); + mcode->code_size = ntohl(ucode->code_length) * 2; + if (!mcode->code_size) { + ret = -EINVAL; + goto fw_release; + } + + /* Never copy more code bytes than the image actually holds */ + if (mcode->code_size > fw_entry->size - sizeof(*ucode)) { + dev_err(dev, "Microcode length exceeds the image size\n"); + ret = -EINVAL; + goto fw_release; + } + + mcode->is_ae = is_ae; + mcode->core_mask = 0ULL; + mcode->num_cores = is_ae ?
6 : 10; + + /* Allocate DMAable space */ + mcode->code = dma_zalloc_coherent(&cpt->pdev->dev, mcode->code_size, + &mcode->phys_base, GFP_KERNEL); + if (!mcode->code) { + dev_err(dev, "Unable to allocate space for microcode"); + ret = -ENOMEM; + goto fw_release; + } + + memcpy((void *)mcode->code, (void *)(fw_entry->data + sizeof(*ucode)), + mcode->code_size); + + /* Byte swap 64-bit */ + for (j = 0; j < (mcode->code_size / 8); j++) + ((u64 *)mcode->code)[j] = cpu_to_be64(((u64 *)mcode->code)[j]); + /* MC needs 16-bit swap */ + for (j = 0; j < (mcode->code_size / 2); j++) + ((u16 *)mcode->code)[j] = cpu_to_be16(((u16 *)mcode->code)[j]); + + dev_dbg(dev, "mcode->code_size = %u\n", mcode->code_size); + dev_dbg(dev, "mcode->is_ae = %u\n", mcode->is_ae); + dev_dbg(dev, "mcode->num_cores = %u\n", mcode->num_cores); + dev_dbg(dev, "mcode->code = %llx\n", (u64)mcode->code); + dev_dbg(dev, "mcode->phys_base = %llx\n", mcode->phys_base); + + ret = do_cpt_init(cpt, mcode); + if (ret) { + dev_err(dev, "do_cpt_init failed with ret: %d\n", ret); + goto fw_release; + } + + dev_info(dev, "Microcode Loaded %s\n", mcode->version); + mcode->is_mc_valid = 1; + cpt->next_mc_idx++; + +fw_release: + /* The code was copied out above, so the image is released on every path */ + release_firmware(fw_entry); + + return ret; +} + +static int cpt_ucode_load(struct cpt_device *cpt) +{ + int ret = 0; + struct device *dev = &cpt->pdev->dev; + + ret = cpt_ucode_load_fw(cpt, "cpt8x-mc-ae.out", true); + if (ret) { + dev_err(dev, "ae:cpt_ucode_load failed with ret: %d\n", ret); + return ret; + } + ret = cpt_ucode_load_fw(cpt, "cpt8x-mc-se.out", false); + if (ret) { + dev_err(dev, "se:cpt_ucode_load failed with ret: %d\n", ret); + return ret; + } + + return ret; +} + +static int cpt_enable_msix(struct cpt_device *cpt) +{ + int i, ret; + + cpt->num_vec = CPT_PF_MSIX_VECTORS; + + for (i = 0; i < cpt->num_vec; i++) + cpt->msix_entries[i].entry = i; + + ret = pci_enable_msix(cpt->pdev, cpt->msix_entries, cpt->num_vec); + if (ret) { + dev_err(&cpt->pdev->dev, "Request for #%d msix vectors failed\n", + cpt->num_vec); +
return ret; + } + + cpt->msix_enabled = 1; + return 0; +} + +static irqreturn_t cpt_mbx0_intr_handler(int irq, void *cpt_irq) +{ + struct cpt_device *cpt = (struct cpt_device *)cpt_irq; + + cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void cpt_disable_msix(struct cpt_device *cpt) +{ + if (cpt->msix_enabled) { + pci_disable_msix(cpt->pdev); + cpt->msix_enabled = 0; + cpt->num_vec = 0; + } +} + +static void cpt_free_all_interrupts(struct cpt_device *cpt) +{ + int irq; + + for (irq = 0; irq < cpt->num_vec; irq++) { + if (cpt->irq_allocated[irq]) + free_irq(cpt->msix_entries[irq].vector, cpt); + cpt->irq_allocated[irq] = false; + } +} + +static void cpt_reset(struct cpt_device *cpt) +{ + cpt_write_csr64(cpt->reg_base, CPTX_PF_RESET(0), 1); +} + +static void cpt_find_max_enabled_cores(struct cpt_device *cpt) +{ + union cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_CONSTANTS(0)); + cpt->max_se_cores = pf_cnsts.s.se; + cpt->max_ae_cores = pf_cnsts.s.ae; +} + +static u32 cpt_check_bist_status(struct cpt_device *cpt) +{ + union cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = cpt_read_csr64(cpt->reg_base, + CPTX_PF_BIST_STATUS(0)); + + return bist_sts.u; +} + +static u64 cpt_check_exe_bist_status(struct cpt_device *cpt) +{ + union cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXE_BIST_STATUS(0)); + + return bist_sts.u; +} + +/* + * Clear every group enable register, poll CPTX_PF_EXEC_BUSY until it + * reads zero (bounded by a retry budget, with a udelay(CSR_DELAY) + * between reads) and then write 0 to CPTX_PF_EXE_CTL. + */ +static void cpt_disable_all_cores(struct cpt_device *cpt) +{ + u32 grp, timeout = 100; + struct device *dev = &cpt->pdev->dev; + + /* Disengage the cores from groups */ + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), 0); + udelay(CSR_DELAY); + } + + /* grp is reused below to hold the busy bitmap */ + grp = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); + while (grp) { + dev_err(dev, "Cores still busy"); + grp = cpt_read_csr64(cpt->reg_base, + CPTX_PF_EXEC_BUSY(0)); + /* Give up only once the retry budget is used up */ + if (!timeout--) + break; + + udelay(CSR_DELAY); + } +
/* Disable the cores */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), 0); +} + +/** + * Ensure all cores are disengaged from all groups by + * calling cpt_disable_all_cores() before calling this + * function. + */ +static void cpt_unload_microcode(struct cpt_device *cpt) +{ + u32 grp = 0, core; + + /* Free microcode bases and reset group masks */ + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { + struct microcode *mcode = &cpt->mcode[grp]; + + if (cpt->mcode[grp].code) + dma_free_coherent(&cpt->pdev->dev, mcode->code_size, + mcode->code, mcode->phys_base); + mcode->code = NULL; + } + /* Clear UCODE_BASE registers for all engines */ + for (core = 0; core < CPT_MAX_TOTAL_CORES; core++) + cpt_write_csr64(cpt->reg_base, + CPTX_PF_ENGX_UCODE_BASE(0, core), 0ull); +} + +static int cpt_device_init(struct cpt_device *cpt) +{ + u64 bist; + struct device *dev = &cpt->pdev->dev; + + /* Reset the PF when probed first */ + cpt_reset(cpt); + mdelay(100); + + /*Check BIST status*/ + bist = (u64)cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + bist = cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, "Engine BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + /*Get CLK frequency*/ + /*Get max enabled cores */ + cpt_find_max_enabled_cores(cpt); + /*Disable all cores*/ + cpt_disable_all_cores(cpt); + /*Reset device parameters*/ + cpt->next_mc_idx = 0; + cpt->next_group = 0; + /* PF is ready */ + cpt->flags |= CPT_FLAG_DEVICE_READY; + + return 0; +} + +static int cpt_register_interrupts(struct cpt_device *cpt) +{ + int ret; + struct device *dev = &cpt->pdev->dev; + + /* Enable MSI-X */ + ret = cpt_enable_msix(cpt); + if (ret) + return ret; + + /* Register mailbox interrupt handlers */ + ret = request_irq(cpt->msix_entries[CPT_PF_INT_VEC_E_MBOXX(0)].vector, + cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) + goto fail; + + cpt->irq_allocated[CPT_PF_INT_VEC_E_MBOXX(0)] = 
true; + + /* Enable mailbox interrupt */ + cpt_enable_mbox_interrupts(cpt); + return 0; + +fail: + dev_err(dev, "Request irq failed\n"); + cpt_free_all_interrupts(cpt); + return ret; +} + +static void cpt_unregister_interrupts(struct cpt_device *cpt) +{ + cpt_free_all_interrupts(cpt); + cpt_disable_msix(cpt); +} + +static int cpt_sriov_init(struct cpt_device *cpt, int num_vfs) +{ + int pos = 0; + int err; + u16 total_vf_cnt; + struct pci_dev *pdev = cpt->pdev; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) { + dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n"); + return -ENODEV; + } + + cpt->num_vf_en = num_vfs; /* User requested VFs */ + pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt); + if (total_vf_cnt < cpt->num_vf_en) + cpt->num_vf_en = total_vf_cnt; + + if (!total_vf_cnt) + return 0; + + /*Enabled the available VFs */ + err = pci_enable_sriov(pdev, cpt->num_vf_en); + if (err) { + dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n", + cpt->num_vf_en); + cpt->num_vf_en = 0; + return err; + } + + /* TODO: Optionally enable static VQ priorities feature */ + + dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n", + cpt->num_vf_en); + + cpt->flags |= CPT_FLAG_SRIOV_ENABLED; + + return 0; +} + +static int cpt_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct cpt_device *cpt; + int err; + + if (num_vfs > 16 || num_vfs < 4) { + dev_warn(dev, "Invalid vf count %d, Resetting it to 4(default)\n", + num_vfs); + num_vfs = 4; + } + + cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + 
+ goto cpt_err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto cpt_err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto cpt_err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pcim_iomap(pdev, 0, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto cpt_err_release_regions; + } + + /* + * CPT device HW initialization. It returns -ENODEV when a BIST + * check fails, so do not carry on with a device that failed it. + */ + err = cpt_device_init(cpt); + if (err) + goto cpt_err_release_regions; + + /* Register interrupts */ + err = cpt_register_interrupts(cpt); + if (err) + goto cpt_err_release_regions; + + err = cpt_ucode_load(cpt); + if (err) + goto cpt_err_unload_microcode; + + /* Configure SRIOV */ + err = cpt_sriov_init(cpt, num_vfs); + if (err) + goto cpt_err_unload_microcode; + + return 0; + +cpt_err_unload_microcode: + /* + * Microcode may be partly or fully loaded here: stop the cores and + * free the microcode buffers the same way cpt_remove() does. + */ + cpt_disable_all_cores(cpt); + cpt_unload_microcode(cpt); + cpt_unregister_interrupts(cpt); +cpt_err_release_regions: + pci_release_regions(pdev); +cpt_err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +/* + * PCI remove callback: stop all cores, free the microcode buffers, then + * release interrupts, SR-IOV, regions and the PCI device. + */ +static void cpt_remove(struct pci_dev *pdev) +{ + struct cpt_device *cpt = pci_get_drvdata(pdev); + + /* Disengage SE and AE cores from all groups*/ + cpt_disable_all_cores(cpt); + /* Unload microcodes */ + cpt_unload_microcode(cpt); + cpt_unregister_interrupts(cpt); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* + * PCI shutdown callback: release interrupts, regions and the PCI device. + * Does nothing when no driver data is attached to @pdev. + */ +static void cpt_shutdown(struct pci_dev *pdev) +{ + struct cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + dev_info(&pdev->dev, "Shutdown device %x:%x.\n", + (u32)pdev->vendor, (u32)pdev->device); + + cpt_unregister_interrupts(cpt); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct
pci_device_id cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CPT_81XX_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver cpt_pci_driver = { + .name = DRV_NAME, + .id_table = cpt_id_table, + .probe = cpt_probe, + .remove = cpt_remove, + .shutdown = cpt_shutdown, +}; + +module_pci_driver(cpt_pci_driver); + +MODULE_AUTHOR("George Cherian "); +MODULE_DESCRIPTION("Cavium Thunder CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, cpt_id_table); diff --git a/drivers/crypto/cavium/cpt/cptpf_mbox.c b/drivers/crypto/cavium/cpt/cptpf_mbox.c new file mode 100644 index 000000000000..5818b415e814 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptpf_mbox.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ +#include +#include "cptpf.h" + +static void cpt_send_msg_to_vf(struct cpt_device *cpt, int vf, + struct cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 1), + mbx->data); + cpt_write_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 0), mbx->msg); +} + +/* ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void cpt_mbox_send_ack(struct cpt_device *cpt, int vf, + struct cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = CPT_MBOX_MSG_TYPE_ACK; + cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void cpt_clear_mbox_intr(struct cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_INTX(0, 0), (1 << vf)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void cpt_cfg_qlen_for_vf(struct cpt_device *cpt, int vf, u32 size) +{ + union cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf), pf_qx_ctl.u); +} + +/* + * Configure VQ priority + */ +static void cpt_cfg_vq_priority(struct cpt_device *cpt, int vf, u32 pri) +{ + union cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf)); + pf_qx_ctl.s.pri = pri; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf), pf_qx_ctl.u); +} + +static u8 cpt_bind_vq_to_grp(struct cpt_device *cpt, u8 q, u8 grp) +{ + struct microcode *mcode = cpt->mcode; + union cptx_pf_qx_ctl pf_qx_ctl; + struct device *dev = &cpt->pdev->dev; + + if (q >= CPT_MAX_VF_NUM) { + dev_err(dev, "Queues are more than cores in the group"); + return -EINVAL; + } + if (grp >= CPT_MAX_CORE_GROUPS) { + dev_err(dev, "Request group is more than possible groups"); + return -EINVAL; + } + if (grp >= cpt->next_mc_idx) { + dev_err(dev, "Request group is higher than available functional groups"); + return -EINVAL; + } + pf_qx_ctl.u = 
cpt_read_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, q)); + pf_qx_ctl.s.grp = mcode[grp].group; + cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, q), pf_qx_ctl.u); + dev_dbg(dev, "VF %d TYPE %s", q, (mcode[grp].is_ae ? "AE" : "SE")); + + return mcode[grp].is_ae ? AE_TYPES : SE_TYPES; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void cpt_handle_mbox_intr(struct cpt_device *cpt, int vf) +{ + struct cpt_vf_info *vfx = &cpt->vfinfo[vf]; + struct cpt_mbox mbx = {}; + u8 vftype; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = cpt_read_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 0)); + mbx.data = cpt_read_csr64(cpt->reg_base, CPTX_PF_VFX_MBOXX(0, vf, 1)); + dev_dbg(dev, "%s: Mailbox msg 0x%llx from VF%d", __func__, mbx.msg, vf); + switch (mbx.msg) { + case CPT_MSG_VF_UP: + vfx->state = VF_STATE_UP; + try_module_get(THIS_MODULE); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_READY: + mbx.msg = CPT_MSG_READY; + mbx.data = vf; + cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + vfx->state = VF_STATE_DOWN; + module_put(THIS_MODULE); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_QLEN: + vfx->qlen = mbx.data; + cpt_cfg_qlen_for_vf(cpt, vf, vfx->qlen); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case CPT_MSG_QBIND_GRP: + vftype = cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != AE_TYPES) && (vftype != SE_TYPES)) + dev_err(dev, "Queue %d binding to group %llu failed", + vf, mbx.data); + else { + dev_dbg(dev, "Queue %d binding to group %llu successful", + vf, mbx.data); + mbx.msg = CPT_MSG_QBIND_GRP; + mbx.data = vftype; + cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case CPT_MSG_VQ_PRIORITY: + vfx->priority = mbx.data; + cpt_cfg_vq_priority(cpt, vf, vfx->priority); + cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 
0x%llx\n", + vf, mbx.msg); + break; + } +} + +void cpt_mbox_intr_handler (struct cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = cpt_read_csr64(cpt->reg_base, CPTX_PF_MBOX_INTX(0, 0)); + dev_dbg(&cpt->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr); + for (vf = 0; vf < CPT_MAX_VF_NUM; vf++) { + if (intr & (1ULL << vf)) { + dev_dbg(&cpt->pdev->dev, "Intr from VF %d\n", vf); + cpt_handle_mbox_intr(cpt, vf); + cpt_clear_mbox_intr(cpt, vf); + } + } +} From c694b233295b99c33dd5ac28aede9f171f5a6862 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Tue, 7 Feb 2017 14:51:14 +0000 Subject: [PATCH 110/142] crypto: cavium - Add the Virtual Function driver for CPT Enable the CPT VF driver. CPT is the cryptographic Acceleration Unit in Octeon-tx series of processors. Signed-off-by: George Cherian Reviewed-by: David Daney Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/Makefile | 3 +- drivers/crypto/cavium/cpt/cptvf.h | 135 +++ drivers/crypto/cavium/cpt/cptvf_algs.c | 444 +++++++++ drivers/crypto/cavium/cpt/cptvf_algs.h | 113 +++ drivers/crypto/cavium/cpt/cptvf_main.c | 936 +++++++++++++++++++ drivers/crypto/cavium/cpt/cptvf_mbox.c | 211 +++++ drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 593 ++++++++++++ drivers/crypto/cavium/cpt/request_manager.h | 147 +++ 8 files changed, 2581 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/cavium/cpt/cptvf.h create mode 100644 drivers/crypto/cavium/cpt/cptvf_algs.c create mode 100644 drivers/crypto/cavium/cpt/cptvf_algs.h create mode 100644 drivers/crypto/cavium/cpt/cptvf_main.c create mode 100644 drivers/crypto/cavium/cpt/cptvf_mbox.c create mode 100644 drivers/crypto/cavium/cpt/cptvf_reqmanager.c create mode 100644 drivers/crypto/cavium/cpt/request_manager.h diff --git a/drivers/crypto/cavium/cpt/Makefile b/drivers/crypto/cavium/cpt/Makefile index fe3d454a34a7..dbf055e14622 100644 --- a/drivers/crypto/cavium/cpt/Makefile +++ b/drivers/crypto/cavium/cpt/Makefile @@ -1,2 +1,3 @@ 
-obj-$(CONFIG_CAVIUM_CPT) += cptpf.o +obj-$(CONFIG_CAVIUM_CPT) += cptpf.o cptvf.o cptpf-objs := cptpf_main.o cptpf_mbox.o +cptvf-objs := cptvf_main.o cptvf_reqmanager.o cptvf_mbox.o cptvf_algs.o diff --git a/drivers/crypto/cavium/cpt/cptvf.h b/drivers/crypto/cavium/cpt/cptvf.h new file mode 100644 index 000000000000..1cc04aa611e4 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf.h @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __CPTVF_H +#define __CPTVF_H + +#include +#include "cpt_common.h" + +/* Default command queue length */ +#define CPT_CMD_QLEN 2046 +#define CPT_CMD_QCHUNK_SIZE 1023 + +/* Default command timeout in seconds */ +#define CPT_COMMAND_TIMEOUT 4 +#define CPT_TIMER_THOLD 0xFFFF +#define CPT_NUM_QS_PER_VF 1 +#define CPT_INST_SIZE 64 +#define CPT_NEXT_CHUNK_PTR_SIZE 8 + +#define CPT_VF_MSIX_VECTORS 2 +#define CPT_VF_INTR_MBOX_MASK BIT(0) +#define CPT_VF_INTR_DOVF_MASK BIT(1) +#define CPT_VF_INTR_IRDE_MASK BIT(2) +#define CPT_VF_INTR_NWRP_MASK BIT(3) +#define CPT_VF_INTR_SERR_MASK BIT(4) +#define DMA_DIRECT_DIRECT 0 /* Input DIRECT, Output DIRECT */ +#define DMA_GATHER_SCATTER 1 +#define FROM_DPTR 1 + +/** + * Enumeration cpt_vf_int_vec_e + * + * CPT VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. 
+ */ +enum cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +}; + +struct command_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max CPT_INST_CHUNK_MAX_SIZE */ + struct hlist_node nextchunk; +}; + +struct command_queue { + spinlock_t lock; /* command queue lock */ + u32 idx; /* Command queue host write idx */ + u32 nchunks; /* Number of command chunks */ + struct command_chunk *qhead; /* Command queue head, instructions + * are inserted here + */ + struct hlist_head chead; +}; + +struct command_qinfo { + u32 cmd_size; + u32 qchunksize; /* Command queue chunk size */ + struct command_queue queue[CPT_NUM_QS_PER_VF]; +}; + +struct pending_entry { + u8 busy; /* Entry status (free/busy) */ + + volatile u64 *completion_addr; /* Completion address */ + void *post_arg; + void (*callback)(int, void *); /* Kernel ASYNC request callabck */ + void *callback_arg; /* Kernel ASYNC request callabck arg */ +}; + +struct pending_queue { + struct pending_entry *head; /* head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + atomic64_t pending_count; + spinlock_t lock; /* Queue lock */ +}; + +struct pending_qinfo { + u32 nr_queues; /* Number of queues supported */ + u32 qlen; /* Queue length */ + struct pending_queue queue[CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->nr_queues; i++, \ + q = &qinfo->queue[i]) + +struct cpt_vf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...CPT_MAX_VF_NUM */ + u8 vftype; /* VF type of SE_TYPE(1) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 8) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* VF priority ring: 1-High proirity round + * robin ring;0-Low priority round robin ring; + */ + struct pci_dev *pdev; /* pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH 
worker info */ + /* MSI-X */ + bool msix_enabled; + struct msix_entry msix_entries[CPT_VF_MSIX_VECTORS]; + bool irq_allocated[CPT_VF_MSIX_VECTORS]; + cpumask_var_t affinity_mask[CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 nr_queues; + struct command_qinfo cqinfo; /* Command queue information */ + struct pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +}; + +int cptvf_send_vf_up(struct cpt_vf *cptvf); +int cptvf_send_vf_down(struct cpt_vf *cptvf); +int cptvf_send_vf_to_grp_msg(struct cpt_vf *cptvf); +int cptvf_send_vf_priority_msg(struct cpt_vf *cptvf); +int cptvf_send_vq_size_msg(struct cpt_vf *cptvf); +int cptvf_check_pf_ready(struct cpt_vf *cptvf); +void cptvf_handle_mbox_intr(struct cpt_vf *cptvf); +void cvm_crypto_exit(void); +int cvm_crypto_init(struct cpt_vf *cptvf); +void vq_post_process(struct cpt_vf *cptvf, u32 qno); +void cptvf_write_vq_doorbell(struct cpt_vf *cptvf, u32 val); +#endif /* __CPTVF_H */ diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c new file mode 100644 index 000000000000..cc853f913d4b --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_algs.c @@ -0,0 +1,444 @@ + +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cptvf.h" +#include "cptvf_algs.h" + +struct cpt_device_handle { + void *cdev[MAX_DEVICES]; + u32 dev_count; +}; + +static struct cpt_device_handle dev_handle; + +static void cvm_callback(u32 status, void *arg) +{ + struct crypto_async_request *req = (struct crypto_async_request *)arg; + + req->complete(req, !status); +} + +static inline void update_input_iv(struct cpt_request_info *req_info, + u8 *iv, u32 enc_iv_len, + u32 *argcnt) +{ + /* Setting the iv information */ + req_info->in[*argcnt].vptr = (void *)iv; + req_info->in[*argcnt].size = enc_iv_len; + req_info->req.dlen += enc_iv_len; + + ++(*argcnt); +} + +static inline void update_output_iv(struct cpt_request_info *req_info, + u8 *iv, u32 enc_iv_len, + u32 *argcnt) +{ + /* Setting the iv information */ + req_info->out[*argcnt].vptr = (void *)iv; + req_info->out[*argcnt].size = enc_iv_len; + req_info->rlen += enc_iv_len; + + ++(*argcnt); +} + +static inline void update_input_data(struct cpt_request_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + + ++(*argcnt); + ++inp_sg; + } +} + +static inline void update_output_data(struct cpt_request_info *req_info, + struct scatterlist *outp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *)ptr; + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + ++outp_sg; + } +} + +static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc, + u32 cipher_type, u32 aes_key_type, + u32 *argcnt) +{ + struct crypto_ablkcipher *tfm = 
crypto_ablkcipher_reqtfm(req); + struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct fc_context *fctx = &rctx->fctx; + u64 *offset_control = &rctx->control_word; + u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm); + struct cpt_request_info *req_info = &rctx->cpt_req; + u64 *ctrl_flags = NULL; + + req_info->ctrl.s.grp = 0; + req_info->ctrl.s.dma_mode = DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = SE_CORE_REQ; + + req_info->req.opcode.s.major = MAJOR_OP_FC | + DMA_MODE_FLAG(DMA_GATHER_SCATTER); + if (enc) + req_info->req.opcode.s.minor = 2; + else + req_info->req.opcode.s.minor = 3; + + req_info->req.param1 = req->nbytes; /* Encryption Data length */ + req_info->req.param2 = 0; /*Auth data length */ + + fctx->enc.enc_ctrl.e.enc_cipher = cipher_type; + fctx->enc.enc_ctrl.e.aes_key = aes_key_type; + fctx->enc.enc_ctrl.e.iv_source = FROM_DPTR; + + if (cipher_type == AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags; + *ctrl_flags = cpu_to_be64(*ctrl_flags); + + *offset_control = cpu_to_be64(((u64)(enc_iv_len) << 16)); + /* Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)offset_control; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct fc_context); + req_info->req.dlen += sizeof(struct fc_context); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct ablkcipher_request *req, u32 enc, + u32 cipher_type, u32 aes_key_type, + u32 enc_iv_len) +{ + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct cpt_request_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + create_ctx_hdr(req, enc, cipher_type, aes_key_type, &argcnt); + 
update_input_iv(req_info, req->info, enc_iv_len, &argcnt); + update_input_data(req_info, req->src, req->nbytes, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void store_cb_info(struct ablkcipher_request *req, + struct cpt_request_info *req_info) +{ + req_info->callback = (void *)cvm_callback; + req_info->callback_arg = (void *)&req->base; +} + +static inline void create_output_list(struct ablkcipher_request *req, + u32 cipher_type, + u32 enc_iv_len) +{ + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + struct cpt_request_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + /* Reading IV information */ + update_output_iv(req_info, req->info, enc_iv_len, &argcnt); + update_output_data(req_info, req->dst, req->nbytes, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc, + u32 cipher_type) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm); + u32 key_type = AES_128_BIT; + struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req); + u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm); + struct fc_context *fctx = &rctx->fctx; + struct cpt_request_info *req_info = &rctx->cpt_req; + void *cdev = NULL; + int status; + + switch (ctx->key_len) { + case 16: + key_type = AES_128_BIT; + break; + case 24: + key_type = AES_192_BIT; + break; + case 32: + if (cipher_type == AES_XTS) + key_type = AES_128_BIT; + else + key_type = AES_256_BIT; + break; + case 64: + if (cipher_type == AES_XTS) + key_type = AES_256_BIT; + else + return -EINVAL; + break; + default: + return -EINVAL; + } + + if (cipher_type == DES3_CBC) + key_type = 0; + + memset(req_info, 0, sizeof(struct cpt_request_info)); + 
memset(fctx, 0, sizeof(struct fc_context)); + create_input_list(req, enc, cipher_type, key_type, enc_iv_len); + create_output_list(req, cipher_type, enc_iv_len); + store_cb_info(req, req_info); + cdev = dev_handle.cdev[smp_processor_id()]; + status = cptvf_do_request(cdev, req_info); + /* We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + + if (status) + return status; + else + return -EINPROGRESS; +} + +int cvm_des3_encrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, true, DES3_CBC); +} + +int cvm_des3_decrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, false, DES3_CBC); +} + +int cvm_aes_encrypt_xts(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, true, AES_XTS); +} + +int cvm_aes_decrypt_xts(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, false, AES_XTS); +} + +int cvm_aes_encrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, true, AES_CBC); +} + +int cvm_aes_decrypt_cbc(struct ablkcipher_request *req) +{ + return cvm_enc_dec(req, false, AES_CBC); +} + +int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + const u8 *key1 = key; + const u8 *key2 = key + (keylen / 2); + + err = xts_check_key(tfm, key, keylen); + if (err) + return err; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + + return 0; +} + +int cvm_enc_dec_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + u32 keylen) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + + if ((keylen == 16) || (keylen == 24) || (keylen == 32)) { + ctx->key_len = keylen; + memcpy(ctx->enc_key, key, keylen); + return 0; + } + 
crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + + return -EINVAL; +} + +int cvm_enc_dec_init(struct crypto_tfm *tfm) +{ + struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + tfm->crt_ablkcipher.reqsize = sizeof(struct cvm_req_ctx) + + sizeof(struct ablkcipher_request); + /* Additional memory for ablkcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + + return 0; +} + +struct crypto_alg algs[] = { { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "xts(aes)", + .cra_driver_name = "cavium-xts-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = cvm_xts_setkey, + .encrypt = cvm_aes_encrypt_xts, + .decrypt = cvm_aes_decrypt_xts, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_enc_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "cbc(aes)", + .cra_driver_name = "cavium-cbc-aes", + .cra_type = &crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = cvm_enc_dec_setkey, + .encrypt = cvm_aes_encrypt_cbc, + .decrypt = cvm_aes_decrypt_cbc, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +}, { + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cvm_des3_ctx), + .cra_alignmask = 7, + .cra_priority = 4001, + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cavium-cbc-des3_ede", + .cra_type = 
&crypto_ablkcipher_type, + .cra_u = { + .ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = cvm_enc_dec_setkey, + .encrypt = cvm_des3_encrypt_cbc, + .decrypt = cvm_des3_decrypt_cbc, + }, + }, + .cra_init = cvm_enc_dec_init, + .cra_module = THIS_MODULE, +} }; + +static inline int cav_register_algs(void) +{ + int err = 0; + + err = crypto_register_algs(algs, ARRAY_SIZE(algs)); + if (err) + return err; + + return 0; +} + +static inline void cav_unregister_algs(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +int cvm_crypto_init(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 dev_count; + + dev_count = dev_handle.dev_count; + dev_handle.cdev[dev_count] = cptvf; + dev_handle.dev_count++; + + if (dev_count == 3) { + if (cav_register_algs()) { + dev_err(&pdev->dev, "Error in registering crypto algorithms\n"); + return -EINVAL; + } + } + + return 0; +} + +void cvm_crypto_exit(void) +{ + u32 dev_count; + + dev_count = --dev_handle.dev_count; + if (!dev_count) + cav_unregister_algs(); +} diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.h b/drivers/crypto/cavium/cpt/cptvf_algs.h new file mode 100644 index 000000000000..a12050d11b0c --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_algs.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef _CPTVF_ALGS_H_ +#define _CPTVF_ALGS_H_ + +#include "request_manager.h" + +#define MAX_DEVICES 16 +#define MAJOR_OP_FC 0x33 +#define MAX_ENC_KEY_SIZE 32 +#define MAX_HASH_KEY_SIZE 64 +#define MAX_KEY_SIZE (MAX_ENC_KEY_SIZE + MAX_HASH_KEY_SIZE) +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 + +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == DMA_GATHER_SCATTER) ? 
(1 << 7) : 0) + +enum req_type { + AE_CORE_REQ, + SE_CORE_REQ, +}; + +enum cipher_type { + DES3_CBC = 0x1, + DES3_ECB = 0x2, + AES_CBC = 0x3, + AES_ECB = 0x4, + AES_CFB = 0x5, + AES_CTR = 0x6, + AES_GCM = 0x7, + AES_XTS = 0x8 +}; + +enum aes_type { + AES_128_BIT = 0x1, + AES_192_BIT = 0x2, + AES_256_BIT = 0x3 +}; + +union encr_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 hash_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 hash_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct enc_context { + union encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +struct fchmac_context { + u8 ipad[64]; + u8 opad[64]; /* or OPAD */ +}; + +struct fc_context { + struct enc_context enc; + struct fchmac_context hmac; +}; + +struct cvm_enc_ctx { + u32 key_len; + u8 enc_key[MAX_KEY_SIZE]; +}; + +struct cvm_des3_ctx { + u32 key_len; + u8 des3_key[MAX_KEY_SIZE]; +}; + +struct cvm_req_ctx { + struct cpt_request_info cpt_req; + u64 control_word; + struct fc_context fctx; +}; + +int cptvf_do_request(void *cptvf, struct cpt_request_info *req); +#endif /*_CPTVF_ALGS_H_*/ diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c new file mode 100644 index 000000000000..527bdc3c2969 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -0,0 +1,936 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#include +#include + +#include "cptvf.h" + +#define DRV_NAME "thunder-cptvf" +#define DRV_VERSION "1.0" + +struct cptvf_wqe { + struct tasklet_struct twork; + void *cptvf; + u32 qno; +}; + +struct cptvf_wqe_info { + struct cptvf_wqe vq_wqe[CPT_NUM_QS_PER_VF]; +}; + +static void vq_work_handler(unsigned long data) +{ + struct cptvf_wqe_info *cwqe_info = (struct cptvf_wqe_info *)data; + struct cptvf_wqe *cwqe = &cwqe_info->vq_wqe[0]; + + vq_post_process(cwqe->cptvf, cwqe->qno); +} + +static int init_worker_threads(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->nr_queues) { + dev_info(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->nr_queues); + } + + for (i = 0; i < cptvf->nr_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].qno = i; + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct cpt_vf *cptvf) +{ + struct cptvf_wqe_info *cwqe_info; + struct pci_dev *pdev = cptvf->pdev; + int i; + + cwqe_info = (struct cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->nr_queues) { + dev_info(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->nr_queues); + } + + for (i = 0; i < cptvf->nr_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kzfree(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct pending_qinfo *pqinfo) +{ + int i; + struct pending_queue *queue; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kzfree((queue->head)); + + queue->front = 0; + queue->rear = 0; + + return; + } + + pqinfo->qlen = 0; + pqinfo->nr_queues = 0; +} + +static int alloc_pending_queues(struct pending_qinfo *pqinfo, u32 qlen, + 
u32 nr_queues) +{ + u32 i; + size_t size; + int ret; + struct pending_queue *queue = NULL; + + pqinfo->nr_queues = nr_queues; + pqinfo->qlen = qlen; + + size = (qlen * sizeof(struct pending_entry)); + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kzalloc((size), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->front = 0; + queue->rear = 0; + atomic64_set((&queue->pending_count), (0)); + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct cpt_vf *cptvf, u32 qlen, u32 nr_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!nr_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, nr_queues); + if (ret) { + dev_err(&pdev->dev, "failed to setup pending queues (%u)\n", + nr_queues); + return ret; + } + + return 0; +} + +static void cleanup_pending_queues(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->nr_queues) + return; + + dev_info(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->nr_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct cpt_vf *cptvf, + struct command_qinfo *cqinfo) +{ + int i; + struct command_queue *queue = NULL; + struct command_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + struct hlist_node *node; + + /* clean up for each queue */ + for (i = 0; i < cptvf->nr_queues; i++) { + queue = &cqinfo->queue[i]; + if (hlist_empty(&cqinfo->queue[i].chead)) + continue; + + hlist_for_each_entry_safe(chunk, node, &cqinfo->queue[i].chead, + nextchunk) { + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + hlist_del(&chunk->nextchunk); + kzfree(chunk); + } + + queue->nchunks = 0; + queue->idx = 0; + } + + /* common cleanup */ + cqinfo->cmd_size = 0; +} + +static int 
alloc_command_queues(struct cpt_vf *cptvf, + struct command_qinfo *cqinfo, size_t cmd_size, + u32 qlen) +{ + int i; + size_t q_size; + struct command_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + + /* common init */ + cqinfo->cmd_size = cmd_size; + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * cqinfo->cmd_size; + + /* per queue initialization */ + for (i = 0; i < cptvf->nr_queues; i++) { + size_t c_size = 0; + size_t rem_q_size = q_size; + struct command_chunk *curr = NULL, *first = NULL, *last = NULL; + u32 qcsize_bytes = cqinfo->qchunksize * cqinfo->cmd_size; + + queue = &cqinfo->queue[i]; + INIT_HLIST_HEAD(&cqinfo->queue[i].chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : + rem_q_size; + curr->head = (u8 *)dma_zalloc_coherent(&pdev->dev, + c_size + CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->nchunks); + goto cmd_qfail; + } + + curr->size = c_size; + if (queue->nchunks == 0) { + hlist_add_head(&curr->nextchunk, + &cqinfo->queue[i].chead); + first = curr; + } else { + hlist_add_behind(&curr->nextchunk, + &last->nextchunk); + } + + queue->nchunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* Make the queue circular */ + /* Tie back last chunk entry to head */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + spin_lock_init(&queue->lock); + } + return 0; + +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct cpt_vf *cptvf, u32 qlen) +{ + struct pci_dev *pdev 
= cptvf->pdev; + int ret; + + /* setup AE command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, CPT_INST_SIZE, + qlen); + if (ret) { + dev_err(&pdev->dev, "failed to allocate AE command queues (%u)\n", + cptvf->nr_queues); + return ret; + } + + return ret; +} + +static void cleanup_command_queues(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->nr_queues) + return; + + dev_info(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->nr_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct cpt_vf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct cpt_vf *cptvf, u32 qlen, u32 nr_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret = 0; + u32 max_dev_queues = 0; + + max_dev_queues = CPT_NUM_QS_PER_VF; + /* possible cpus */ + nr_queues = min_t(u32, nr_queues, max_dev_queues); + cptvf->nr_queues = nr_queues; + + ret = init_command_queues(cptvf, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + nr_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, nr_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + nr_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_disable_msix(struct cpt_vf *cptvf) +{ + if (cptvf->msix_enabled) { + pci_disable_msix(cptvf->pdev); + cptvf->msix_enabled = 0; + } +} + +static int cptvf_enable_msix(struct cpt_vf *cptvf) +{ + int i, ret; + + for (i = 0; i < CPT_VF_MSIX_VECTORS; i++) + cptvf->msix_entries[i].entry = i; + + ret 
= pci_enable_msix(cptvf->pdev, cptvf->msix_entries, + CPT_VF_MSIX_VECTORS); + if (ret) { + dev_err(&cptvf->pdev->dev, "Request for #%d msix vectors failed\n", + CPT_VF_MSIX_VECTORS); + return ret; + } + + cptvf->msix_enabled = 1; + /* Mark MSIX enabled */ + cptvf->flags |= CPT_FLAG_MSIX_ENABLED; + + return 0; +} + +static void cptvf_free_all_interrupts(struct cpt_vf *cptvf) +{ + int irq; + + for (irq = 0; irq < CPT_VF_MSIX_VECTORS; irq++) { + if (cptvf->irq_allocated[irq]) + irq_set_affinity_hint(cptvf->msix_entries[irq].vector, + NULL); + free_cpumask_var(cptvf->affinity_mask[irq]); + free_irq(cptvf->msix_entries[irq].vector, cptvf); + cptvf->irq_allocated[irq] = false; + } +} + +static void cptvf_write_vq_ctl(struct cpt_vf *cptvf, bool val) +{ + union cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_CTL(0, 0)); + vqx_ctl.s.ena = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_CTL(0, 0), vqx_ctl.u); +} + +void cptvf_write_vq_doorbell(struct cpt_vf *cptvf, u32 val) +{ + union cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DOORBELL(0, 0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DOORBELL(0, 0), + vqx_dbell.u); +} + +static void cptvf_write_vq_inprog(struct cpt_vf *cptvf, u8 val) +{ + union cptx_vqx_inprog vqx_inprg; + + vqx_inprg.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_INPROG(0, 0)); + vqx_inprg.s.inflight = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_INPROG(0, 0), vqx_inprg.u); +} + +static void cptvf_write_vq_done_numwait(struct cpt_vf *cptvf, u32 val) +{ + union cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_WAIT(0, 0)); + vqx_dwait.s.num_wait = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_WAIT(0, 0), + vqx_dwait.u); +} + +static void cptvf_write_vq_done_timewait(struct cpt_vf *cptvf, u16 time) +{ + union cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u 
= cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_WAIT(0, 0)); + vqx_dwait.s.time_wait = time; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_WAIT(0, 0), + vqx_dwait.u); +} + +static void cptvf_enable_swerr_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_ENA_W1S(0, 0)); + /* Set mbox(0) interupts for the requested vf */ + vqx_misc_ena.s.swerr = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_ENA_W1S(0, 0), + vqx_misc_ena.u); +} + +static void cptvf_enable_mbox_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_ENA_W1S(0, 0)); + /* Set mbox(0) interupts for the requested vf */ + vqx_misc_ena.s.mbox = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_ENA_W1S(0, 0), + vqx_misc_ena.u); +} + +static void cptvf_enable_done_interrupts(struct cpt_vf *cptvf) +{ + union cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_ENA_W1S(0, 0)); + /* Set DONE interrupt for the requested vf */ + vqx_done_ena.s.done = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_ENA_W1S(0, 0), + vqx_done_ena.u); +} + +static void cptvf_clear_dovf_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_irde_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_nwrp_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + 
vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + cpt_write_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u); +} + +static void cptvf_clear_mbox_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static void cptvf_clear_swerr_intr(struct cpt_vf *cptvf) +{ + union cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_MISC_INT(0, 0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0), + vqx_misc_int.u); +} + +static u64 cptvf_read_vf_misc_intr_status(struct cpt_vf *cptvf) +{ + return cpt_read_csr64(cptvf->reg_base, CPTX_VQX_MISC_INT(0, 0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int irq, void *cptvf_irq) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)cptvf_irq; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /*Check for MISC interrupt types*/ + if (likely(intr & CPT_VF_INTR_MBOX_MASK)) { + dev_err(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /*Clear doorbell count*/ + cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, 
"NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct cptvf_wqe *get_cptvf_vq_wqe(struct cpt_vf *cptvf, + int qno) +{ + struct cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->nr_queues)) + return NULL; + nwqe_info = (struct cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct cpt_vf *cptvf) +{ + union cptx_vqx_done vqx_done; + + vqx_done.u = cpt_read_csr64(cptvf->reg_base, CPTX_VQX_DONE(0, 0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct cpt_vf *cptvf, + u32 ackcnt) +{ + union cptx_vqx_done_ack vqx_dack_cnt; + + vqx_dack_cnt.u = cpt_read_csr64(cptvf->reg_base, + CPTX_VQX_DONE_ACK(0, 0)); + vqx_dack_cnt.s.done_ack = ackcnt; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_DONE_ACK(0, 0), + vqx_dack_cnt.u); +} + +static irqreturn_t cptvf_done_intr_handler(int irq, void *cptvf_irq) +{ + struct cpt_vf *cptvf = (struct cpt_vf *)cptvf_irq; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct cptvf_wqe *wqe; + + /* Acknowledge the number of + * scheduled completions for processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static int cptvf_register_misc_intr(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* Register misc interrupt handlers */ + ret = 
request_irq(cptvf->msix_entries[CPT_VF_INT_VEC_E_MISC].vector, + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (ret) + goto fail; + + cptvf->irq_allocated[CPT_VF_INT_VEC_E_MISC] = true; + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + return 0; + +fail: + dev_err(&pdev->dev, "Request misc irq failed"); + cptvf_free_all_interrupts(cptvf); + return ret; +} + +static int cptvf_register_done_intr(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* Register DONE interrupt handlers */ + ret = request_irq(cptvf->msix_entries[CPT_VF_INT_VEC_E_DONE].vector, + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (ret) + goto fail; + + cptvf->irq_allocated[CPT_VF_INT_VEC_E_DONE] = true; + + /* Enable mailbox interrupt */ + cptvf_enable_done_interrupts(cptvf); + return 0; + +fail: + dev_err(&pdev->dev, "Request done irq failed\n"); + cptvf_free_all_interrupts(cptvf); + return ret; +} + +static void cptvf_unregister_interrupts(struct cpt_vf *cptvf) +{ + cptvf_free_all_interrupts(cptvf); + cptvf_disable_msix(cptvf); +} + +static void cptvf_set_irq_affinity(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + int vec, cpu; + int irqnum; + + for (vec = 0; vec < CPT_VF_MSIX_VECTORS; vec++) { + if (!cptvf->irq_allocated[vec]) + continue; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; + } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irqnum = cptvf->msix_entries[vec].vector; + irq_set_affinity_hint(irqnum, cptvf->affinity_mask[vec]); + } +} + +static void cptvf_write_vq_saddr(struct cpt_vf *cptvf, u64 val) +{ + union cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + cpt_write_csr64(cptvf->reg_base, CPTX_VQX_SADDR(0, 0), vqx_saddr.u); +} + 
+void cptvf_device_init(struct cpt_vf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + /* TODO: for now only one queue, so hard coded */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, CPT_TIMER_THOLD); + cptvf_write_vq_done_numwait(cptvf, 1); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= CPT_FLAG_DEVICE_READY; +} + +static int cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct cpt_vf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto cptvf_err_disable_device; + } + /* Mark as VF driver */ + cptvf->flags |= CPT_FLAG_VF_DRIVER; + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto cptvf_err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto cptvf_err_release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pcim_iomap(pdev, 0, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto cptvf_err_release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + /* Enable MSI-X */ + 
err = cptvf_enable_msix(cptvf);
+	if (err) {
+		dev_err(dev, "cptvf_enable_msix() failed");
+		goto cptvf_err_release_regions;
+	}
+
+	/*
+	 * Register mailbox interrupts. The PF handshake below relies on
+	 * mailbox messages being delivered, so do not carry on when the
+	 * registration failed (the helper has already logged the error and
+	 * released the interrupts it had requested).
+	 */
+	err = cptvf_register_misc_intr(cptvf);
+	if (err)
+		goto cptvf_err_release_regions;
+
+	/* Check ready with PF */
+	/* Gets chip ID / device Id from PF if ready */
+	err = cptvf_check_pf_ready(cptvf);
+	if (err) {
+		dev_err(dev, "PF not responding to READY msg");
+		goto cptvf_err_release_regions;
+	}
+
+	/* CPT VF software resources initialization */
+	cptvf->cqinfo.qchunksize = CPT_CMD_QCHUNK_SIZE;
+	err = cptvf_sw_init(cptvf, CPT_CMD_QLEN, CPT_NUM_QS_PER_VF);
+	if (err) {
+		dev_err(dev, "cptvf_sw_init() failed");
+		goto cptvf_err_release_regions;
+	}
+	/* Convey VQ LEN to PF */
+	err = cptvf_send_vq_size_msg(cptvf);
+	if (err) {
+		dev_err(dev, "PF not responding to QLEN msg");
+		goto cptvf_err_release_regions;
+	}
+
+	/* CPT VF device initialization */
+	cptvf_device_init(cptvf);
+	/* Send msg to PF to assign current Q to required group */
+	cptvf->vfgrp = 1;
+	err = cptvf_send_vf_to_grp_msg(cptvf);
+	if (err) {
+		dev_err(dev, "PF not responding to VF_GRP msg");
+		goto cptvf_err_release_regions;
+	}
+
+	cptvf->priority = 1;
+	err = cptvf_send_vf_priority_msg(cptvf);
+	if (err) {
+		dev_err(dev, "PF not responding to VF_PRIO msg");
+		goto cptvf_err_release_regions;
+	}
+	/* Register DONE interrupts */
+	err = cptvf_register_done_intr(cptvf);
+	if (err)
+		goto cptvf_err_release_regions;
+
+	/* Set irq affinity masks */
+	cptvf_set_irq_affinity(cptvf);
+	/* Convey UP to PF */
+	err = cptvf_send_vf_up(cptvf);
+	if (err) {
+		dev_err(dev, "PF not responding to UP msg");
+		goto cptvf_up_fail;
+	}
+	err = cvm_crypto_init(cptvf);
+	if (err) {
+		dev_err(dev, "Algorithm register failed\n");
+		goto cptvf_up_fail;
+	}
+	return 0;
+
+cptvf_up_fail:
+	cptvf_unregister_interrupts(cptvf);
+cptvf_err_release_regions:
+	pci_release_regions(pdev);
+cptvf_err_disable_device:
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+
+	return err;
+}
+
+static void
cptvf_remove(struct pci_dev *pdev)
+{
+	struct cpt_vf *cptvf = pci_get_drvdata(pdev);
+
+	/*
+	 * Without driver data there is nothing to tear down; bail out instead
+	 * of passing a NULL cptvf to the mailbox and cleanup helpers below.
+	 */
+	if (!cptvf) {
+		dev_err(&pdev->dev, "Invalid CPT-VF device\n");
+		return;
+	}
+
+	/* Convey DOWN to PF */
+	if (cptvf_send_vf_down(cptvf)) {
+		dev_err(&pdev->dev, "PF not responding to DOWN msg");
+	} else {
+		cptvf_unregister_interrupts(cptvf);
+		cptvf_sw_cleanup(cptvf);
+		pci_set_drvdata(pdev, NULL);
+		pci_release_regions(pdev);
+		pci_disable_device(pdev);
+		cvm_crypto_exit();
+	}
+}
+
+static void cptvf_shutdown(struct pci_dev *pdev)
+{
+	cptvf_remove(pdev);
+}
+
+/* Supported devices */
+static const struct pci_device_id cptvf_id_table[] = {
+	{PCI_VDEVICE(CAVIUM, CPT_81XX_PCI_VF_DEVICE_ID), 0},
+	{ 0, }  /* end of table */
+};
+
+static struct pci_driver cptvf_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = cptvf_id_table,
+	.probe = cptvf_probe,
+	.remove = cptvf_remove,
+	.shutdown = cptvf_shutdown,
+};
+
+module_pci_driver(cptvf_pci_driver);
+
+MODULE_AUTHOR("George Cherian ");
+MODULE_DESCRIPTION("Cavium Thunder CPT Virtual Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, cptvf_id_table);
diff --git a/drivers/crypto/cavium/cpt/cptvf_mbox.c b/drivers/crypto/cavium/cpt/cptvf_mbox.c
new file mode 100644
index 000000000000..d5ec3b8a9e61
--- /dev/null
+++ b/drivers/crypto/cavium/cpt/cptvf_mbox.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */ + +#include "cptvf.h" + +static void cptvf_send_msg_to_pf(struct cpt_vf *cptvf, struct cpt_mbox *mbx) +{ + /* Writing mbox(1) causes interrupt */ + cpt_write_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 0), + mbx->msg); + cpt_write_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 1), + mbx->data); +} + +/* ACKs PF's mailbox message + */ +void cptvf_mbox_send_ack(struct cpt_vf *cptvf, struct cpt_mbox *mbx) +{ + mbx->msg = CPT_MBOX_MSG_TYPE_ACK; + cptvf_send_msg_to_pf(cptvf, mbx); +} + +/* NACKs PF's mailbox message that VF is not able to + * complete the action + */ +void cptvf_mbox_send_nack(struct cpt_vf *cptvf, struct cpt_mbox *mbx) +{ + mbx->msg = CPT_MBOX_MSG_TYPE_NACK; + cptvf_send_msg_to_pf(cptvf, mbx); +} + +/* Interrupt handler to handle mailbox messages from VFs */ +void cptvf_handle_mbox_intr(struct cpt_vf *cptvf) +{ + struct cpt_mbox mbx = {}; + + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = cpt_read_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 0)); + mbx.data = cpt_read_csr64(cptvf->reg_base, CPTX_VFX_PF_MBOXX(0, 0, 1)); + dev_dbg(&cptvf->pdev->dev, "%s: Mailbox msg 0x%llx from PF\n", + __func__, mbx.msg); + switch (mbx.msg) { + case CPT_MSG_READY: + { + cptvf->pf_acked = true; + cptvf->vfid = mbx.data; + dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid); + break; + } + case CPT_MSG_QBIND_GRP: + cptvf->pf_acked = true; + cptvf->vftype = mbx.data; + dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n", + cptvf->vfid, ((mbx.data == SE_TYPES) ? 
"SE" : "AE"), + cptvf->vfgrp); + break; + case CPT_MBOX_MSG_TYPE_ACK: + cptvf->pf_acked = true; + break; + case CPT_MBOX_MSG_TYPE_NACK: + cptvf->pf_nacked = true; + break; + default: + dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n", + mbx.msg); + break; + } +} + +static int cptvf_send_msg_to_pf_timeout(struct cpt_vf *cptvf, + struct cpt_mbox *mbx) +{ + int timeout = CPT_MBOX_MSG_TIMEOUT; + int sleep = 10; + + cptvf->pf_acked = false; + cptvf->pf_nacked = false; + cptvf_send_msg_to_pf(cptvf, mbx); + /* Wait for previous message to be acked, timeout 2sec */ + while (!cptvf->pf_acked) { + if (cptvf->pf_nacked) + return -EINVAL; + msleep(sleep); + if (cptvf->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + dev_err(&cptvf->pdev->dev, "PF didn't ack to mbox msg %llx from VF%u\n", + (mbx->msg & 0xFF), cptvf->vfid); + return -EBUSY; + } + } + + return 0; +} + +/* + * Checks if VF is able to comminicate with PF + * and also gets the CPT number this VF is associated to. + */ +int cptvf_check_pf_ready(struct cpt_vf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct cpt_mbox mbx = {}; + + mbx.msg = CPT_MSG_READY; + if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) { + dev_err(&pdev->dev, "PF didn't respond to READY msg\n"); + return -EBUSY; + } + + return 0; +} + +/* + * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF. + * Must be ACKed. 
+ */
+int cptvf_send_vq_size_msg(struct cpt_vf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	struct cpt_mbox mbx = {};
+
+	mbx.msg = CPT_MSG_QLEN;
+	mbx.data = cptvf->qsize;
+	if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) {
+		dev_err(&pdev->dev, "PF didn't respond to vq_size msg\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/*
+ * Communicate VF group required to PF and get the VQ bound to that group
+ */
+int cptvf_send_vf_to_grp_msg(struct cpt_vf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	struct cpt_mbox mbx = {};
+
+	mbx.msg = CPT_MSG_QBIND_GRP;
+	/* Convey group of the VF */
+	mbx.data = cptvf->vfgrp;
+	if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) {
+		dev_err(&pdev->dev, "PF didn't respond to vf_type msg\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/*
+ * Communicate the priority requested for this VF's queue to PF
+ */
+int cptvf_send_vf_priority_msg(struct cpt_vf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	struct cpt_mbox mbx = {};
+
+	mbx.msg = CPT_MSG_VQ_PRIORITY;
+	/* Convey priority of the VF */
+	mbx.data = cptvf->priority;
+	if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) {
+		dev_err(&pdev->dev, "PF didn't respond to vf_priority msg\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/*
+ * Communicate to PF that VF is UP and running
+ */
+int cptvf_send_vf_up(struct cpt_vf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	struct cpt_mbox mbx = {};
+
+	mbx.msg = CPT_MSG_VF_UP;
+	if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) {
+		dev_err(&pdev->dev, "PF didn't respond to UP msg\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/*
+ * Communicate to PF that VF is going DOWN
+ */
+int cptvf_send_vf_down(struct cpt_vf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	struct cpt_mbox mbx = {};
+
+	mbx.msg = CPT_MSG_VF_DOWN;
+	if (cptvf_send_msg_to_pf_timeout(cptvf, &mbx)) {
+		dev_err(&pdev->dev, "PF didn't respond to DOWN msg\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c new file mode 100644 index 000000000000..7f57f30f8863 --- /dev/null +++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c @@ -0,0 +1,593 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include "cptvf.h" +#include "request_manager.h" + +/** + * get_free_pending_entry - get free entry from pending queue + * @param pqinfo: pending_qinfo structure + * @param qno: queue number + */ +static struct pending_entry *get_free_pending_entry(struct pending_queue *q, + int qlen) +{ + struct pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) { + ent = NULL; + goto no_free_entry; + } + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + +no_free_entry: + return ent; +} + +static inline void pending_queue_inc_front(struct pending_qinfo *pqinfo, + int qno) +{ + struct pending_queue *queue = &pqinfo->queue[qno]; + + queue->front++; + if (unlikely(queue->front == pqinfo->qlen)) + queue->front = 0; +} + +static int setup_sgio_components(struct cpt_vf *cptvf, struct buf_ptr *list, + int buf_count, u8 *buffer) +{ + int ret = 0, i, j; + int components; + struct sglist_component *sg_ptr = NULL; + struct pci_dev *pdev = cptvf->pdev; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input List pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (likely(list[i].vptr)) { + list[i].dma_addr = dma_map_single(&pdev->dev, + list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, + list[i].dma_addr))) { + dev_err(&pdev->dev, "DMA map kernel buffer failed for component: %d\n", + i); + ret = -EIO; + goto sg_cleanup; + } + } + } + + components = buf_count / 4; + sg_ptr = (struct sglist_component *)buffer; + for (i = 0; i < components; i++) { + 
sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
+		sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
+		sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
+		sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size);
+		sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
+		sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
+		sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
+		sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr);
+		sg_ptr++;
+	}
+
+	components = buf_count % 4;
+
+	switch (components) {
+	case 3:
+		sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size);
+		sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
+		/* Fall through */
+	case 2:
+		sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
+		sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
+		/* Fall through */
+	case 1:
+		sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
+		sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+
+sg_cleanup:
+	/*
+	 * Undo the mappings made before the failing entry. The loop walks the
+	 * already-mapped entries with j; i is the entry whose mapping failed
+	 * and must not be unmapped.
+	 */
+	for (j = 0; j < i; j++) {
+		if (list[j].dma_addr) {
+			dma_unmap_single(&pdev->dev, list[j].dma_addr,
+					 list[j].size, DMA_BIDIRECTIONAL);
+		}
+
+		list[j].dma_addr = 0;
+	}
+
+	return ret;
+}
+
+static inline int setup_sgio_list(struct cpt_vf *cptvf,
+				  struct cpt_info_buffer *info,
+				  struct cpt_request_info *req)
+{
+	u16 g_sz_bytes = 0, s_sz_bytes = 0;
+	int ret = 0;
+	struct pci_dev *pdev = cptvf->pdev;
+
+	if (req->incnt > MAX_SG_IN_CNT || req->outcnt > MAX_SG_OUT_CNT) {
+		dev_err(&pdev->dev, "Request SG components are higher than supported\n");
+		ret = -EINVAL;
+		goto  scatter_gather_clean;
+	}
+
+	/* Setup gather (input) components */
+	g_sz_bytes = ((req->incnt + 3) / 4) * sizeof(struct sglist_component);
+	info->gather_components = kzalloc(g_sz_bytes, GFP_KERNEL);
+	if (!info->gather_components) {
+		ret = -ENOMEM;
+		goto  scatter_gather_clean;
+	}
+
+	ret = setup_sgio_components(cptvf, req->in,
+				    req->incnt,
+				    info->gather_components);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to setup
gather list\n");
+		ret = -EFAULT;
+		goto  scatter_gather_clean;
+	}
+
+	/* Setup scatter (output) components */
+	s_sz_bytes = ((req->outcnt + 3) / 4) * sizeof(struct sglist_component);
+	info->scatter_components = kzalloc(s_sz_bytes, GFP_KERNEL);
+	if (!info->scatter_components) {
+		ret = -ENOMEM;
+		goto  scatter_gather_clean;
+	}
+
+	ret = setup_sgio_components(cptvf, req->out,
+				    req->outcnt,
+				    info->scatter_components);
+	if (ret) {
+		/* This is the output side: report it as the scatter list */
+		dev_err(&pdev->dev, "Failed to setup scatter list\n");
+		ret = -EFAULT;
+		goto  scatter_gather_clean;
+	}
+
+	/* Create and initialize DPTR */
+	info->dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
+	info->in_buffer = kzalloc(info->dlen, GFP_KERNEL);
+	if (!info->in_buffer) {
+		ret = -ENOMEM;
+		goto  scatter_gather_clean;
+	}
+
+	/* 8-byte header: out count, in count, two reserved words */
+	((u16 *)info->in_buffer)[0] = req->outcnt;
+	((u16 *)info->in_buffer)[1] = req->incnt;
+	((u16 *)info->in_buffer)[2] = 0;
+	((u16 *)info->in_buffer)[3] = 0;
+	*(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer);
+
+	/* Gather components follow the header, then the scatter components */
+	memcpy(&info->in_buffer[8], info->gather_components,
+	       g_sz_bytes);
+	memcpy(&info->in_buffer[8 + g_sz_bytes],
+	       info->scatter_components, s_sz_bytes);
+
+	info->dptr_baddr = dma_map_single(&pdev->dev,
+					  (void *)info->in_buffer,
+					  info->dlen,
+					  DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&pdev->dev, info->dptr_baddr)) {
+		dev_err(&pdev->dev, "Mapping DPTR Failed %d\n", info->dlen);
+		ret = -EIO;
+		goto  scatter_gather_clean;
+	}
+
+	/* Create and initialize RPTR */
+	info->out_buffer = kzalloc(COMPLETION_CODE_SIZE, GFP_KERNEL);
+	if (!info->out_buffer) {
+		ret = -ENOMEM;
+		goto scatter_gather_clean;
+	}
+
+	*((u64 *)info->out_buffer) = ~((u64)COMPLETION_CODE_INIT);
+	info->alternate_caddr = (u64 *)info->out_buffer;
+	info->rptr_baddr = dma_map_single(&pdev->dev,
+					  (void *)info->out_buffer,
+					  COMPLETION_CODE_SIZE,
+					  DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&pdev->dev, info->rptr_baddr)) {
+		dev_err(&pdev->dev, "Mapping RPTR Failed %d\n",
+			COMPLETION_CODE_SIZE);
+		ret = -EIO;
+		goto
scatter_gather_clean; + } + + return 0; + +scatter_gather_clean: + return ret; +} + +int send_cpt_command(struct cpt_vf *cptvf, union cpt_inst_s *cmd, + u32 qno) +{ + struct pci_dev *pdev = cptvf->pdev; + struct command_qinfo *qinfo = NULL; + struct command_queue *queue; + struct command_chunk *chunk; + u8 *ent; + int ret = 0; + + if (unlikely(qno >= cptvf->nr_queues)) { + dev_err(&pdev->dev, "Invalid queue (qno: %d, nr_queues: %d)\n", + qno, cptvf->nr_queues); + return -EINVAL; + } + + qinfo = &cptvf->cqinfo; + queue = &qinfo->queue[qno]; + /* lock commad queue */ + spin_lock(&queue->lock); + ent = &queue->qhead->head[queue->idx * qinfo->cmd_size]; + memcpy(ent, (void *)cmd, qinfo->cmd_size); + + if (++queue->idx >= queue->qhead->size / 64) { + struct hlist_node *node; + + hlist_for_each(node, &queue->chead) { + chunk = hlist_entry(node, struct command_chunk, + nextchunk); + if (chunk == queue->qhead) { + continue; + } else { + queue->qhead = chunk; + break; + } + } + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + cptvf_write_vq_doorbell(cptvf, 1); + /* unlock command queue */ + spin_unlock(&queue->lock); + + return ret; +} + +void do_request_cleanup(struct cpt_vf *cptvf, + struct cpt_info_buffer *info) +{ + int i; + struct pci_dev *pdev = cptvf->pdev; + struct cpt_request_info *req; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dlen, DMA_BIDIRECTIONAL); + + if (info->rptr_baddr) + dma_unmap_single(&pdev->dev, info->rptr_baddr, + COMPLETION_CODE_SIZE, DMA_BIDIRECTIONAL); + + if (info->comp_baddr) + dma_unmap_single(&pdev->dev, info->comp_baddr, + sizeof(union cpt_res_s), DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + 
dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + + if (info->scatter_components) + kzfree(info->scatter_components); + + if (info->gather_components) + kzfree(info->gather_components); + + if (info->out_buffer) + kzfree(info->out_buffer); + + if (info->in_buffer) + kzfree(info->in_buffer); + + if (info->completion_addr) + kzfree((void *)info->completion_addr); + + kzfree(info); +} + +void do_post_process(struct cpt_vf *cptvf, struct cpt_info_buffer *info) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!info || !cptvf) { + dev_err(&pdev->dev, "Input params are incorrect for post processing\n"); + return; + } + + do_request_cleanup(cptvf, info); +} + +static inline void process_pending_queue(struct cpt_vf *cptvf, + struct pending_qinfo *pqinfo, + int qno) +{ + struct pci_dev *pdev = cptvf->pdev; + struct pending_queue *pqueue = &pqinfo->queue[qno]; + struct pending_entry *pentry = NULL; + struct cpt_info_buffer *info = NULL; + union cpt_res_s *status = NULL; + unsigned char ccode; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + info = (struct cpt_info_buffer *)pentry->post_arg; + if (unlikely(!info)) { + dev_err(&pdev->dev, "Pending Entry post arg NULL\n"); + pending_queue_inc_front(pqinfo, qno); + spin_unlock_bh(&pqueue->lock); + continue; + } + + status = (union cpt_res_s *)pentry->completion_addr; + ccode = status->s.compcode; + if ((status->s.compcode == CPT_COMP_E_FAULT) || + (status->s.compcode == CPT_COMP_E_SWERR)) { + dev_err(&pdev->dev, "Request failed with %s\n", + (status->s.compcode == CPT_COMP_E_FAULT) ? 
+ "DMA Fault" : "Software error"); + pentry->completion_addr = NULL; + pentry->busy = false; + atomic64_dec((&pqueue->pending_count)); + pentry->post_arg = NULL; + pending_queue_inc_front(pqinfo, qno); + do_request_cleanup(cptvf, info); + spin_unlock_bh(&pqueue->lock); + break; + } else if (status->s.compcode == COMPLETION_CODE_INIT) { + /* check for timeout */ + if (time_after_eq(jiffies, + (info->time_in + + (CPT_COMMAND_TIMEOUT * HZ)))) { + dev_err(&pdev->dev, "Request timed out"); + pentry->completion_addr = NULL; + pentry->busy = false; + atomic64_dec((&pqueue->pending_count)); + pentry->post_arg = NULL; + pending_queue_inc_front(pqinfo, qno); + do_request_cleanup(cptvf, info); + spin_unlock_bh(&pqueue->lock); + break; + } else if ((*info->alternate_caddr == + (~COMPLETION_CODE_INIT)) && + (info->extra_time < TIME_IN_RESET_COUNT)) { + info->time_in = jiffies; + info->extra_time++; + spin_unlock_bh(&pqueue->lock); + break; + } + } + + pentry->completion_addr = NULL; + pentry->busy = false; + pentry->post_arg = NULL; + atomic64_dec((&pqueue->pending_count)); + pending_queue_inc_front(pqinfo, qno); + spin_unlock_bh(&pqueue->lock); + + do_post_process(info->cptvf, info); + /* + * Calling callback after we find + * that the request has been serviced + */ + pentry->callback(ccode, pentry->callback_arg); + } +} + +int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req) +{ + int ret = 0, clear = 0, queue = 0; + struct cpt_info_buffer *info = NULL; + struct cptvf_request *cpt_req = NULL; + union ctrl_info *ctrl = NULL; + union cpt_res_s *result = NULL; + struct pending_entry *pentry = NULL; + struct pending_queue *pqueue = NULL; + struct pci_dev *pdev = cptvf->pdev; + u8 group = 0; + struct cpt_vq_command vq_cmd; + union cpt_inst_s cptinst; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Unable to allocate memory for info_buffer\n"); + return -ENOMEM; + } + + cpt_req = (struct cptvf_request *)&req->req; + 
ctrl = (union ctrl_info *)&req->ctrl;
+
+	info->cptvf = cptvf;
+	group = ctrl->s.grp;
+	ret = setup_sgio_list(cptvf, info, req);
+	if (ret) {
+		dev_err(&pdev->dev, "Setting up SG list failed");
+		goto request_cleanup;
+	}
+
+	cpt_req->dlen = info->dlen;
+	/*
+	 * Get buffer for union cpt_res_s response
+	 * structure and its physical address
+	 */
+	info->completion_addr = kzalloc(sizeof(union cpt_res_s), GFP_KERNEL);
+	if (unlikely(!info->completion_addr)) {
+		dev_err(&pdev->dev, "Unable to allocate memory for completion_addr\n");
+		/*
+		 * Take the common exit so that info and the buffers set up
+		 * by setup_sgio_list() are released instead of leaked.
+		 */
+		ret = -ENOMEM;
+		goto request_cleanup;
+	}
+
+	result = (union cpt_res_s *)info->completion_addr;
+	result->s.compcode = COMPLETION_CODE_INIT;
+	info->comp_baddr = dma_map_single(&pdev->dev,
+					       (void *)info->completion_addr,
+					       sizeof(union cpt_res_s),
+					       DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&pdev->dev, info->comp_baddr)) {
+		/* sizeof yields size_t, which is printed with %zu */
+		dev_err(&pdev->dev, "mapping compptr Failed %zu\n",
+			sizeof(union cpt_res_s));
+		ret = -EFAULT;
+		goto  request_cleanup;
+	}
+
+	/* Fill the VQ command */
+	vq_cmd.cmd.u64 = 0;
+	vq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags);
+	vq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1);
+	vq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
+	vq_cmd.cmd.s.dlen   = cpu_to_be16(cpt_req->dlen);
+
+	/* 64-bit swap for microcode data reads, not needed for addresses */
+	vq_cmd.cmd.u64 = cpu_to_be64(vq_cmd.cmd.u64);
+	vq_cmd.dptr = info->dptr_baddr;
+	vq_cmd.rptr = info->rptr_baddr;
+	vq_cmd.cptr.u64 = 0;
+	vq_cmd.cptr.s.grp = group;
+	/* Get Pending Entry to submit command */
+	/* Always queue 0, because 1 queue per VF */
+	queue = 0;
+	pqueue = &cptvf->pqinfo.queue[queue];
+
+	if (atomic64_read(&pqueue->pending_count) > PENDING_THOLD) {
+		dev_err(&pdev->dev, "pending threshold reached\n");
+		process_pending_queue(cptvf, &cptvf->pqinfo, queue);
+	}
+
+get_pending_entry:
+	spin_lock_bh(&pqueue->lock);
+	pentry = get_free_pending_entry(pqueue, cptvf->pqinfo.qlen);
+	if (unlikely(!pentry)) {
+		spin_unlock_bh(&pqueue->lock);
+		if (clear == 0) {
+			process_pending_queue(cptvf, &cptvf->pqinfo, queue);
+			clear = 1;
+			goto get_pending_entry;
+		}
+		dev_err(&pdev->dev, "Get free entry failed\n");
+		dev_err(&pdev->dev, "queue: %d, rear: %d, front: %d\n",
+			queue, pqueue->rear, pqueue->front);
+		ret = -EFAULT;
+		goto request_cleanup;
+	}
+
+	pentry->completion_addr = info->completion_addr;
+	pentry->post_arg = (void *)info;
+	pentry->callback = req->callback;
+	pentry->callback_arg = req->callback_arg;
+	info->pentry = pentry;
+	pentry->busy = true;
+	atomic64_inc(&pqueue->pending_count);
+
+	/* Send CPT command */
+	info->pentry = pentry;
+	info->time_in = jiffies;
+	info->req = req;
+
+	/* Create the CPT_INST_S type command for HW interpretation */
+	cptinst.s.doneint = true;
+	cptinst.s.res_addr = (u64)info->comp_baddr;
+	cptinst.s.tag = 0;
+	cptinst.s.grp = 0;
+	cptinst.s.wq_ptr = 0;
+	cptinst.s.ei0 = vq_cmd.cmd.u64;
+	cptinst.s.ei1 = vq_cmd.dptr;
+	cptinst.s.ei2 = vq_cmd.rptr;
+	cptinst.s.ei3 = vq_cmd.cptr.u64;
+
+	ret = send_cpt_command(cptvf, &cptinst, queue);
+	spin_unlock_bh(&pqueue->lock);
+	if (unlikely(ret)) {
+		dev_err(&pdev->dev, "Send command failed for AE\n");
+		ret = -EFAULT;
+		goto request_cleanup;
+	}
+
+	return 0;
+
+request_cleanup:
+	dev_dbg(&pdev->dev, "Failed to submit CPT command\n");
+	do_request_cleanup(cptvf, info);
+
+	return ret;
+}
+
+/*
+ * Run completion processing for pending queue @qno of @cptvf.
+ * Requests for a queue number outside [0, nr_queues) are logged and ignored.
+ */
+void vq_post_process(struct cpt_vf *cptvf, u32 qno)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+
+	/* Valid queue numbers are 0 .. nr_queues - 1 */
+	if (unlikely(qno >= cptvf->nr_queues)) {
+		dev_err(&pdev->dev, "Request for post processing on invalid pending queue: %u\n",
+			qno);
+		return;
+	}
+
+	process_pending_queue(cptvf, &cptvf->pqinfo, qno);
+}
+
+int cptvf_do_request(void *vfdev, struct cpt_request_info *req)
+{
+	struct cpt_vf *cptvf = (struct cpt_vf *)vfdev;
+	struct pci_dev *pdev = cptvf->pdev;
+
+	if (!cpt_device_ready(cptvf)) {
+		dev_err(&pdev->dev, "CPT Device is not ready");
+		return -ENODEV;
+	}
+
+	if ((cptvf->vftype == SE_TYPES) && (!req->ctrl.s.se_req)) {
+		dev_err(&pdev->dev, "CPTVF-%d
of SE TYPE got AE request", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == AE_TYPES) && (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request", + cptvf->vfid); + return -EINVAL; + } + + return process_request(cptvf, req); +} diff --git a/drivers/crypto/cavium/cpt/request_manager.h b/drivers/crypto/cavium/cpt/request_manager.h new file mode 100644 index 000000000000..80ee074c6e0c --- /dev/null +++ b/drivers/crypto/cavium/cpt/request_manager.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef __REQUEST_MANAGER_H +#define __REQUEST_MANAGER_H + +#include "cpt_common.h" + +#define TIME_IN_RESET_COUNT 5 +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 +#define PENDING_THOLD 100 +#define MAX_SG_IN_CNT 12 +#define MAX_SG_OUT_CNT 13 +#define SG_LIST_HDR_SIZE 8 +#define MAX_BUF_CNT 16 + +union ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1;/* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +union opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct cptvf_request { + union opcode_info opcode; + u16 param1; + u16 param2; + u16 dlen; +}; + +struct buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +struct cpt_request_info { + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u16 rlen; /* Output length */ + union ctrl_info ctrl; /* User control information */ + struct cptvf_request req; /* Request Information (Core specific) */ + + struct buf_ptr in[MAX_BUF_CNT]; + struct buf_ptr out[MAX_BUF_CNT]; 
+ + void (*callback)(int, void *); /* Kernel ASYNC request callabck */ + void *callback_arg; /* Kernel ASYNC request callabck arg */ +}; + +struct sglist_component { + union { + u64 len; + struct { + u16 len0; + u16 len1; + u16 len2; + u16 len3; + } s; + } u; + u64 ptr0; + u64 ptr1; + u64 ptr2; + u64 ptr3; +}; + +struct cpt_info_buffer { + struct cpt_vf *cptvf; + unsigned long time_in; + u8 extra_time; + + struct cpt_request_info *req; + dma_addr_t dptr_baddr; + u32 dlen; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + u8 *in_buffer; + u8 *out_buffer; + u8 *gather_components; + u8 *scatter_components; + + struct pending_entry *pentry; + volatile u64 *completion_addr; + volatile u64 *alternate_caddr; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union vq_cmd_word0 { + u64 u64; + struct { + u16 opcode; + u16 param1; + u16 param2; + u16 dlen; + } s; +}; + +union vq_cmd_word3 { + u64 u64; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct cpt_vq_command { + union vq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union vq_cmd_word3 cptr; +}; + +void vq_post_process(struct cpt_vf *cptvf, u32 qno); +int process_request(struct cpt_vf *cptvf, struct cpt_request_info *req); +#endif /* __REQUEST_MANAGER_H */ From 62ad8b5c09641d385a0bfdb58b5e0eb7f3c5015e Mon Sep 17 00:00:00 2001 From: George Cherian Date: Tue, 7 Feb 2017 14:51:15 +0000 Subject: [PATCH 111/142] crypto: cavium - Enable CPT options crypto for build Add the CPT options in crypto Kconfig and update the crypto Makefile Update the MAINTAINERS file too. 
Signed-off-by: George Cherian Reviewed-by: David Daney Signed-off-by: Herbert Xu --- MAINTAINERS | 7 +++++++ drivers/crypto/Kconfig | 1 + drivers/crypto/Makefile | 1 + 3 files changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5f0420a0da5b..4745b0a54809 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2996,6 +2996,13 @@ W: http://www.cavium.com S: Supported F: drivers/net/ethernet/cavium/liquidio/ +CAVIUM OCTEON-TX CRYPTO DRIVER +M: George Cherian +L: linux-crypto@vger.kernel.org +W: http://www.cavium.com +S: Supported +F: drivers/crypto/cavium/cpt/ + CC2520 IEEE-802.15.4 RADIO DRIVER M: Varka Bhadram L: linux-wpan@vger.kernel.org diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index f60de152a90d..9c2760f69226 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -497,6 +497,7 @@ config CRYPTO_DEV_MXS_DCP will be called mxs-dcp. source "drivers/crypto/qat/Kconfig" +source "drivers/crypto/cavium/cpt/Kconfig" config CRYPTO_DEV_QCE tristate "Qualcomm crypto engine accelerator" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 8891ccc5844c..ff4db52256f9 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ +obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o From 206dc4fc27be61732db4800f78c7c3ef74d6441e Mon Sep 17 00:00:00 2001 From: Rob Rice Date: Fri, 3 Feb 2017 12:55:32 -0500 Subject: [PATCH 112/142] crypto: brcm - DT documentation for Broadcom SPU hardware Device tree documentation for Broadcom Secure Processing Unit (SPU) crypto hardware. 
Signed-off-by: Steve Lin Signed-off-by: Rob Rice Acked-by: Rob Herring Signed-off-by: Herbert Xu --- .../bindings/crypto/brcm,spu-crypto.txt | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt diff --git a/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt b/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt new file mode 100644 index 000000000000..29b6007568eb --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt @@ -0,0 +1,22 @@ +The Broadcom Secure Processing Unit (SPU) hardware supports symmetric +cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware +blocks. + +Required properties: +- compatible: Should be one of the following: + brcm,spum-crypto - for devices with SPU-M hardware + brcm,spu2-crypto - for devices with SPU2 hardware + brcm,spu2-v2-crypto - for devices with enhanced SPU2 hardware features like SHA3 + and Rabin Fingerprint support + brcm,spum-nsp-crypto - for the Northstar Plus variant of the SPU-M hardware + +- reg: Should contain SPU registers location and length. +- mboxes: The mailbox channel to be used to communicate with the SPU. + Mailbox channels correspond to DMA rings on the device. + +Example: + crypto@612d0000 { + compatible = "brcm,spum-crypto"; + reg = <0 0x612d0000 0 0x900>; + mboxes = <&pdc0 0>; + }; From 9d12ba86f818aa9cfe9f01b750336aa441f2ffa2 Mon Sep 17 00:00:00 2001 From: Rob Rice Date: Fri, 3 Feb 2017 12:55:33 -0500 Subject: [PATCH 113/142] crypto: brcm - Add Broadcom SPU driver Add Broadcom Secure Processing Unit (SPU) crypto driver for SPU hardware crypto offload. The driver supports ablkcipher, ahash, and aead symmetric crypto operations. 
Signed-off-by: Steve Lin Signed-off-by: Rob Rice Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 15 + drivers/crypto/Makefile | 1 + drivers/crypto/bcm/Makefile | 15 + drivers/crypto/bcm/cipher.c | 4964 +++++++++++++++++++++++++++++++++++ drivers/crypto/bcm/cipher.h | 483 ++++ drivers/crypto/bcm/spu.c | 1251 +++++++++ drivers/crypto/bcm/spu.h | 287 ++ drivers/crypto/bcm/spu2.c | 1401 ++++++++++ drivers/crypto/bcm/spu2.h | 228 ++ drivers/crypto/bcm/spum.h | 174 ++ drivers/crypto/bcm/util.c | 581 ++++ drivers/crypto/bcm/util.h | 116 + 12 files changed, 9516 insertions(+) create mode 100644 drivers/crypto/bcm/Makefile create mode 100644 drivers/crypto/bcm/cipher.c create mode 100644 drivers/crypto/bcm/cipher.h create mode 100644 drivers/crypto/bcm/spu.c create mode 100644 drivers/crypto/bcm/spu.h create mode 100644 drivers/crypto/bcm/spu2.c create mode 100644 drivers/crypto/bcm/spu2.h create mode 100644 drivers/crypto/bcm/spum.h create mode 100644 drivers/crypto/bcm/util.c create mode 100644 drivers/crypto/bcm/util.h diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 9c2760f69226..2cac445b02fd 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -587,4 +587,19 @@ source "drivers/crypto/chelsio/Kconfig" source "drivers/crypto/virtio/Kconfig" +config CRYPTO_DEV_BCM_SPU + tristate "Broadcom symmetric crypto/hash acceleration support" + depends on ARCH_BCM_IPROC + depends on BCM_PDC_MBOX + default m + select CRYPTO_DES + select CRYPTO_MD5 + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA512 + help + This driver provides support for Broadcom crypto acceleration using the + Secure Processing Unit (SPU). The SPU driver registers ablkcipher, + ahash, and aead algorithms with the kernel cryptographic API. 
+ endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index ff4db52256f9..739609471169 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -35,3 +35,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ +obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ diff --git a/drivers/crypto/bcm/Makefile b/drivers/crypto/bcm/Makefile new file mode 100644 index 000000000000..13cb80eb2665 --- /dev/null +++ b/drivers/crypto/bcm/Makefile @@ -0,0 +1,15 @@ +# File: drivers/crypto/bcm/Makefile +# +# Makefile for crypto acceleration files for Broadcom SPU driver +# +# Uncomment to enable debug tracing in the SPU driver. +# CFLAGS_util.o := -DDEBUG +# CFLAGS_cipher.o := -DDEBUG +# CFLAGS_spu.o := -DDEBUG +# CFLAGS_spu2.o := -DDEBUG + +obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) := bcm_crypto_spu.o + +bcm_crypto_spu-objs := util.o spu.o spu2.o cipher.o + +ccflags-y += -I. -DBCMDRIVER diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c new file mode 100644 index 000000000000..a654a01ff2ba --- /dev/null +++ b/drivers/crypto/bcm/cipher.c @@ -0,0 +1,4964 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "cipher.h" +#include "spu.h" +#include "spum.h" +#include "spu2.h" + +/* ================= Device Structure ================== */ + +struct device_private iproc_priv; + +/* ==================== Parameters ===================== */ + +int flow_debug_logging; +module_param(flow_debug_logging, int, 0644); +MODULE_PARM_DESC(flow_debug_logging, "Enable Flow Debug Logging"); + +int packet_debug_logging; +module_param(packet_debug_logging, int, 0644); +MODULE_PARM_DESC(packet_debug_logging, "Enable Packet Debug Logging"); + +int debug_logging_sleep; +module_param(debug_logging_sleep, int, 0644); +MODULE_PARM_DESC(debug_logging_sleep, "Packet Debug Logging Sleep"); + +/* + * The value of these module parameters is used to set the priority for each + * algo type when this driver registers algos with the kernel crypto API. + * To use a priority other than the default, set the priority in the insmod or + * modprobe. Changing the module priority after init time has no effect. + * + * The default priorities are chosen to be lower (less preferred) than ARMv8 CE + * algos, but more preferred than generic software algos. + */ +static int cipher_pri = 150; +module_param(cipher_pri, int, 0644); +MODULE_PARM_DESC(cipher_pri, "Priority for cipher algos"); + +static int hash_pri = 100; +module_param(hash_pri, int, 0644); +MODULE_PARM_DESC(hash_pri, "Priority for hash algos"); + +static int aead_pri = 150; +module_param(aead_pri, int, 0644); +MODULE_PARM_DESC(aead_pri, "Priority for AEAD algos"); + +#define MAX_SPUS 16 + +/* A type 3 BCM header, expected to precede the SPU header for SPU-M. + * Bits 3 and 4 in the first byte encode the channel number (the dma ringset). 
+ * 0x60 - ring 0 + * 0x68 - ring 1 + * 0x70 - ring 2 + * 0x78 - ring 3 + */ +char BCMHEADER[] = { 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28 }; +/* + * Some SPU hw does not use BCM header on SPU messages. So BCM_HDR_LEN + * is set dynamically after reading SPU type from device tree. + */ +#define BCM_HDR_LEN iproc_priv.bcm_hdr_len + +/* min and max time to sleep before retrying when mbox queue is full. usec */ +#define MBOX_SLEEP_MIN 800 +#define MBOX_SLEEP_MAX 1000 + +/** + * select_channel() - Select a SPU channel to handle a crypto request. Selects + * channel in round robin order. + * + * Return: channel index + */ +static u8 select_channel(void) +{ + u8 chan_idx = atomic_inc_return(&iproc_priv.next_chan); + + return chan_idx % iproc_priv.spu.num_spu; +} + +/** + * spu_ablkcipher_rx_sg_create() - Build up the scatterlist of buffers used to + * receive a SPU response message for an ablkcipher request. Includes buffers to + * catch SPU message headers and the response data. + * @mssg: mailbox message containing the receive sg + * @rctx: crypto request context + * @rx_frag_num: number of scatterlist elements required to hold the + * SPU response message + * @chunksize: Number of bytes of response data expected + * @stat_pad_len: Number of bytes required to pad the STAT field to + * a 4-byte boundary + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. 
+ * + * Returns: + * 0 if successful + * < 0 if an error + */ +static int +spu_ablkcipher_rx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 rx_frag_num, + unsigned int chunksize, u32 stat_pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct iproc_ctx_s *ctx = rctx->ctx; + u32 datalen; /* Number of bytes of response data expected */ + + mssg->spu.dst = kcalloc(rx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.dst) + return -ENOMEM; + + sg = mssg->spu.dst; + sg_init_table(sg, rx_frag_num); + /* Space for SPU message header */ + sg_set_buf(sg++, rctx->msg_buf.spu_resp_hdr, ctx->spu_resp_hdr_len); + + /* If XTS tweak in payload, add buffer to receive encrypted tweak */ + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + sg_set_buf(sg++, rctx->msg_buf.c.supdt_tweak, + SPU_XTS_TWEAK_SIZE); + + /* Copy in each dst sg entry from request, up to chunksize */ + datalen = spu_msg_sg_add(&sg, &rctx->dst_sg, &rctx->dst_skip, + rctx->dst_nents, chunksize); + if (datalen < chunksize) { + pr_err("%s(): failed to copy dst sg to mbox msg. chunksize %u, datalen %u", + __func__, chunksize, datalen); + return -EFAULT; + } + + if (ctx->cipher.alg == CIPHER_ALG_RC4) + /* Add buffer to catch 260-byte SUPDT field for RC4 */ + sg_set_buf(sg++, rctx->msg_buf.c.supdt_tweak, SPU_SUPDT_LEN); + + if (stat_pad_len) + sg_set_buf(sg++, rctx->msg_buf.rx_stat_pad, stat_pad_len); + + memset(rctx->msg_buf.rx_stat, 0, SPU_RX_STATUS_LEN); + sg_set_buf(sg, rctx->msg_buf.rx_stat, spu->spu_rx_status_len()); + + return 0; +} + +/** + * spu_ablkcipher_tx_sg_create() - Build up the scatterlist of buffers used to + * send a SPU request message for an ablkcipher request. Includes SPU message + * headers and the request data. 
+ * @mssg: mailbox message containing the transmit sg + * @rctx: crypto request context + * @tx_frag_num: number of scatterlist elements required to construct the + * SPU request message + * @chunksize: Number of bytes of request data + * @pad_len: Number of pad bytes + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Returns: + * 0 if successful + * < 0 if an error + */ +static int +spu_ablkcipher_tx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 tx_frag_num, unsigned int chunksize, u32 pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct iproc_ctx_s *ctx = rctx->ctx; + u32 datalen; /* Number of bytes of request data added to tx sg */ + u32 stat_len; + + mssg->spu.src = kcalloc(tx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (unlikely(!mssg->spu.src)) + return -ENOMEM; + + sg = mssg->spu.src; + sg_init_table(sg, tx_frag_num); + + sg_set_buf(sg++, rctx->msg_buf.bcm_spu_req_hdr, + BCM_HDR_LEN + ctx->spu_req_hdr_len); + + /* if XTS tweak in payload, copy from IV (where crypto API puts it) */ + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + sg_set_buf(sg++, rctx->msg_buf.iv_ctr, SPU_XTS_TWEAK_SIZE); + + /* Copy in each src sg entry from request, up to chunksize */ + datalen = spu_msg_sg_add(&sg, &rctx->src_sg, &rctx->src_skip, + rctx->src_nents, chunksize); + if (unlikely(datalen < chunksize)) { + pr_err("%s(): failed to copy src sg to mbox msg", + __func__); + return -EFAULT; + } + + if (pad_len) + sg_set_buf(sg++, rctx->msg_buf.spu_req_pad, pad_len); + + stat_len = spu->spu_tx_status_len(); + if (stat_len) { + memset(rctx->msg_buf.tx_stat, 0, stat_len); + sg_set_buf(sg, rctx->msg_buf.tx_stat, stat_len); + } + return 0; +} + +/** + * handle_ablkcipher_req() - Submit as much of a block
cipher request as fits in + * a single SPU request message, starting at the current position in the request + * data. + * @rctx: Crypto request context + * + * This may be called on the crypto API thread, or, when a request is so large + * it must be broken into multiple SPU messages, on the thread used to invoke + * the response callback. When requests are broken into multiple SPU + * messages, we assume subsequent messages depend on previous results, and + * thus always wait for previous results before submitting the next message. + * Because requests are submitted in lock step like this, there is no need + * to synchronize access to request data structures. + * + * Return: -EINPROGRESS: request has been accepted and result will be returned + * asynchronously + * Any other value indicates an error + */ +static int handle_ablkcipher_req(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct ablkcipher_request *req = + container_of(areq, struct ablkcipher_request, base); + struct iproc_ctx_s *ctx = rctx->ctx; + struct spu_cipher_parms cipher_parms; + int err = 0; + unsigned int chunksize = 0; /* Num bytes of request to submit */ + int remaining = 0; /* Bytes of request still to process */ + int chunk_start; /* Beginning of data for current SPU msg */ + + /* IV or ctr value to use in this SPU msg */ + u8 local_iv_ctr[MAX_IV_SIZE]; + u32 stat_pad_len; /* num bytes to align status field */ + u32 pad_len; /* total length of all padding */ + bool update_key = false; + struct brcm_message *mssg; /* mailbox message */ + int retry_cnt = 0; + + /* number of entries in src and dst sg in mailbox message. 
*/ + u8 rx_frag_num = 2; /* response header and STATUS */ + u8 tx_frag_num = 1; /* request header */ + + flow_log("%s\n", __func__); + + cipher_parms.alg = ctx->cipher.alg; + cipher_parms.mode = ctx->cipher.mode; + cipher_parms.type = ctx->cipher_type; + cipher_parms.key_len = ctx->enckeylen; + cipher_parms.key_buf = ctx->enckey; + cipher_parms.iv_buf = local_iv_ctr; + cipher_parms.iv_len = rctx->iv_ctr_len; + + mssg = &rctx->mb_mssg; + chunk_start = rctx->src_sent; + remaining = rctx->total_todo - chunk_start; + + /* determine the chunk we are breaking off and update the indexes */ + if ((ctx->max_payload != SPU_MAX_PAYLOAD_INF) && + (remaining > ctx->max_payload)) + chunksize = ctx->max_payload; + else + chunksize = remaining; + + rctx->src_sent += chunksize; + rctx->total_sent = rctx->src_sent; + + /* Count number of sg entries to be included in this request */ + rctx->src_nents = spu_sg_count(rctx->src_sg, rctx->src_skip, chunksize); + rctx->dst_nents = spu_sg_count(rctx->dst_sg, rctx->dst_skip, chunksize); + + if ((ctx->cipher.mode == CIPHER_MODE_CBC) && + rctx->is_encrypt && chunk_start) + /* + * Encrypting non-first chunk. Copy last block of + * previous result to IV for this chunk. + */ + sg_copy_part_to_buf(req->dst, rctx->msg_buf.iv_ctr, + rctx->iv_ctr_len, + chunk_start - rctx->iv_ctr_len); + + if (rctx->iv_ctr_len) { + /* get our local copy of the iv */ + __builtin_memcpy(local_iv_ctr, rctx->msg_buf.iv_ctr, + rctx->iv_ctr_len); + + /* generate the next IV if possible */ + if ((ctx->cipher.mode == CIPHER_MODE_CBC) && + !rctx->is_encrypt) { + /* + * CBC Decrypt: next IV is the last ciphertext block in + * this chunk + */ + sg_copy_part_to_buf(req->src, rctx->msg_buf.iv_ctr, + rctx->iv_ctr_len, + rctx->src_sent - rctx->iv_ctr_len); + } else if (ctx->cipher.mode == CIPHER_MODE_CTR) { + /* + * The SPU hardware increments the counter once for + * each AES block of 16 bytes. So update the counter + * for the next chunk, if there is one.
Note that for + * this chunk, the counter has already been copied to + * local_iv_ctr. We can assume a block size of 16, + * because we only support CTR mode for AES, not for + * any other cipher alg. + */ + add_to_ctr(rctx->msg_buf.iv_ctr, chunksize >> 4); + } + } + + if (ctx->cipher.alg == CIPHER_ALG_RC4) { + rx_frag_num++; + if (chunk_start) { + /* + * for non-first RC4 chunks, use SUPDT from previous + * response as key for this chunk. + */ + cipher_parms.key_buf = rctx->msg_buf.c.supdt_tweak; + update_key = true; + cipher_parms.type = CIPHER_TYPE_UPDT; + } else if (!rctx->is_encrypt) { + /* + * First RC4 chunk. For decrypt, key in pre-built msg + * header may have been changed if encrypt required + * multiple chunks. So revert the key to the + * ctx->enckey value. + */ + update_key = true; + cipher_parms.type = CIPHER_TYPE_INIT; + } + } + + if (ctx->max_payload == SPU_MAX_PAYLOAD_INF) + flow_log("max_payload infinite\n"); + else + flow_log("max_payload %u\n", ctx->max_payload); + + flow_log("sent:%u start:%u remains:%u size:%u\n", + rctx->src_sent, chunk_start, remaining, chunksize); + + /* Copy SPU header template created at setkey time */ + memcpy(rctx->msg_buf.bcm_spu_req_hdr, ctx->bcm_spu_req_hdr, + sizeof(rctx->msg_buf.bcm_spu_req_hdr)); + + /* + * Pass SUPDT field as key. Key field in finish() call is only used + * when update_key has been set above for RC4. Will be ignored in + * all other cases. 
+ */ + spu->spu_cipher_req_finish(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN, + ctx->spu_req_hdr_len, !(rctx->is_encrypt), + &cipher_parms, update_key, chunksize); + + atomic64_add(chunksize, &iproc_priv.bytes_out); + + stat_pad_len = spu->spu_wordalign_padlen(chunksize); + if (stat_pad_len) + rx_frag_num++; + pad_len = stat_pad_len; + if (pad_len) { + tx_frag_num++; + spu->spu_request_pad(rctx->msg_buf.spu_req_pad, 0, + 0, ctx->auth.alg, ctx->auth.mode, + rctx->total_sent, stat_pad_len); + } + + spu->spu_dump_msg_hdr(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN, + ctx->spu_req_hdr_len); + packet_log("payload:\n"); + dump_sg(rctx->src_sg, rctx->src_skip, chunksize); + packet_dump(" pad: ", rctx->msg_buf.spu_req_pad, pad_len); + + /* + * Build mailbox message containing SPU request msg and rx buffers + * to catch response message + */ + memset(mssg, 0, sizeof(*mssg)); + mssg->type = BRCM_MESSAGE_SPU; + mssg->ctx = rctx; /* Will be returned in response */ + + /* Create rx scatterlist to catch result */ + rx_frag_num += rctx->dst_nents; + + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + rx_frag_num++; /* extra sg to insert tweak */ + + err = spu_ablkcipher_rx_sg_create(mssg, rctx, rx_frag_num, chunksize, + stat_pad_len); + if (err) + return err; + + /* Create tx scatterlist containing SPU request message */ + tx_frag_num += rctx->src_nents; + if (spu->spu_tx_status_len()) + tx_frag_num++; + + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload()) + tx_frag_num++; /* extra sg to insert tweak */ + + err = spu_ablkcipher_tx_sg_create(mssg, rctx, tx_frag_num, chunksize, + pad_len); + if (err) + return err; + + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], mssg); + if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) { + while ((err == -ENOBUFS) && (retry_cnt < SPU_MB_RETRY_MAX)) { + /* + * Mailbox queue is full. Since MAY_SLEEP is set, assume + * not in atomic context and we can wait and try again. 
+ */ + retry_cnt++; + usleep_range(MBOX_SLEEP_MIN, MBOX_SLEEP_MAX); + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], + mssg); + atomic_inc(&iproc_priv.mb_no_spc); + } + } + if (unlikely(err < 0)) { + atomic_inc(&iproc_priv.mb_send_fail); + return err; + } + + return -EINPROGRESS; +} + +/** + * handle_ablkcipher_resp() - Process a block cipher SPU response. Updates the + * total received count for the request and updates global stats. + * @rctx: Crypto request context + */ +static void handle_ablkcipher_resp(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; +#ifdef DEBUG + struct crypto_async_request *areq = rctx->parent; + struct ablkcipher_request *req = ablkcipher_request_cast(areq); +#endif + struct iproc_ctx_s *ctx = rctx->ctx; + u32 payload_len; + + /* See how much data was returned */ + payload_len = spu->spu_payload_length(rctx->msg_buf.spu_resp_hdr); + + /* + * In XTS mode, the first SPU_XTS_TWEAK_SIZE bytes may be the + * encrypted tweak ("i") value; we don't count those. + */ + if ((ctx->cipher.mode == CIPHER_MODE_XTS) && + spu->spu_xts_tweak_in_payload() && + (payload_len >= SPU_XTS_TWEAK_SIZE)) + payload_len -= SPU_XTS_TWEAK_SIZE; + + atomic64_add(payload_len, &iproc_priv.bytes_in); + + flow_log("%s() offset: %u, bd_len: %u BD:\n", + __func__, rctx->total_received, payload_len); + + dump_sg(req->dst, rctx->total_received, payload_len); + if (ctx->cipher.alg == CIPHER_ALG_RC4) + packet_dump(" supdt ", rctx->msg_buf.c.supdt_tweak, + SPU_SUPDT_LEN); + + rctx->total_received += payload_len; + if (rctx->total_received == rctx->total_todo) { + atomic_inc(&iproc_priv.op_counts[SPU_OP_CIPHER]); + atomic_inc( + &iproc_priv.cipher_cnt[ctx->cipher.alg][ctx->cipher.mode]); + } +} + +/** + * spu_ahash_rx_sg_create() - Build up the scatterlist of buffers used to + * receive a SPU response message for an ahash request. 
+ * @mssg: mailbox message containing the receive sg + * @rctx: crypto request context + * @rx_frag_num: number of scatterlist elements required to hold the + * SPU response message + * @digestsize: length of hash digest, in bytes + * @stat_pad_len: Number of bytes required to pad the STAT field to + * a 4-byte boundary + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Return: + * 0 if successful + * < 0 if an error + */ +static int +spu_ahash_rx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 rx_frag_num, unsigned int digestsize, + u32 stat_pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + struct iproc_ctx_s *ctx = rctx->ctx; + + mssg->spu.dst = kcalloc(rx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.dst) + return -ENOMEM; + + sg = mssg->spu.dst; + sg_init_table(sg, rx_frag_num); + /* Space for SPU message header */ + sg_set_buf(sg++, rctx->msg_buf.spu_resp_hdr, ctx->spu_resp_hdr_len); + + /* Space for digest */ + sg_set_buf(sg++, rctx->msg_buf.digest, digestsize); + + if (stat_pad_len) + sg_set_buf(sg++, rctx->msg_buf.rx_stat_pad, stat_pad_len); + + memset(rctx->msg_buf.rx_stat, 0, SPU_RX_STATUS_LEN); + sg_set_buf(sg, rctx->msg_buf.rx_stat, spu->spu_rx_status_len()); + return 0; +} + +/** + * spu_ahash_tx_sg_create() - Build up the scatterlist of buffers used to send + * a SPU request message for an ahash request. Includes SPU message headers and + * the request data. 
+ * @mssg: mailbox message containing the transmit sg + * @rctx: crypto request context + * @tx_frag_num: number of scatterlist elements required to construct the + * SPU request message + * @spu_hdr_len: length in bytes of SPU message header + * @hash_carry_len: Number of bytes of data carried over from previous req + * @new_data_len: Number of bytes of new request data + * @pad_len: Number of pad bytes + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. + * + * Return: + * 0 if successful + * < 0 if an error + */ +static int +spu_ahash_tx_sg_create(struct brcm_message *mssg, + struct iproc_reqctx_s *rctx, + u8 tx_frag_num, + u32 spu_hdr_len, + unsigned int hash_carry_len, + unsigned int new_data_len, u32 pad_len) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct scatterlist *sg; /* used to build sgs in mbox message */ + u32 datalen; /* Number of bytes of response data expected */ + u32 stat_len; + + mssg->spu.src = kcalloc(tx_frag_num, sizeof(struct scatterlist), + rctx->gfp); + if (!mssg->spu.src) + return -ENOMEM; + + sg = mssg->spu.src; + sg_init_table(sg, tx_frag_num); + + sg_set_buf(sg++, rctx->msg_buf.bcm_spu_req_hdr, + BCM_HDR_LEN + spu_hdr_len); + + if (hash_carry_len) + sg_set_buf(sg++, rctx->hash_carry, hash_carry_len); + + if (new_data_len) { + /* Copy in each src sg entry from request, up to chunksize */ + datalen = spu_msg_sg_add(&sg, &rctx->src_sg, &rctx->src_skip, + rctx->src_nents, new_data_len); + if (datalen < new_data_len) { + pr_err("%s(): failed to copy src sg to mbox msg", + __func__); + return -EFAULT; + } + } + + if (pad_len) + sg_set_buf(sg++, rctx->msg_buf.spu_req_pad, pad_len); + + stat_len = spu->spu_tx_status_len(); + if (stat_len) { + memset(rctx->msg_buf.tx_stat, 0, stat_len); + sg_set_buf(sg, rctx->msg_buf.tx_stat, stat_len); + } + + return 0; +} + +/** + * handle_ahash_req() - Process an asynchronous 
hash request from the crypto + * API. + * @rctx: Crypto request context + * + * Builds a SPU request message embedded in a mailbox message and submits the + * mailbox message on a selected mailbox channel. The SPU request message is + * constructed as a scatterlist, including entries from the crypto API's + * src scatterlist to avoid copying the data to be hashed. This function is + * called either on the thread from the crypto API, or, in the case that the + * crypto API request is too large to fit in a single SPU request message, + * on the thread that invokes the receive callback with a response message. + * Because some operations require the response from one chunk before the next + * chunk can be submitted, we always wait for the response for the previous + * chunk before submitting the next chunk. Because requests are submitted in + * lock step like this, there is no need to synchronize access to request data + * structures. + * + * Return: + * -EINPROGRESS: request has been submitted to SPU and response will be + * returned asynchronously + * -EAGAIN: non-final request included a small amount of data, which for + * efficiency we did not submit to the SPU, but instead stored + * to be submitted to the SPU with the next part of the request + * other: an error code + */ +static int handle_ahash_req(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct crypto_tfm *tfm = crypto_ahash_tfm(ahash); + unsigned int blocksize = crypto_tfm_alg_blocksize(tfm); + struct iproc_ctx_s *ctx = rctx->ctx; + + /* number of bytes still to be hashed in this req */ + unsigned int nbytes_to_hash = 0; + int err = 0; + unsigned int chunksize = 0; /* length of hash carry + new data */ + /* + * length of new data, not from hash carry, to be submitted in + * this hw request + */ + unsigned int 
new_data_len; + + unsigned int chunk_start = 0; + u32 db_size; /* Length of data field, incl gcm and hash padding */ + int pad_len = 0; /* total pad len, including gcm, hash, stat padding */ + u32 data_pad_len = 0; /* length of GCM/CCM padding */ + u32 stat_pad_len = 0; /* length of padding to align STATUS word */ + struct brcm_message *mssg; /* mailbox message */ + struct spu_request_opts req_opts; + struct spu_cipher_parms cipher_parms; + struct spu_hash_parms hash_parms; + struct spu_aead_parms aead_parms; + unsigned int local_nbuf; + u32 spu_hdr_len; + unsigned int digestsize; + u16 rem = 0; + int retry_cnt = 0; + + /* + * number of entries in src and dst sg. Always includes SPU msg header. + * rx always includes a buffer to catch digest and STATUS. + */ + u8 rx_frag_num = 3; + u8 tx_frag_num = 1; + + flow_log("total_todo %u, total_sent %u\n", + rctx->total_todo, rctx->total_sent); + + memset(&req_opts, 0, sizeof(req_opts)); + memset(&cipher_parms, 0, sizeof(cipher_parms)); + memset(&hash_parms, 0, sizeof(hash_parms)); + memset(&aead_parms, 0, sizeof(aead_parms)); + + req_opts.bd_suppress = true; + hash_parms.alg = ctx->auth.alg; + hash_parms.mode = ctx->auth.mode; + hash_parms.type = HASH_TYPE_NONE; + hash_parms.key_buf = (u8 *)ctx->authkey; + hash_parms.key_len = ctx->authkeylen; + + /* + * For hash algorithms below assignment looks bit odd but + * it's needed for AES-XCBC and AES-CMAC hash algorithms + * to differentiate between 128, 192, 256 bit key values. + * Based on the key values, hash algorithm is selected. + * For example for 128 bit key, hash algorithm is AES-128. + */ + cipher_parms.type = ctx->cipher_type; + + mssg = &rctx->mb_mssg; + chunk_start = rctx->src_sent; + + /* + * Compute the amount remaining to hash. This may include data + * carried over from previous requests. 
+ */ + nbytes_to_hash = rctx->total_todo - rctx->total_sent; + chunksize = nbytes_to_hash; + if ((ctx->max_payload != SPU_MAX_PAYLOAD_INF) && + (chunksize > ctx->max_payload)) + chunksize = ctx->max_payload; + + /* + * If this is not a final request and the request data is not a multiple + * of a full block, then simply park the extra data and prefix it to the + * data for the next request. + */ + if (!rctx->is_final) { + u8 *dest = rctx->hash_carry + rctx->hash_carry_len; + u16 new_len; /* len of data to add to hash carry */ + + rem = chunksize % blocksize; /* remainder */ + if (rem) { + /* chunksize not a multiple of blocksize */ + chunksize -= rem; + if (chunksize == 0) { + /* Don't have a full block to submit to hw */ + new_len = rem - rctx->hash_carry_len; + sg_copy_part_to_buf(req->src, dest, new_len, + rctx->src_sent); + rctx->hash_carry_len = rem; + flow_log("Exiting with hash carry len: %u\n", + rctx->hash_carry_len); + packet_dump(" buf: ", + rctx->hash_carry, + rctx->hash_carry_len); + return -EAGAIN; + } + } + } + + /* if we have hash carry, then prefix it to the data in this request */ + local_nbuf = rctx->hash_carry_len; + rctx->hash_carry_len = 0; + if (local_nbuf) + tx_frag_num++; + new_data_len = chunksize - local_nbuf; + + /* Count number of sg entries to be used in this request */ + rctx->src_nents = spu_sg_count(rctx->src_sg, rctx->src_skip, + new_data_len); + + /* AES hashing keeps key size in type field, so need to copy it here */ + if (hash_parms.alg == HASH_ALG_AES) + hash_parms.type = cipher_parms.type; + else + hash_parms.type = spu->spu_hash_type(rctx->total_sent); + + digestsize = spu->spu_digest_size(ctx->digestsize, ctx->auth.alg, + hash_parms.type); + hash_parms.digestsize = digestsize; + + /* update the indexes */ + rctx->total_sent += chunksize; + /* if you sent a prebuf then that wasn't from this req->src */ + rctx->src_sent += new_data_len; + + if ((rctx->total_sent == rctx->total_todo) && rctx->is_final) + hash_parms.pad_len = 
spu->spu_hash_pad_len(hash_parms.alg, + hash_parms.mode, + chunksize, + blocksize); + + /* + * If a non-first chunk, then include the digest returned from the + * previous chunk so that hw can add to it (except for AES types). + */ + if ((hash_parms.type == HASH_TYPE_UPDT) && + (hash_parms.alg != HASH_ALG_AES)) { + hash_parms.key_buf = rctx->incr_hash; + hash_parms.key_len = digestsize; + } + + atomic64_add(chunksize, &iproc_priv.bytes_out); + + flow_log("%s() final: %u nbuf: %u ", + __func__, rctx->is_final, local_nbuf); + + if (ctx->max_payload == SPU_MAX_PAYLOAD_INF) + flow_log("max_payload infinite\n"); + else + flow_log("max_payload %u\n", ctx->max_payload); + + flow_log("chunk_start: %u chunk_size: %u\n", chunk_start, chunksize); + + /* Prepend SPU header with type 3 BCM header */ + memcpy(rctx->msg_buf.bcm_spu_req_hdr, BCMHEADER, BCM_HDR_LEN); + + hash_parms.prebuf_len = local_nbuf; + spu_hdr_len = spu->spu_create_request(rctx->msg_buf.bcm_spu_req_hdr + + BCM_HDR_LEN, + &req_opts, &cipher_parms, + &hash_parms, &aead_parms, + new_data_len); + + if (spu_hdr_len == 0) { + pr_err("Failed to create SPU request header\n"); + return -EFAULT; + } + + /* + * Determine total length of padding required. Put all padding in one + * buffer. 
+ */ + data_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode, chunksize); + db_size = spu_real_db_size(0, 0, local_nbuf, new_data_len, + 0, 0, hash_parms.pad_len); + if (spu->spu_tx_status_len()) + stat_pad_len = spu->spu_wordalign_padlen(db_size); + if (stat_pad_len) + rx_frag_num++; + pad_len = hash_parms.pad_len + data_pad_len + stat_pad_len; + if (pad_len) { + tx_frag_num++; + spu->spu_request_pad(rctx->msg_buf.spu_req_pad, data_pad_len, + hash_parms.pad_len, ctx->auth.alg, + ctx->auth.mode, rctx->total_sent, + stat_pad_len); + } + + spu->spu_dump_msg_hdr(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN, + spu_hdr_len); + packet_dump(" prebuf: ", rctx->hash_carry, local_nbuf); + flow_log("Data:\n"); + dump_sg(rctx->src_sg, rctx->src_skip, new_data_len); + packet_dump(" pad: ", rctx->msg_buf.spu_req_pad, pad_len); + + /* + * Build mailbox message containing SPU request msg and rx buffers + * to catch response message + */ + memset(mssg, 0, sizeof(*mssg)); + mssg->type = BRCM_MESSAGE_SPU; + mssg->ctx = rctx; /* Will be returned in response */ + + /* Create rx scatterlist to catch result */ + err = spu_ahash_rx_sg_create(mssg, rctx, rx_frag_num, digestsize, + stat_pad_len); + if (err) + return err; + + /* Create tx scatterlist containing SPU request message */ + tx_frag_num += rctx->src_nents; + if (spu->spu_tx_status_len()) + tx_frag_num++; + err = spu_ahash_tx_sg_create(mssg, rctx, tx_frag_num, spu_hdr_len, + local_nbuf, new_data_len, pad_len); + if (err) + return err; + + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], mssg); + if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) { + while ((err == -ENOBUFS) && (retry_cnt < SPU_MB_RETRY_MAX)) { + /* + * Mailbox queue is full. Since MAY_SLEEP is set, assume + * not in atomic context and we can wait and try again. 
+ */ + retry_cnt++; + usleep_range(MBOX_SLEEP_MIN, MBOX_SLEEP_MAX); + err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], + mssg); + atomic_inc(&iproc_priv.mb_no_spc); + } + } + if (err < 0) { + atomic_inc(&iproc_priv.mb_send_fail); + return err; + } + return -EINPROGRESS; +} + +/** + * spu_hmac_outer_hash() - Request synchonous software compute of the outer hash + * for an HMAC request. + * @req: The HMAC request from the crypto API + * @ctx: The session context + * + * Return: 0 if synchronous hash operation successful + * -EINVAL if the hash algo is unrecognized + * any other value indicates an error + */ +static int spu_hmac_outer_hash(struct ahash_request *req, + struct iproc_ctx_s *ctx) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash)); + int rc; + + switch (ctx->auth.alg) { + case HASH_ALG_MD5: + rc = do_shash("md5", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA1: + rc = do_shash("sha1", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA224: + rc = do_shash("sha224", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA256: + rc = do_shash("sha256", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA384: + rc = do_shash("sha384", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + case HASH_ALG_SHA512: + rc = do_shash("sha512", req->result, ctx->opad, blocksize, + req->result, ctx->digestsize, NULL, 0); + break; + default: + pr_err("%s() Error : unknown hmac type\n", __func__); + rc = -EINVAL; + } + return rc; +} + +/** + * ahash_req_done() - Process a hash result from the SPU hardware. 
+ * @rctx: Crypto request context + * + * Return: 0 if successful + * < 0 if an error + */ +static int ahash_req_done(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct ahash_request *req = ahash_request_cast(areq); + struct iproc_ctx_s *ctx = rctx->ctx; + int err; + + memcpy(req->result, rctx->msg_buf.digest, ctx->digestsize); + + if (spu->spu_type == SPU_TYPE_SPUM) { + /* byte swap the output from the UPDT function to network byte + * order + */ + if (ctx->auth.alg == HASH_ALG_MD5) { + __swab32s((u32 *)req->result); + __swab32s(((u32 *)req->result) + 1); + __swab32s(((u32 *)req->result) + 2); + __swab32s(((u32 *)req->result) + 3); + __swab32s(((u32 *)req->result) + 4); + } + } + + flow_dump(" digest ", req->result, ctx->digestsize); + + /* if this an HMAC then do the outer hash */ + if (rctx->is_sw_hmac) { + err = spu_hmac_outer_hash(req, ctx); + if (err < 0) + return err; + flow_dump(" hmac: ", req->result, ctx->digestsize); + } + + if (rctx->is_sw_hmac || ctx->auth.mode == HASH_MODE_HMAC) { + atomic_inc(&iproc_priv.op_counts[SPU_OP_HMAC]); + atomic_inc(&iproc_priv.hmac_cnt[ctx->auth.alg]); + } else { + atomic_inc(&iproc_priv.op_counts[SPU_OP_HASH]); + atomic_inc(&iproc_priv.hash_cnt[ctx->auth.alg]); + } + + return 0; +} + +/** + * handle_ahash_resp() - Process a SPU response message for a hash request. + * Checks if the entire crypto API request has been processed, and if so, + * invokes post processing on the result. + * @rctx: Crypto request context + */ +static void handle_ahash_resp(struct iproc_reqctx_s *rctx) +{ + struct iproc_ctx_s *ctx = rctx->ctx; +#ifdef DEBUG + struct crypto_async_request *areq = rctx->parent; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + unsigned int blocksize = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash)); +#endif + /* + * Save hash to use as input to next op if incremental. 
Might be copying + * too much, but that's easier than figuring out actual digest size here + */ + memcpy(rctx->incr_hash, rctx->msg_buf.digest, MAX_DIGEST_SIZE); + + flow_log("%s() blocksize:%u digestsize:%u\n", + __func__, blocksize, ctx->digestsize); + + atomic64_add(ctx->digestsize, &iproc_priv.bytes_in); + + if (rctx->is_final && (rctx->total_sent == rctx->total_todo)) + ahash_req_done(rctx); +} + +/** + * spu_aead_rx_sg_create() - Build up the scatterlist of buffers used to receive + * a SPU response message for an AEAD request. Includes buffers to catch SPU + * message headers and the response data. + * @mssg: mailbox message containing the receive sg + * @rctx: crypto request context + * @rx_frag_num: number of scatterlist elements required to hold the + * SPU response message + * @assoc_len: Length of associated data included in the crypto request + * @ret_iv_len: Length of IV returned in response + * @resp_len: Number of bytes of response data expected to be written to + * dst buffer from crypto API + * @digestsize: Length of hash digest, in bytes + * @stat_pad_len: Number of bytes required to pad the STAT field to + * a 4-byte boundary + * + * The scatterlist that gets allocated here is freed in spu_chunk_cleanup() + * when the request completes, whether the request is handled successfully or + * there is an error. 
 *
 * Returns:
 *   0 if successful
 *   < 0 if an error
 */
static int spu_aead_rx_sg_create(struct brcm_message *mssg,
				 struct aead_request *req,
				 struct iproc_reqctx_s *rctx,
				 u8 rx_frag_num,
				 unsigned int assoc_len,
				 u32 ret_iv_len, unsigned int resp_len,
				 unsigned int digestsize, u32 stat_pad_len)
{
	struct spu_hw *spu = &iproc_priv.spu;
	struct scatterlist *sg;	/* used to build sgs in mbox message */
	struct iproc_ctx_s *ctx = rctx->ctx;
	u32 datalen;		/* Number of bytes of response data expected */
	u32 assoc_buf_len;
	u8 data_padlen = 0;

	if (ctx->is_rfc4543) {
		/* RFC4543: only pad after data, not after AAD */
		data_padlen = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,
						       assoc_len + resp_len);
		assoc_buf_len = assoc_len;
	} else {
		data_padlen = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,
						       resp_len);
		assoc_buf_len = spu->spu_assoc_resp_len(ctx->cipher.mode,
							assoc_len, ret_iv_len,
							rctx->is_encrypt);
	}

	if (ctx->cipher.mode == CIPHER_MODE_CCM)
		/* ICV (after data) must be in the next 32-bit word for CCM */
		data_padlen += spu->spu_wordalign_padlen(assoc_buf_len +
							 resp_len +
							 data_padlen);

	if (data_padlen)
		/* have to catch gcm pad in separate buffer */
		rx_frag_num++;

	/*
	 * The caller's count does not include the pad entry added just above.
	 * On any error return below the allocation is left in mssg->spu.dst.
	 */
	mssg->spu.dst = kcalloc(rx_frag_num, sizeof(struct scatterlist),
				rctx->gfp);
	if (!mssg->spu.dst)
		return -ENOMEM;

	sg = mssg->spu.dst;
	sg_init_table(sg, rx_frag_num);

	/* Space for SPU message header */
	sg_set_buf(sg++, rctx->msg_buf.spu_resp_hdr, ctx->spu_resp_hdr_len);

	if (assoc_buf_len) {
		/*
		 * Don't write directly to req->dst, because SPU may pad the
		 * assoc data in the response
		 */
		memset(rctx->msg_buf.a.resp_aad, 0, assoc_buf_len);
		sg_set_buf(sg++, rctx->msg_buf.a.resp_aad, assoc_buf_len);
	}

	if (resp_len) {
		/*
		 * Copy in each dst sg entry from request, up to chunksize.
		 * dst sg catches just the data. digest caught in separate buf.
		 */
		datalen = spu_msg_sg_add(&sg, &rctx->dst_sg, &rctx->dst_skip,
					 rctx->dst_nents, resp_len);
		if (datalen < (resp_len)) {
			pr_err("%s(): failed to copy dst sg to mbox msg. expected len %u, datalen %u",
			       __func__, resp_len, datalen);
			return -EFAULT;
		}
	}

	/* If GCM/CCM data is padded, catch padding in separate buffer */
	if (data_padlen) {
		memset(rctx->msg_buf.a.gcmpad, 0, data_padlen);
		sg_set_buf(sg++, rctx->msg_buf.a.gcmpad, data_padlen);
	}

	/* Always catch ICV in separate buffer */
	sg_set_buf(sg++, rctx->msg_buf.digest, digestsize);

	flow_log("stat_pad_len %u\n", stat_pad_len);
	if (stat_pad_len) {
		memset(rctx->msg_buf.rx_stat_pad, 0, stat_pad_len);
		sg_set_buf(sg++, rctx->msg_buf.rx_stat_pad, stat_pad_len);
	}

	/* Last entry: the status buffer, cleared before it is handed out */
	memset(rctx->msg_buf.rx_stat, 0, SPU_RX_STATUS_LEN);
	sg_set_buf(sg, rctx->msg_buf.rx_stat, spu->spu_rx_status_len());

	return 0;
}

/**
 * spu_aead_tx_sg_create() - Build up the scatterlist of buffers used to send a
 * SPU request message for an AEAD request. Includes SPU message headers and the
 * request data.
 * @mssg:	mailbox message containing the transmit sg
 * @rctx:	crypto request context
 * @tx_frag_num: number of scatterlist elements required to construct the
 *		SPU request message
 * @spu_hdr_len: length of SPU message header in bytes
 * @assoc:	crypto API associated data scatterlist
 * @assoc_len:	length of associated data
 * @assoc_nents: number of scatterlist entries containing assoc data
 * @aead_iv_len: length of AEAD IV, if included
 * @chunksize:	Number of bytes of request data
 * @aad_pad_len: Number of bytes of padding at end of AAD. For GCM/CCM.
 * @pad_len:	Number of pad bytes
 * @incl_icv:	If true, write separate ICV buffer after data and
 *		any padding
 *
 * The scatterlist that gets allocated here is freed in spu_chunk_cleanup()
 * when the request completes, whether the request is handled successfully or
 * there is an error.
 *
 * Return:
 *   0 if successful
 *   < 0 if an error
 */
static int spu_aead_tx_sg_create(struct brcm_message *mssg,
				 struct iproc_reqctx_s *rctx,
				 u8 tx_frag_num,
				 u32 spu_hdr_len,
				 struct scatterlist *assoc,
				 unsigned int assoc_len,
				 int assoc_nents,
				 unsigned int aead_iv_len,
				 unsigned int chunksize,
				 u32 aad_pad_len, u32 pad_len, bool incl_icv)
{
	struct spu_hw *spu = &iproc_priv.spu;
	struct scatterlist *sg;	/* used to build sgs in mbox message */
	struct scatterlist *assoc_sg = assoc;
	struct iproc_ctx_s *ctx = rctx->ctx;
	u32 datalen;		/* Number of bytes of data to write */
	u32 written;		/* Number of bytes of data written */
	u32 assoc_offset = 0;
	u32 stat_len;

	/* On any error return below the allocation is left in mssg->spu.src */
	mssg->spu.src = kcalloc(tx_frag_num, sizeof(struct scatterlist),
				rctx->gfp);
	if (!mssg->spu.src)
		return -ENOMEM;

	sg = mssg->spu.src;
	sg_init_table(sg, tx_frag_num);

	/* First entry: BCM header immediately followed by the SPU header */
	sg_set_buf(sg++, rctx->msg_buf.bcm_spu_req_hdr,
		   BCM_HDR_LEN + spu_hdr_len);

	if (assoc_len) {
		/* Copy in each associated data sg entry from request */
		written = spu_msg_sg_add(&sg, &assoc_sg, &assoc_offset,
					 assoc_nents, assoc_len);
		if (written < assoc_len) {
			pr_err("%s(): failed to copy assoc sg to mbox msg",
			       __func__);
			return -EFAULT;
		}
	}

	if (aead_iv_len)
		sg_set_buf(sg++, rctx->msg_buf.iv_ctr, aead_iv_len);

	if (aad_pad_len) {
		memset(rctx->msg_buf.a.req_aad_pad, 0, aad_pad_len);
		sg_set_buf(sg++, rctx->msg_buf.a.req_aad_pad, aad_pad_len);
	}

	/*
	 * When the ICV goes in its own buffer, leave it out of the data
	 * taken from the src scatterlist.
	 */
	datalen = chunksize;
	if ((chunksize > ctx->digestsize) && incl_icv)
		datalen -= ctx->digestsize;
	if (datalen) {
		/* For aead, a single msg should consume the entire src sg */
		written = spu_msg_sg_add(&sg, &rctx->src_sg, &rctx->src_skip,
					 rctx->src_nents, datalen);
		if (written < datalen) {
			pr_err("%s(): failed to copy src sg to mbox msg",
			       __func__);
			return -EFAULT;
		}
	}

	if (pad_len) {
		/*
		 * NOTE(review): this zeroes the same buffer that
		 * handle_aead_req() fills through spu_request_pad() before
		 * calling this function - confirm the overwrite is intended.
		 */
		memset(rctx->msg_buf.spu_req_pad, 0, pad_len);
		sg_set_buf(sg++, rctx->msg_buf.spu_req_pad, pad_len);
	}

	if (incl_icv)
		sg_set_buf(sg++, rctx->msg_buf.digest, ctx->digestsize);

	stat_len = spu->spu_tx_status_len();
	if (stat_len) {
		memset(rctx->msg_buf.tx_stat, 0, stat_len);
		sg_set_buf(sg, rctx->msg_buf.tx_stat, stat_len);
	}
	return 0;
}

/**
 * handle_aead_req() - Submit a SPU request message for the next chunk of the
 * current AEAD request.
 * @rctx:  Crypto request context
 *
 * Unlike other operation types, we assume the length of the request fits in
 * a single SPU request message. aead_enqueue() makes sure this is true.
 * Comments for other op types regarding threads applies here as well.
 *
 * Unlike incremental hash ops, where the spu returns the entire hash for
 * truncated algs like sha-224, the SPU returns just the truncated hash in
 * response to aead requests. So digestsize is always ctx->digestsize here.
 *
 * Return: -EINPROGRESS: crypto request has been accepted and result will be
 *			 returned asynchronously
 *         Any other value indicates an error
 */
static int handle_aead_req(struct iproc_reqctx_s *rctx)
{
	struct spu_hw *spu = &iproc_priv.spu;
	struct crypto_async_request *areq = rctx->parent;
	struct aead_request *req = container_of(areq,
						struct aead_request, base);
	struct iproc_ctx_s *ctx = rctx->ctx;
	int err;
	unsigned int chunksize;
	unsigned int resp_len;
	u32 spu_hdr_len;
	u32 db_size;
	u32 stat_pad_len;
	u32 pad_len;
	struct brcm_message *mssg;	/* mailbox message */
	struct spu_request_opts req_opts;
	struct spu_cipher_parms cipher_parms;
	struct spu_hash_parms hash_parms;
	struct spu_aead_parms aead_parms;
	int assoc_nents = 0;
	bool incl_icv = false;
	unsigned int digestsize = ctx->digestsize;
	int retry_cnt = 0;

	/* number of entries in src and dst sg. Always includes SPU msg header.
	 */
	/*
	 * NOTE(review): both counters are u8 and have scatterlist entry counts
	 * added to them below - confirm those counts cannot exceed 255.
	 */
	u8 rx_frag_num = 2;	/* and STATUS */
	u8 tx_frag_num = 1;

	/* doing the whole thing at once */
	chunksize = rctx->total_todo;

	flow_log("%s: chunksize %u\n", __func__, chunksize);

	/*
	 * NOTE(review): cipher_parms is the only parameter struct not zeroed
	 * here; the fields assigned below are set explicitly - confirm the
	 * struct has no other members that are read.
	 */
	memset(&req_opts, 0, sizeof(req_opts));
	memset(&hash_parms, 0, sizeof(hash_parms));
	memset(&aead_parms, 0, sizeof(aead_parms));

	req_opts.is_inbound = !(rctx->is_encrypt);
	req_opts.auth_first = ctx->auth_first;
	req_opts.is_aead = true;
	req_opts.is_esp = ctx->is_esp;

	cipher_parms.alg = ctx->cipher.alg;
	cipher_parms.mode = ctx->cipher.mode;
	cipher_parms.type = ctx->cipher_type;
	cipher_parms.key_buf = ctx->enckey;
	cipher_parms.key_len = ctx->enckeylen;
	cipher_parms.iv_buf = rctx->msg_buf.iv_ctr;
	cipher_parms.iv_len = rctx->iv_ctr_len;

	hash_parms.alg = ctx->auth.alg;
	hash_parms.mode = ctx->auth.mode;
	hash_parms.type = HASH_TYPE_NONE;
	hash_parms.key_buf = (u8 *)ctx->authkey;
	hash_parms.key_len = ctx->authkeylen;
	hash_parms.digestsize = digestsize;

	/* For SHA-224, report at least a digest-sized auth key length */
	if ((ctx->auth.alg == HASH_ALG_SHA224) &&
	    (ctx->authkeylen < SHA224_DIGEST_SIZE))
		hash_parms.key_len = SHA224_DIGEST_SIZE;

	aead_parms.assoc_size = req->assoclen;
	if (ctx->is_esp && !ctx->is_rfc4543) {
		/*
		 * 8-byte IV is included assoc data in request. SPU2
		 * expects AAD to include just SPI and seqno. So
		 * subtract off the IV len.
		 */
		aead_parms.assoc_size -= GCM_ESP_IV_SIZE;

		if (rctx->is_encrypt) {
			aead_parms.return_iv = true;
			aead_parms.ret_iv_len = GCM_ESP_IV_SIZE;
			aead_parms.ret_iv_off = GCM_ESP_SALT_SIZE;
		}
	} else {
		aead_parms.ret_iv_len = 0;
	}

	/*
	 * Count number of sg entries from the crypto API request that are to
	 * be included in this mailbox message. For dst sg, don't count space
	 * for digest. Digest gets caught in a separate buffer and copied back
	 * to dst sg when processing response.
	 */
	rctx->src_nents = spu_sg_count(rctx->src_sg, rctx->src_skip, chunksize);
	rctx->dst_nents = spu_sg_count(rctx->dst_sg, rctx->dst_skip, chunksize);
	if (aead_parms.assoc_size)
		assoc_nents = spu_sg_count(rctx->assoc, 0,
					   aead_parms.assoc_size);

	mssg = &rctx->mb_mssg;

	rctx->total_sent = chunksize;
	rctx->src_sent = chunksize;
	/* One more rx entry when the response carries assoc data */
	if (spu->spu_assoc_resp_len(ctx->cipher.mode,
				    aead_parms.assoc_size,
				    aead_parms.ret_iv_len,
				    rctx->is_encrypt))
		rx_frag_num++;

	aead_parms.iv_len = spu->spu_aead_ivlen(ctx->cipher.mode,
						rctx->iv_ctr_len);

	if (ctx->auth.alg == HASH_ALG_AES)
		hash_parms.type = ctx->cipher_type;

	/* General case AAD padding (CCM and RFC4543 special cases below) */
	aead_parms.aad_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,
						 aead_parms.assoc_size);

	/* General case data padding (CCM decrypt special case below) */
	aead_parms.data_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,
							   chunksize);

	if (ctx->cipher.mode == CIPHER_MODE_CCM) {
		/*
		 * for CCM, AAD len + 2 (rather than AAD len) needs to be
		 * 128-bit aligned
		 */
		aead_parms.aad_pad_len = spu->spu_gcm_ccm_pad_len(
					 ctx->cipher.mode,
					 aead_parms.assoc_size + 2);

		/*
		 * And when decrypting CCM, need to pad without including
		 * size of ICV which is tacked on to end of chunk
		 */
		if (!rctx->is_encrypt)
			aead_parms.data_pad_len =
				spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,
							chunksize - digestsize);

		/* CCM also requires software to rewrite portions of IV: */
		spu->spu_ccm_update_iv(digestsize, &cipher_parms, req->assoclen,
				       chunksize, rctx->is_encrypt,
				       ctx->is_esp);
	}

	if (ctx->is_rfc4543) {
		/*
		 * RFC4543: data is included in AAD, so don't pad after AAD
		 * and pad data based on both AAD + data size
		 */
		aead_parms.aad_pad_len = 0;
		if (!rctx->is_encrypt)
			aead_parms.data_pad_len = spu->spu_gcm_ccm_pad_len(
					ctx->cipher.mode,
					aead_parms.assoc_size + chunksize -
					digestsize);
		else
			aead_parms.data_pad_len = spu->spu_gcm_ccm_pad_len(
					ctx->cipher.mode,
					aead_parms.assoc_size + chunksize);

		req_opts.is_rfc4543 = true;
	}

	if (spu_req_incl_icv(ctx->cipher.mode, rctx->is_encrypt)) {
		incl_icv = true;
		tx_frag_num++;
		/* Copy ICV from end of src scatterlist to digest buf */
		sg_copy_part_to_buf(req->src, rctx->msg_buf.digest, digestsize,
				    req->assoclen + rctx->total_sent -
				    digestsize);
	}

	atomic64_add(chunksize, &iproc_priv.bytes_out);

	flow_log("%s()-sent chunksize:%u\n", __func__, chunksize);

	/* Prepend SPU header with type 3 BCM header */
	memcpy(rctx->msg_buf.bcm_spu_req_hdr, BCMHEADER, BCM_HDR_LEN);

	/*
	 * NOTE(review): unlike the hash request path, the returned header
	 * length is not checked for 0 here - confirm that is intentional.
	 */
	spu_hdr_len = spu->spu_create_request(rctx->msg_buf.bcm_spu_req_hdr +
					      BCM_HDR_LEN, &req_opts,
					      &cipher_parms, &hash_parms,
					      &aead_parms, chunksize);

	/* Determine total length of padding. Put all padding in one buffer. */
	db_size = spu_real_db_size(aead_parms.assoc_size, aead_parms.iv_len, 0,
				   chunksize, aead_parms.aad_pad_len,
				   aead_parms.data_pad_len, 0);

	stat_pad_len = spu->spu_wordalign_padlen(db_size);

	if (stat_pad_len)
		rx_frag_num++;
	pad_len = aead_parms.data_pad_len + stat_pad_len;
	if (pad_len) {
		tx_frag_num++;
		spu->spu_request_pad(rctx->msg_buf.spu_req_pad,
				     aead_parms.data_pad_len, 0,
				     ctx->auth.alg, ctx->auth.mode,
				     rctx->total_sent, stat_pad_len);
	}

	spu->spu_dump_msg_hdr(rctx->msg_buf.bcm_spu_req_hdr + BCM_HDR_LEN,
			      spu_hdr_len);
	dump_sg(rctx->assoc, 0, aead_parms.assoc_size);
	packet_dump("    aead iv: ", rctx->msg_buf.iv_ctr, aead_parms.iv_len);
	packet_log("BD:\n");
	dump_sg(rctx->src_sg, rctx->src_skip, chunksize);
	packet_dump("   pad: ", rctx->msg_buf.spu_req_pad, pad_len);

	/*
	 * Build mailbox message containing SPU request msg and rx buffers
	 * to catch response message
	 */
	memset(mssg, 0, sizeof(*mssg));
	mssg->type = BRCM_MESSAGE_SPU;
	mssg->ctx = rctx;	/* Will be returned in response */

	/* Create rx scatterlist to catch result */
	rx_frag_num += rctx->dst_nents;
	resp_len = chunksize;

	/*
	 * Always catch ICV in separate buffer. Have to for GCM/CCM because of
	 * padding. Have to for SHA-224 and other truncated SHAs because SPU
	 * sends entire digest back.
	 */
	rx_frag_num++;

	if (((ctx->cipher.mode == CIPHER_MODE_GCM) ||
	     (ctx->cipher.mode == CIPHER_MODE_CCM)) && !rctx->is_encrypt) {
		/*
		 * Input is ciphertxt plus ICV, but ICV not incl
		 * in output.
		 */
		resp_len -= ctx->digestsize;
		if (resp_len == 0)
			/* no rx frags to catch output data */
			rx_frag_num -= rctx->dst_nents;
	}

	err = spu_aead_rx_sg_create(mssg, req, rctx, rx_frag_num,
				    aead_parms.assoc_size,
				    aead_parms.ret_iv_len, resp_len, digestsize,
				    stat_pad_len);
	if (err)
		return err;

	/* Create tx scatterlist containing SPU request message */
	tx_frag_num += rctx->src_nents;
	tx_frag_num += assoc_nents;
	if (aead_parms.aad_pad_len)
		tx_frag_num++;
	if (aead_parms.iv_len)
		tx_frag_num++;
	if (spu->spu_tx_status_len())
		tx_frag_num++;
	err = spu_aead_tx_sg_create(mssg, rctx, tx_frag_num, spu_hdr_len,
				    rctx->assoc, aead_parms.assoc_size,
				    assoc_nents, aead_parms.iv_len, chunksize,
				    aead_parms.aad_pad_len, pad_len, incl_icv);
	if (err)
		return err;

	err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx], mssg);
	if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) {
		while ((err == -ENOBUFS) && (retry_cnt < SPU_MB_RETRY_MAX)) {
			/*
			 * Mailbox queue is full. Since MAY_SLEEP is set, assume
			 * not in atomic context and we can wait and try again.
			 */
			retry_cnt++;
			usleep_range(MBOX_SLEEP_MIN, MBOX_SLEEP_MAX);
			err = mbox_send_message(iproc_priv.mbox[rctx->chan_idx],
						mssg);
			atomic_inc(&iproc_priv.mb_no_spc);
		}
	}
	if (err < 0) {
		atomic_inc(&iproc_priv.mb_send_fail);
		return err;
	}

	return -EINPROGRESS;
}

/**
 * handle_aead_resp() - Process a SPU response message for an AEAD request.
+ * @rctx: Crypto request context + */ +static void handle_aead_resp(struct iproc_reqctx_s *rctx) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct crypto_async_request *areq = rctx->parent; + struct aead_request *req = container_of(areq, + struct aead_request, base); + struct iproc_ctx_s *ctx = rctx->ctx; + u32 payload_len; + unsigned int icv_offset; + u32 result_len; + + /* See how much data was returned */ + payload_len = spu->spu_payload_length(rctx->msg_buf.spu_resp_hdr); + flow_log("payload_len %u\n", payload_len); + + /* only count payload */ + atomic64_add(payload_len, &iproc_priv.bytes_in); + + if (req->assoclen) + packet_dump(" assoc_data ", rctx->msg_buf.a.resp_aad, + req->assoclen); + + /* + * Copy the ICV back to the destination + * buffer. In decrypt case, SPU gives us back the digest, but crypto + * API doesn't expect ICV in dst buffer. + */ + result_len = req->cryptlen; + if (rctx->is_encrypt) { + icv_offset = req->assoclen + rctx->total_sent; + packet_dump(" ICV: ", rctx->msg_buf.digest, ctx->digestsize); + flow_log("copying ICV to dst sg at offset %u\n", icv_offset); + sg_copy_part_from_buf(req->dst, rctx->msg_buf.digest, + ctx->digestsize, icv_offset); + result_len += ctx->digestsize; + } + + packet_log("response data: "); + dump_sg(req->dst, req->assoclen, result_len); + + atomic_inc(&iproc_priv.op_counts[SPU_OP_AEAD]); + if (ctx->cipher.alg == CIPHER_ALG_AES) { + if (ctx->cipher.mode == CIPHER_MODE_CCM) + atomic_inc(&iproc_priv.aead_cnt[AES_CCM]); + else if (ctx->cipher.mode == CIPHER_MODE_GCM) + atomic_inc(&iproc_priv.aead_cnt[AES_GCM]); + else + atomic_inc(&iproc_priv.aead_cnt[AUTHENC]); + } else { + atomic_inc(&iproc_priv.aead_cnt[AUTHENC]); + } +} + +/** + * spu_chunk_cleanup() - Do cleanup after processing one chunk of a request + * @rctx: request context + * + * Mailbox scatterlists are allocated for each chunk. So free them after + * processing each chunk. 
+ */ +static void spu_chunk_cleanup(struct iproc_reqctx_s *rctx) +{ + /* mailbox message used to tx request */ + struct brcm_message *mssg = &rctx->mb_mssg; + + kfree(mssg->spu.src); + kfree(mssg->spu.dst); + memset(mssg, 0, sizeof(struct brcm_message)); +} + +/** + * finish_req() - Used to invoke the complete callback from the requester when + * a request has been handled asynchronously. + * @rctx: Request context + * @err: Indicates whether the request was successful or not + * + * Ensures that cleanup has been done for request + */ +static void finish_req(struct iproc_reqctx_s *rctx, int err) +{ + struct crypto_async_request *areq = rctx->parent; + + flow_log("%s() err:%d\n\n", __func__, err); + + /* No harm done if already called */ + spu_chunk_cleanup(rctx); + + if (areq) + areq->complete(areq, err); +} + +/** + * spu_rx_callback() - Callback from mailbox framework with a SPU response. + * @cl: mailbox client structure for SPU driver + * @msg: mailbox message containing SPU response + */ +static void spu_rx_callback(struct mbox_client *cl, void *msg) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct brcm_message *mssg = msg; + struct iproc_reqctx_s *rctx; + struct iproc_ctx_s *ctx; + struct crypto_async_request *areq; + int err = 0; + + rctx = mssg->ctx; + if (unlikely(!rctx)) { + /* This is fatal */ + pr_err("%s(): no request context", __func__); + err = -EFAULT; + goto cb_finish; + } + areq = rctx->parent; + ctx = rctx->ctx; + + /* process the SPU status */ + err = spu->spu_status_process(rctx->msg_buf.rx_stat); + if (err != 0) { + if (err == SPU_INVALID_ICV) + atomic_inc(&iproc_priv.bad_icv); + err = -EBADMSG; + goto cb_finish; + } + + /* Process the SPU response message */ + switch (rctx->ctx->alg->type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + handle_ablkcipher_resp(rctx); + break; + case CRYPTO_ALG_TYPE_AHASH: + handle_ahash_resp(rctx); + break; + case CRYPTO_ALG_TYPE_AEAD: + handle_aead_resp(rctx); + break; + default: + err = -EINVAL; + goto cb_finish; 
+ } + + /* + * If this response does not complete the request, then send the next + * request chunk. + */ + if (rctx->total_sent < rctx->total_todo) { + /* Deallocate anything specific to previous chunk */ + spu_chunk_cleanup(rctx); + + switch (rctx->ctx->alg->type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + err = handle_ablkcipher_req(rctx); + break; + case CRYPTO_ALG_TYPE_AHASH: + err = handle_ahash_req(rctx); + if (err == -EAGAIN) + /* + * we saved data in hash carry, but tell crypto + * API we successfully completed request. + */ + err = 0; + break; + case CRYPTO_ALG_TYPE_AEAD: + err = handle_aead_req(rctx); + break; + default: + err = -EINVAL; + } + + if (err == -EINPROGRESS) + /* Successfully submitted request for next chunk */ + return; + } + +cb_finish: + finish_req(rctx, err); +} + +/* ==================== Kernel Cryptographic API ==================== */ + +/** + * ablkcipher_enqueue() - Handle ablkcipher encrypt or decrypt request. + * @req: Crypto API request + * @encrypt: true if encrypting; false if decrypting + * + * Return: -EINPROGRESS if request accepted and result will be returned + * asynchronously + * < 0 if an error + */ +static int ablkcipher_enqueue(struct ablkcipher_request *req, bool encrypt) +{ + struct iproc_reqctx_s *rctx = ablkcipher_request_ctx(req); + struct iproc_ctx_s *ctx = + crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + int err; + + flow_log("%s() enc:%u\n", __func__, encrypt); + + rctx->gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + rctx->parent = &req->base; + rctx->is_encrypt = encrypt; + rctx->bd_suppress = false; + rctx->total_todo = req->nbytes; + rctx->src_sent = 0; + rctx->total_sent = 0; + rctx->total_received = 0; + rctx->ctx = ctx; + + /* Initialize current position in src and dst scatterlists */ + rctx->src_sg = req->src; + rctx->src_nents = 0; + rctx->src_skip = 0; + rctx->dst_sg = req->dst; + rctx->dst_nents = 0; + rctx->dst_skip = 0; + + if (ctx->cipher.mode == CIPHER_MODE_CBC || + ctx->cipher.mode == CIPHER_MODE_CTR || + ctx->cipher.mode == CIPHER_MODE_OFB || + ctx->cipher.mode == CIPHER_MODE_XTS || + ctx->cipher.mode == CIPHER_MODE_GCM || + ctx->cipher.mode == CIPHER_MODE_CCM) { + rctx->iv_ctr_len = + crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + memcpy(rctx->msg_buf.iv_ctr, req->info, rctx->iv_ctr_len); + } else { + rctx->iv_ctr_len = 0; + } + + /* Choose a SPU to process this request */ + rctx->chan_idx = select_channel(); + err = handle_ablkcipher_req(rctx); + if (err != -EINPROGRESS) + /* synchronous result */ + spu_chunk_cleanup(rctx); + + return err; +} + +static int des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + u32 tmp[DES_EXPKEY_WORDS]; + + if (keylen == DES_KEY_SIZE) { + if (des_ekey(tmp, key) == 0) { + if (crypto_ablkcipher_get_flags(cipher) & + CRYPTO_TFM_REQ_WEAK_KEY) { + u32 flags = CRYPTO_TFM_RES_WEAK_KEY; + + crypto_ablkcipher_set_flags(cipher, flags); + return -EINVAL; + } + } + + ctx->cipher_type = CIPHER_TYPE_DES; + } else { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int threedes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + + if (keylen == (DES_KEY_SIZE * 3)) { + const u32 *K = (const u32 *)key; + u32 flags = CRYPTO_TFM_RES_BAD_KEY_SCHED; + + if (!((K[0] ^ 
K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5]))) { + crypto_ablkcipher_set_flags(cipher, flags); + return -EINVAL; + } + + ctx->cipher_type = CIPHER_TYPE_3DES; + } else { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + + if (ctx->cipher.mode == CIPHER_MODE_XTS) + /* XTS includes two keys of equal length */ + keylen = keylen / 2; + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + WARN_ON((ctx->max_payload != SPU_MAX_PAYLOAD_INF) && + ((ctx->max_payload % AES_BLOCK_SIZE) != 0)); + return 0; +} + +static int rc4_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + int i; + + ctx->enckeylen = ARC4_MAX_KEY_SIZE + ARC4_STATE_SIZE; + + ctx->enckey[0] = 0x00; /* 0x00 */ + ctx->enckey[1] = 0x00; /* i */ + ctx->enckey[2] = 0x00; /* 0x00 */ + ctx->enckey[3] = 0x00; /* j */ + for (i = 0; i < ARC4_MAX_KEY_SIZE; i++) + ctx->enckey[i + ARC4_STATE_SIZE] = key[i % keylen]; + + ctx->cipher_type = CIPHER_TYPE_INIT; + + return 0; +} + +static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher); + struct spu_cipher_parms cipher_parms; + u32 alloc_len = 0; + int err; + + flow_log("ablkcipher_setkey() keylen: %d\n", keylen); + flow_dump(" key: ", key, keylen); + + switch (ctx->cipher.alg) { + case CIPHER_ALG_DES: + err = des_setkey(cipher, key, 
keylen); + break; + case CIPHER_ALG_3DES: + err = threedes_setkey(cipher, key, keylen); + break; + case CIPHER_ALG_AES: + err = aes_setkey(cipher, key, keylen); + break; + case CIPHER_ALG_RC4: + err = rc4_setkey(cipher, key, keylen); + break; + default: + pr_err("%s() Error: unknown cipher alg\n", __func__); + err = -EINVAL; + } + if (err) + return err; + + /* RC4 already populated ctx->enkey */ + if (ctx->cipher.alg != CIPHER_ALG_RC4) { + memcpy(ctx->enckey, key, keylen); + ctx->enckeylen = keylen; + } + /* SPU needs XTS keys in the reverse order the crypto API presents */ + if ((ctx->cipher.alg == CIPHER_ALG_AES) && + (ctx->cipher.mode == CIPHER_MODE_XTS)) { + unsigned int xts_keylen = keylen / 2; + + memcpy(ctx->enckey, key + xts_keylen, xts_keylen); + memcpy(ctx->enckey + xts_keylen, key, xts_keylen); + } + + if (spu->spu_type == SPU_TYPE_SPUM) + alloc_len = BCM_HDR_LEN + SPU_HEADER_ALLOC_LEN; + else if (spu->spu_type == SPU_TYPE_SPU2) + alloc_len = BCM_HDR_LEN + SPU2_HEADER_ALLOC_LEN; + memset(ctx->bcm_spu_req_hdr, 0, alloc_len); + cipher_parms.iv_buf = NULL; + cipher_parms.iv_len = crypto_ablkcipher_ivsize(cipher); + flow_log("%s: iv_len %u\n", __func__, cipher_parms.iv_len); + + cipher_parms.alg = ctx->cipher.alg; + cipher_parms.mode = ctx->cipher.mode; + cipher_parms.type = ctx->cipher_type; + cipher_parms.key_buf = ctx->enckey; + cipher_parms.key_len = ctx->enckeylen; + + /* Prepend SPU request message with BCM header */ + memcpy(ctx->bcm_spu_req_hdr, BCMHEADER, BCM_HDR_LEN); + ctx->spu_req_hdr_len = + spu->spu_cipher_req_init(ctx->bcm_spu_req_hdr + BCM_HDR_LEN, + &cipher_parms); + + ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, + ctx->enckeylen, + false); + + atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_CIPHER]); + + return 0; +} + +static int ablkcipher_encrypt(struct ablkcipher_request *req) +{ + flow_log("ablkcipher_encrypt() nbytes:%u\n", req->nbytes); + + return ablkcipher_enqueue(req, true); +} + +static int 
ablkcipher_decrypt(struct ablkcipher_request *req)
+{
+	flow_log("ablkcipher_decrypt() nbytes:%u\n", req->nbytes);
+	return ablkcipher_enqueue(req, false);
+}
+
+/**
+ * ahash_enqueue() - Prepare the request context and submit a hash request.
+ * @req: Hash request
+ *
+ * Resets the scatterlist bookkeeping in the request context, then either
+ * computes a zero-length final hash in software (SPU2 only) or picks a
+ * channel and hands the request to handle_ahash_req().
+ *
+ * Return: 0 if the request completed or its data was only buffered
+ *         (-EAGAIN from handle_ahash_req() is mapped to 0), otherwise the
+ *         value from handle_ahash_req() or do_shash().
+ */
+static int ahash_enqueue(struct ahash_request *req)
+{
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+	int err = 0;
+	const char *alg_name;
+
+	flow_log("ahash_enqueue() nbytes:%u\n", req->nbytes);
+
+	/* Only use a sleeping allocation if the caller allows it */
+	rctx->gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	rctx->parent = &req->base;
+	rctx->ctx = ctx;
+	rctx->bd_suppress = true;
+	memset(&rctx->mb_mssg, 0, sizeof(struct brcm_message));
+
+	/* Initialize position in src scatterlist */
+	rctx->src_sg = req->src;
+	rctx->src_skip = 0;
+	rctx->src_nents = 0;
+	rctx->dst_sg = NULL;
+	rctx->dst_skip = 0;
+	rctx->dst_nents = 0;
+
+	/* SPU2 hardware does not compute hash of zero length data */
+	if ((rctx->is_final == 1) && (rctx->total_todo == 0) &&
+	    (iproc_priv.spu.spu_type == SPU_TYPE_SPU2)) {
+		alg_name = crypto_tfm_alg_name(crypto_ahash_tfm(tfm));
+		flow_log("Doing %sfinal %s zero-len hash request in software\n",
+			 rctx->is_final ? "" : "non-", alg_name);
+		err = do_shash((unsigned char *)alg_name, req->result,
+			       NULL, 0, NULL, 0, ctx->authkey,
+			       ctx->authkeylen);
+		if (err < 0)
+			flow_log("Hash request failed with error %d\n", err);
+		return err;
+	}
+	/* Choose a SPU to process this request */
+	rctx->chan_idx = select_channel();
+
+	err = handle_ahash_req(rctx);
+	if (err != -EINPROGRESS)
+		/* synchronous result */
+		spu_chunk_cleanup(rctx);
+
+	if (err == -EAGAIN)
+		/*
+		 * we saved data in hash carry, but tell crypto API
+		 * we successfully completed request. 
+		 */
+		err = 0;
+
+	return err;
+}
+
+/**
+ * __ahash_init() - Reset the per-request hash state for the SPU hw path.
+ * @req: Hash request
+ *
+ * Clears the carry and byte counters in the request context, records the
+ * digest size in the session context and computes the expected SPU response
+ * header length.
+ *
+ * Return: always 0
+ */
+static int __ahash_init(struct ahash_request *req)
+{
+	struct spu_hw *spu = &iproc_priv.spu;
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+
+	flow_log("%s()\n", __func__);
+
+	/* Initialize the context */
+	rctx->hash_carry_len = 0;
+	rctx->is_final = 0;
+
+	rctx->total_todo = 0;
+	rctx->src_sent = 0;
+	rctx->total_sent = 0;
+	rctx->total_received = 0;
+
+	ctx->digestsize = crypto_ahash_digestsize(tfm);
+	/* If we add a hash whose digest is larger, catch it here. */
+	WARN_ON(ctx->digestsize > MAX_DIGEST_SIZE);
+
+	rctx->is_sw_hmac = false;
+
+	ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, 0,
+							  true);
+
+	return 0;
+}
+
+/**
+ * spu_no_incr_hash() - Determine whether incremental hashing is supported.
+ * @ctx:  Crypto session context
+ *
+ * SPU-2 does not support incremental hashing (we'll have to revisit and
+ * condition based on chip revision or device tree entry if future versions do
+ * support incremental hash)
+ *
+ * SPU-M also doesn't support incremental hashing of AES-XCBC
+ *
+ * Return: true if incremental hashing is not supported
+ *         false otherwise
+ */
+bool spu_no_incr_hash(struct iproc_ctx_s *ctx)
+{
+	struct spu_hw *spu = &iproc_priv.spu;
+
+	if (spu->spu_type == SPU_TYPE_SPU2)
+		return true;
+
+	if ((ctx->auth.alg == HASH_ALG_AES) &&
+	    (ctx->auth.mode == HASH_MODE_XCBC))
+		return true;
+
+	/* Otherwise, incremental hashing is supported */
+	return false;
+}
+
+/**
+ * ahash_init() - init() entry point for the hash algorithms.
+ * @req: Hash request
+ *
+ * When the hardware cannot hash incrementally, a synchronous shash of the
+ * same algorithm name is allocated, keyed with the stored auth key (if any)
+ * and kept in ctx->shash. Otherwise the SPU hw path is initialized via
+ * __ahash_init().
+ *
+ * Return: 0 on success, negative errno on allocation or shash failure
+ */
+static int ahash_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+	const char *alg_name;
+	struct crypto_shash *hash;
+	int ret;
+	gfp_t gfp;
+
+	if (spu_no_incr_hash(ctx)) {
+		/*
+		 * If we get an incremental hashing request and it's not
+		 * supported by the hardware, we need to handle it in software
+		 * by calling 
synchronous hash functions.
+		 */
+		alg_name = crypto_tfm_alg_name(crypto_ahash_tfm(tfm));
+		hash = crypto_alloc_shash(alg_name, 0, 0);
+		if (IS_ERR(hash)) {
+			ret = PTR_ERR(hash);
+			goto err;
+		}
+
+		gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+		/* The descriptor is followed by the shash's private state */
+		ctx->shash = kmalloc(sizeof(*ctx->shash) +
+				     crypto_shash_descsize(hash), gfp);
+		if (!ctx->shash) {
+			ret = -ENOMEM;
+			goto err_hash;
+		}
+		ctx->shash->tfm = hash;
+		ctx->shash->flags = 0;
+
+		/* Set the key using data we already have from setkey */
+		if (ctx->authkeylen > 0) {
+			ret = crypto_shash_setkey(hash, ctx->authkey,
+						  ctx->authkeylen);
+			if (ret)
+				goto err_shash;
+		}
+
+		/* Initialize hash w/ this key and other params */
+		ret = crypto_shash_init(ctx->shash);
+		if (ret)
+			goto err_shash;
+	} else {
+		/* Otherwise call the internal function which uses SPU hw */
+		ret = __ahash_init(req);
+	}
+
+	return ret;
+
+	/* Error unwinding: release in reverse order of acquisition */
+err_shash:
+	kfree(ctx->shash);
+err_hash:
+	crypto_free_shash(hash);
+err:
+	return ret;
+}
+
+/**
+ * __ahash_update() - update() for the SPU hw path.
+ * @req: Hash request
+ *
+ * Return: 0 when there is no data to add, otherwise the value returned by
+ *         ahash_enqueue()
+ */
+static int __ahash_update(struct ahash_request *req)
+{
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+
+	flow_log("ahash_update() nbytes:%u\n", req->nbytes);
+
+	if (!req->nbytes)
+		return 0;
+	rctx->total_todo += req->nbytes;
+	rctx->src_sent = 0;
+
+	return ahash_enqueue(req);
+}
+
+/**
+ * ahash_update() - update() entry point for the hash algorithms.
+ * @req: Hash request
+ *
+ * On hardware without incremental hashing the request data is copied into a
+ * temporary linear buffer and fed to the software shash held in ctx->shash.
+ * Otherwise the request goes to the SPU hw path.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int ahash_update(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+	u8 *tmpbuf;
+	int ret;
+	int nents;
+	gfp_t gfp;
+
+	if (spu_no_incr_hash(ctx)) {
+		/*
+		 * If we get an incremental hashing request and it's not
+		 * supported by the hardware, we need to handle it in software
+		 * by calling synchronous hash functions.
+		 */
+		if (req->src)
+			nents = sg_nents(req->src);
+		else
+			return -EINVAL;
+
+		/* Copy data from req scatterlist to tmp buffer */
+		gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC;
+		tmpbuf = kmalloc(req->nbytes, gfp);
+		if (!tmpbuf)
+			return -ENOMEM;
+
+		/* A short copy means the scatterlist held less than nbytes */
+		if (sg_copy_to_buffer(req->src, nents, tmpbuf, req->nbytes) !=
+				req->nbytes) {
+			kfree(tmpbuf);
+			return -EINVAL;
+		}
+
+		/* Call synchronous update */
+		ret = crypto_shash_update(ctx->shash, tmpbuf, req->nbytes);
+		kfree(tmpbuf);
+	} else {
+		/* Otherwise call the internal function which uses SPU hw */
+		ret = __ahash_update(req);
+	}
+
+	return ret;
+}
+
+/**
+ * __ahash_final() - final() for the SPU hw path.
+ * @req: Hash request
+ *
+ * Marks the request as final and submits it.
+ *
+ * Return: value returned by ahash_enqueue()
+ */
+static int __ahash_final(struct ahash_request *req)
+{
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+
+	flow_log("ahash_final() nbytes:%u\n", req->nbytes);
+
+	rctx->is_final = 1;
+
+	return ahash_enqueue(req);
+}
+
+/**
+ * ahash_final() - final() entry point for the hash algorithms.
+ * @req: Hash request
+ *
+ * In the software path the digest is produced by crypto_shash_final() and
+ * the shash allocated by ahash_init() is released afterwards.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int ahash_final(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+	int ret;
+
+	if (spu_no_incr_hash(ctx)) {
+		/*
+		 * If we get an incremental hashing request and it's not
+		 * supported by the hardware, we need to handle it in software
+		 * by calling synchronous hash functions. 
+		 */
+		ret = crypto_shash_final(ctx->shash, req->result);
+
+		/* Done with hash, can deallocate it now */
+		crypto_free_shash(ctx->shash->tfm);
+		kfree(ctx->shash);
+
+	} else {
+		/* Otherwise call the internal function which uses SPU hw */
+		ret = __ahash_final(req);
+	}
+
+	return ret;
+}
+
+/**
+ * __ahash_finup() - finup() for the SPU hw path.
+ * @req: Hash request
+ *
+ * Accounts for the request's data, marks the request as final and submits
+ * it.
+ *
+ * Return: value returned by ahash_enqueue()
+ */
+static int __ahash_finup(struct ahash_request *req)
+{
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+
+	flow_log("ahash_finup() nbytes:%u\n", req->nbytes);
+
+	rctx->total_todo += req->nbytes;
+	rctx->src_sent = 0;
+	rctx->is_final = 1;
+
+	return ahash_enqueue(req);
+}
+
+/**
+ * ahash_finup() - finup() entry point for the hash algorithms.
+ * @req: Hash request
+ *
+ * In the software path the request data is copied to a temporary linear
+ * buffer, hashed with crypto_shash_finup() and the shash allocated by
+ * ahash_init() is released, also on the error paths. Otherwise the request
+ * goes to the SPU hw path.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int ahash_finup(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+	u8 *tmpbuf;
+	int ret;
+	int nents;
+	gfp_t gfp;
+
+	if (spu_no_incr_hash(ctx)) {
+		/*
+		 * If we get an incremental hashing request and it's not
+		 * supported by the hardware, we need to handle it in software
+		 * by calling synchronous hash functions.
+		 */
+		if (req->src) {
+			nents = sg_nents(req->src);
+		} else {
+			ret = -EINVAL;
+			goto ahash_finup_exit;
+		}
+
+		/* Copy data from req scatterlist to tmp buffer */
+		gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + tmpbuf = kmalloc(req->nbytes, gfp); + if (!tmpbuf) { + ret = -ENOMEM; + goto ahash_finup_exit; + } + + if (sg_copy_to_buffer(req->src, nents, tmpbuf, req->nbytes) != + req->nbytes) { + ret = -EINVAL; + goto ahash_finup_free; + } + + /* Call synchronous update */ + ret = crypto_shash_finup(ctx->shash, tmpbuf, req->nbytes, + req->result); + kfree(tmpbuf); + } else { + /* Otherwise call the internal function which uses SPU hw */ + return __ahash_finup(req); + } +ahash_finup_free: + kfree(tmpbuf); + +ahash_finup_exit: + /* Done with hash, can deallocate it now */ + crypto_free_shash(ctx->shash->tfm); + kfree(ctx->shash); + return ret; +} + +static int ahash_digest(struct ahash_request *req) +{ + int err = 0; + + flow_log("ahash_digest() nbytes:%u\n", req->nbytes); + + /* whole thing at once */ + err = __ahash_init(req); + if (!err) + err = __ahash_finup(req); + + return err; +} + +static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, + unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_ahash_ctx(ahash); + + flow_log("%s() ahash:%p key:%p keylen:%u\n", + __func__, ahash, key, keylen); + flow_dump(" key: ", key, keylen); + + if (ctx->auth.alg == HASH_ALG_AES) { + switch (keylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + pr_err("%s() Error: Invalid key length\n", __func__); + return -EINVAL; + } + } else { + pr_err("%s() Error: unknown hash alg\n", __func__); + return -EINVAL; + } + memcpy(ctx->authkey, key, keylen); + ctx->authkeylen = keylen; + + return 0; +} + +static int ahash_export(struct ahash_request *req, void *out) +{ + const struct iproc_reqctx_s *rctx = ahash_request_ctx(req); + struct spu_hash_export_s *spu_exp = (struct spu_hash_export_s *)out; + + spu_exp->total_todo = rctx->total_todo; + spu_exp->total_sent = 
rctx->total_sent;
+	spu_exp->is_sw_hmac = rctx->is_sw_hmac;
+	memcpy(spu_exp->hash_carry, rctx->hash_carry, sizeof(rctx->hash_carry));
+	spu_exp->hash_carry_len = rctx->hash_carry_len;
+	memcpy(spu_exp->incr_hash, rctx->incr_hash, sizeof(rctx->incr_hash));
+
+	return 0;
+}
+
+/**
+ * ahash_import() - Restore a partial hash state saved by ahash_export().
+ * @req: Hash request to restore into
+ * @in:  Source buffer, read as a struct spu_hash_export_s
+ *
+ * Return: always 0
+ */
+static int ahash_import(struct ahash_request *req, const void *in)
+{
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+	struct spu_hash_export_s *spu_exp = (struct spu_hash_export_s *)in;
+
+	rctx->total_todo = spu_exp->total_todo;
+	rctx->total_sent = spu_exp->total_sent;
+	rctx->is_sw_hmac = spu_exp->is_sw_hmac;
+	memcpy(rctx->hash_carry, spu_exp->hash_carry, sizeof(rctx->hash_carry));
+	rctx->hash_carry_len = spu_exp->hash_carry_len;
+	memcpy(rctx->incr_hash, spu_exp->incr_hash, sizeof(rctx->incr_hash));
+
+	return 0;
+}
+
+/**
+ * ahash_hmac_setkey() - setkey() for the HMAC algorithms.
+ * @ahash:  Hash transform being keyed
+ * @key:    Key material
+ * @keylen: Length of @key in bytes
+ *
+ * A key longer than the hash block size is first hashed down to digest size
+ * with the matching software hash; shorter keys are stored as-is. On SPU-M
+ * the ipad and opad blocks are additionally precomputed in the context.
+ *
+ * Return: 0 on success, -EINVAL for an unknown hash algorithm, or the error
+ *         returned by do_shash()
+ */
+static int ahash_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
+			     unsigned int keylen)
+{
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(ahash);
+	unsigned int blocksize =
+		crypto_tfm_alg_blocksize(crypto_ahash_tfm(ahash));
+	unsigned int digestsize = crypto_ahash_digestsize(ahash);
+	unsigned int index;
+	int rc;
+
+	flow_log("%s() ahash:%p key:%p keylen:%u blksz:%u digestsz:%u\n",
+		 __func__, ahash, key, keylen, blocksize, digestsize);
+	flow_dump(" key: ", key, keylen);
+
+	if (keylen > blocksize) {
+		/* Replace the over-long key by its digest */
+		switch (ctx->auth.alg) {
+		case HASH_ALG_MD5:
+			rc = do_shash("md5", ctx->authkey, key, keylen, NULL,
+				      0, NULL, 0);
+			break;
+		case HASH_ALG_SHA1:
+			rc = do_shash("sha1", ctx->authkey, key, keylen, NULL,
+				      0, NULL, 0);
+			break;
+		case HASH_ALG_SHA224:
+			rc = do_shash("sha224", ctx->authkey, key, keylen, NULL,
+				      0, NULL, 0);
+			break;
+		case HASH_ALG_SHA256:
+			rc = do_shash("sha256", ctx->authkey, key, keylen, NULL,
+				      0, NULL, 0);
+			break;
+		case HASH_ALG_SHA384:
+			rc = do_shash("sha384", ctx->authkey, key, keylen, NULL,
+				      0, NULL, 0);
+			break;
+		case HASH_ALG_SHA512:
+			rc = do_shash("sha512", ctx->authkey, key, keylen, NULL,
+				      0, NULL, 0);
+			break;
+		case 
HASH_ALG_SHA3_224: + rc = do_shash("sha3-224", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + case HASH_ALG_SHA3_256: + rc = do_shash("sha3-256", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + case HASH_ALG_SHA3_384: + rc = do_shash("sha3-384", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + case HASH_ALG_SHA3_512: + rc = do_shash("sha3-512", ctx->authkey, key, keylen, + NULL, 0, NULL, 0); + break; + default: + pr_err("%s() Error: unknown hash alg\n", __func__); + return -EINVAL; + } + if (rc < 0) { + pr_err("%s() Error %d computing shash for %s\n", + __func__, rc, hash_alg_name[ctx->auth.alg]); + return rc; + } + ctx->authkeylen = digestsize; + + flow_log(" keylen > digestsize... hashed\n"); + flow_dump(" newkey: ", ctx->authkey, ctx->authkeylen); + } else { + memcpy(ctx->authkey, key, keylen); + ctx->authkeylen = keylen; + } + + /* + * Full HMAC operation in SPUM is not verified, + * So keeping the generation of IPAD, OPAD and + * outer hashing in software. 
+	 */
+	if (iproc_priv.spu.spu_type == SPU_TYPE_SPUM) {
+		/* Zero-pad the key to a full block, then derive ipad/opad */
+		memcpy(ctx->ipad, ctx->authkey, ctx->authkeylen);
+		memset(ctx->ipad + ctx->authkeylen, 0,
+		       blocksize - ctx->authkeylen);
+		ctx->authkeylen = 0;
+		memcpy(ctx->opad, ctx->ipad, blocksize);
+
+		for (index = 0; index < blocksize; index++) {
+			ctx->ipad[index] ^= 0x36;
+			ctx->opad[index] ^= 0x5c;
+		}
+
+		flow_dump(" ipad: ", ctx->ipad, blocksize);
+		flow_dump(" opad: ", ctx->opad, blocksize);
+	}
+	ctx->digestsize = digestsize;
+	atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_HMAC]);
+
+	return 0;
+}
+
+/**
+ * ahash_hmac_init() - init() entry point for the HMAC algorithms.
+ * @req: Hash request
+ *
+ * Initializes the request as a plain hash. When the hardware supports
+ * incremental hashing, the outer HMAC is done in software: the precomputed
+ * ipad block is placed in the hash carry so it is hashed ahead of the data.
+ *
+ * Return: 0 on success, otherwise the error returned by ahash_init()
+ */
+static int ahash_hmac_init(struct ahash_request *req)
+{
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+	unsigned int blocksize =
+			crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+	int ret;
+
+	flow_log("ahash_hmac_init()\n");
+
+	/* init the context as a hash; do not hide allocation/setkey errors */
+	ret = ahash_init(req);
+	if (ret)
+		return ret;
+
+	if (!spu_no_incr_hash(ctx)) {
+		/* SPU-M can do incr hashing but needs sw for outer HMAC */
+		rctx->is_sw_hmac = true;
+		ctx->auth.mode = HASH_MODE_HASH;
+		/* start with a prepended ipad */
+		memcpy(rctx->hash_carry, ctx->ipad, blocksize);
+		rctx->hash_carry_len = blocksize;
+		rctx->total_todo += blocksize;
+	}
+
+	return 0;
+}
+
+/* update() for HMAC: empty updates are a no-op, the rest is a plain update */
+static int ahash_hmac_update(struct ahash_request *req)
+{
+	flow_log("ahash_hmac_update() nbytes:%u\n", req->nbytes);
+
+	if (!req->nbytes)
+		return 0;
+
+	return ahash_update(req);
+}
+
+/* final() for HMAC: delegates to ahash_final() */
+static int ahash_hmac_final(struct ahash_request *req)
+{
+	flow_log("ahash_hmac_final() nbytes:%u\n", req->nbytes);
+
+	return ahash_final(req);
+}
+
+/* finup() for HMAC: delegates to ahash_finup() */
+static int ahash_hmac_finup(struct ahash_request *req)
+{
+	flow_log("ahash_hmac_finupl() nbytes:%u\n", req->nbytes);
+
+	return ahash_finup(req);
+}
+
+/**
+ * ahash_hmac_digest() - digest() entry point for the HMAC algorithms.
+ * @req: Hash request
+ *
+ * Return: value returned by __ahash_finup()
+ */
+static int ahash_hmac_digest(struct ahash_request *req)
+{
+	struct iproc_reqctx_s *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_ahash_ctx(tfm);
+	unsigned int blocksize =
+			crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	flow_log("ahash_hmac_digest() nbytes:%u\n", req->nbytes);
+
+	/* Perform initialization and then call finup */
+	__ahash_init(req);
+
+	if (iproc_priv.spu.spu_type == SPU_TYPE_SPU2) {
+		/*
+		 * SPU2 supports full HMAC implementation in the
+		 * hardware, need not to generate IPAD, OPAD and
+		 * outer hash in software.
+		 * Only for hash key len > hash block size, SPU2
+		 * expects to perform hashing on the key, shorten
+		 * it to digest size and feed it as hash key.
+		 */
+		rctx->is_sw_hmac = false;
+		ctx->auth.mode = HASH_MODE_HMAC;
+	} else {
+		/* Other SPU types: hw does the inner hash, sw the outer one */
+		rctx->is_sw_hmac = true;
+		ctx->auth.mode = HASH_MODE_HASH;
+		/* start with a prepended ipad */
+		memcpy(rctx->hash_carry, ctx->ipad, blocksize);
+		rctx->hash_carry_len = blocksize;
+		rctx->total_todo += blocksize;
+	}
+
+	return __ahash_finup(req);
+}
+
+/* aead helpers */
+
+/**
+ * aead_need_fallback() - Decide whether a request must use the sw fallback.
+ * @req: AEAD request
+ *
+ * Return: non-zero if the request hits one of the hardware limitations
+ *         checked below or exceeds ctx->max_payload, 0 otherwise
+ */
+static int aead_need_fallback(struct aead_request *req)
+{
+	struct iproc_reqctx_s *rctx = aead_request_ctx(req);
+	struct spu_hw *spu = &iproc_priv.spu;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_aead_ctx(aead);
+	u32 payload_len;
+
+	/*
+	 * SPU hardware cannot handle the AES-GCM/CCM case where plaintext
+	 * and AAD are both 0 bytes long. So use fallback in this case. 
+	 */
+	if (((ctx->cipher.mode == CIPHER_MODE_GCM) ||
+	     (ctx->cipher.mode == CIPHER_MODE_CCM)) &&
+	    (req->assoclen == 0)) {
+		/* On decrypt, cryptlen == digestsize means only the ICV is left */
+		if ((rctx->is_encrypt && (req->cryptlen == 0)) ||
+		    (!rctx->is_encrypt && (req->cryptlen == ctx->digestsize))) {
+			flow_log("AES GCM/CCM needs fallback for 0 len req\n");
+			return 1;
+		}
+	}
+
+	/* SPU-M hardware only supports CCM digest size of 8, 12, or 16 bytes */
+	if ((ctx->cipher.mode == CIPHER_MODE_CCM) &&
+	    (spu->spu_type == SPU_TYPE_SPUM) &&
+	    (ctx->digestsize != 8) && (ctx->digestsize != 12) &&
+	    (ctx->digestsize != 16)) {
+		flow_log("%s() AES CCM needs fallbck for digest size %d\n",
+			 __func__, ctx->digestsize);
+		return 1;
+	}
+
+	/*
+	 * SPU-M on NSP has an issue where AES-CCM hash is not correct
+	 * when AAD size is 0
+	 */
+	if ((ctx->cipher.mode == CIPHER_MODE_CCM) &&
+	    (spu->spu_subtype == SPU_SUBTYPE_SPUM_NSP) &&
+	    (req->assoclen == 0)) {
+		flow_log("%s() AES_CCM needs fallback for 0 len AAD on NSP\n",
+			 __func__);
+		return 1;
+	}
+
+	/* On SPU-M the associated data counts towards the payload limit */
+	payload_len = req->cryptlen;
+	if (spu->spu_type == SPU_TYPE_SPUM)
+		payload_len += req->assoclen;
+
+	flow_log("%s() payload len: %u\n", __func__, payload_len);
+
+	if (ctx->max_payload == SPU_MAX_PAYLOAD_INF)
+		return 0;
+	else
+		return payload_len > ctx->max_payload;
+}
+
+/**
+ * aead_complete() - Completion callback chained in by aead_do_fallback().
+ * @areq: Async request that completed on the fallback transform
+ * @err:  Completion status
+ *
+ * Restores the request's transform, callback and callback data to the values
+ * saved in the request context, then calls the restored completion callback.
+ */
+static void aead_complete(struct crypto_async_request *areq, int err)
+{
+	struct aead_request *req =
+	    container_of(areq, struct aead_request, base);
+	struct iproc_reqctx_s *rctx = aead_request_ctx(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+
+	flow_log("%s() err:%d\n", __func__, err);
+
+	areq->tfm = crypto_aead_tfm(aead);
+
+	areq->complete = rctx->old_complete;
+	areq->data = rctx->old_data;
+
+	areq->complete(areq, err);
+}
+
+/**
+ * aead_do_fallback() - Run an AEAD request on the software fallback tfm.
+ * @req:        AEAD request
+ * @is_encrypt: true to encrypt, false to decrypt
+ *
+ * Return: value from the fallback's encrypt/decrypt call, or -EINVAL if no
+ *         fallback transform is set in the context
+ */
+static int aead_do_fallback(struct aead_request *req, bool is_encrypt)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct iproc_reqctx_s *rctx = aead_request_ctx(req);
+	struct iproc_ctx_s *ctx = 
crypto_tfm_ctx(tfm);
+	int err;
+	u32 req_flags;
+
+	flow_log("%s() enc:%u\n", __func__, is_encrypt);
+
+	if (ctx->fallback_cipher) {
+		/* Store the cipher tfm and then use the fallback tfm */
+		rctx->old_tfm = tfm;
+		aead_request_set_tfm(req, ctx->fallback_cipher);
+		/*
+		 * Save the callback and chain ourselves in, so we can restore
+		 * the tfm
+		 */
+		rctx->old_complete = req->base.complete;
+		rctx->old_data = req->base.data;
+		req_flags = aead_request_flags(req);
+		aead_request_set_callback(req, req_flags, aead_complete, req);
+		err = is_encrypt ? crypto_aead_encrypt(req) :
+		    crypto_aead_decrypt(req);
+
+		/*
+		 * NOTE(review): only err == 0 restores the request below; a
+		 * synchronous error return appears to leave the fallback tfm
+		 * and chained callback in place - confirm whether intended.
+		 */
+		if (err == 0) {
+			/*
+			 * fallback was synchronous (did not return
+			 * -EINPROGRESS). So restore request state here.
+			 */
+			aead_request_set_callback(req, req_flags,
+						  rctx->old_complete, req);
+			req->base.data = rctx->old_data;
+			aead_request_set_tfm(req, aead);
+			flow_log("%s() fallback completed successfully\n\n",
+				 __func__);
+		}
+	} else {
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+/**
+ * aead_enqueue() - Prepare the request context and submit an AEAD request.
+ * @req:        AEAD request
+ * @is_encrypt: true to encrypt, false to decrypt
+ *
+ * Initializes the request context, locates the start of the payload behind
+ * the associated data in the src (and dst) scatterlist, works out the IV
+ * length for the cipher mode and then either diverts the request to the
+ * software fallback or hands it to handle_aead_req().
+ *
+ * Return: -EINVAL if the associated data is too long or a scatterlist is too
+ *         short, otherwise the value from the fallback or handle_aead_req()
+ */
+static int aead_enqueue(struct aead_request *req, bool is_encrypt)
+{
+	struct iproc_reqctx_s *rctx = aead_request_ctx(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct iproc_ctx_s *ctx = crypto_aead_ctx(aead);
+	int err;
+
+	flow_log("%s() enc:%u\n", __func__, is_encrypt);
+
+	if (req->assoclen > MAX_ASSOC_SIZE) {
+		pr_err
+		    ("%s() Error: associated data too long. (%u > %u bytes)\n",
+		     __func__, req->assoclen, MAX_ASSOC_SIZE);
+		return -EINVAL;
+	}
+
+	/* Only use a sleeping allocation if the caller allows it */
+	rctx->gfp = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; + rctx->parent = &req->base; + rctx->is_encrypt = is_encrypt; + rctx->bd_suppress = false; + rctx->total_todo = req->cryptlen; + rctx->src_sent = 0; + rctx->total_sent = 0; + rctx->total_received = 0; + rctx->is_sw_hmac = false; + rctx->ctx = ctx; + memset(&rctx->mb_mssg, 0, sizeof(struct brcm_message)); + + /* assoc data is at start of src sg */ + rctx->assoc = req->src; + + /* + * Init current position in src scatterlist to be after assoc data. + * src_skip set to buffer offset where data begins. (Assoc data could + * end in the middle of a buffer.) + */ + if (spu_sg_at_offset(req->src, req->assoclen, &rctx->src_sg, + &rctx->src_skip) < 0) { + pr_err("%s() Error: Unable to find start of src data\n", + __func__); + return -EINVAL; + } + + rctx->src_nents = 0; + rctx->dst_nents = 0; + if (req->dst == req->src) { + rctx->dst_sg = rctx->src_sg; + rctx->dst_skip = rctx->src_skip; + } else { + /* + * Expect req->dst to have room for assoc data followed by + * output data and ICV, if encrypt. So initialize dst_sg + * to point beyond assoc len offset. 
+ */ + if (spu_sg_at_offset(req->dst, req->assoclen, &rctx->dst_sg, + &rctx->dst_skip) < 0) { + pr_err("%s() Error: Unable to find start of dst data\n", + __func__); + return -EINVAL; + } + } + + if (ctx->cipher.mode == CIPHER_MODE_CBC || + ctx->cipher.mode == CIPHER_MODE_CTR || + ctx->cipher.mode == CIPHER_MODE_OFB || + ctx->cipher.mode == CIPHER_MODE_XTS || + ctx->cipher.mode == CIPHER_MODE_GCM) { + rctx->iv_ctr_len = + ctx->salt_len + + crypto_aead_ivsize(crypto_aead_reqtfm(req)); + } else if (ctx->cipher.mode == CIPHER_MODE_CCM) { + rctx->iv_ctr_len = CCM_AES_IV_SIZE; + } else { + rctx->iv_ctr_len = 0; + } + + rctx->hash_carry_len = 0; + + flow_log(" src sg: %p\n", req->src); + flow_log(" rctx->src_sg: %p, src_skip %u\n", + rctx->src_sg, rctx->src_skip); + flow_log(" assoc: %p, assoclen %u\n", rctx->assoc, req->assoclen); + flow_log(" dst sg: %p\n", req->dst); + flow_log(" rctx->dst_sg: %p, dst_skip %u\n", + rctx->dst_sg, rctx->dst_skip); + flow_log(" iv_ctr_len:%u\n", rctx->iv_ctr_len); + flow_dump(" iv: ", req->iv, rctx->iv_ctr_len); + flow_log(" authkeylen:%u\n", ctx->authkeylen); + flow_log(" is_esp: %s\n", ctx->is_esp ? "yes" : "no"); + + if (ctx->max_payload == SPU_MAX_PAYLOAD_INF) + flow_log(" max_payload infinite"); + else + flow_log(" max_payload: %u\n", ctx->max_payload); + + if (unlikely(aead_need_fallback(req))) + return aead_do_fallback(req, is_encrypt); + + /* + * Do memory allocations for request after fallback check, because if we + * do fallback, we won't call finish_req() to dealloc. 
+	 */
+	if (rctx->iv_ctr_len) {
+		/* IV buffer layout: [salt_offset bytes][salt][request IV] */
+		if (ctx->salt_len)
+			memcpy(rctx->msg_buf.iv_ctr + ctx->salt_offset,
+			       ctx->salt, ctx->salt_len);
+		memcpy(rctx->msg_buf.iv_ctr + ctx->salt_offset + ctx->salt_len,
+		       req->iv,
+		       rctx->iv_ctr_len - ctx->salt_len - ctx->salt_offset);
+	}
+
+	rctx->chan_idx = select_channel();
+	err = handle_aead_req(rctx);
+	if (err != -EINPROGRESS)
+		/* synchronous result */
+		spu_chunk_cleanup(rctx);
+
+	return err;
+}
+
+/**
+ * aead_authenc_setkey() - setkey() for the authenc() AEAD algorithms.
+ * @cipher: AEAD transform being keyed
+ * @key:    Key blob: an rtattr carrying a crypto_authenc_key_param, followed
+ *          by the auth key and then the cipher key
+ * @keylen: Total length of @key in bytes
+ *
+ * Splits the blob into auth and cipher key, stores both in the session
+ * context, validates the cipher key for the configured cipher and passes the
+ * original blob on to the fallback transform, if there is one.
+ *
+ * Return: 0 on success, -EINVAL on a malformed or unsupported key, or the
+ *         error returned by the fallback's setkey
+ */
+static int aead_authenc_setkey(struct crypto_aead *cipher,
+			       const u8 *key, unsigned int keylen)
+{
+	struct spu_hw *spu = &iproc_priv.spu;
+	struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher);
+	struct crypto_tfm *tfm = crypto_aead_tfm(cipher);
+	struct rtattr *rta = (void *)key;
+	struct crypto_authenc_key_param *param;
+	const u8 *origkey = key;
+	const unsigned int origkeylen = keylen;
+
+	int ret = 0;
+
+	flow_log("%s() aead:%p key:%p keylen:%u\n", __func__, cipher, key,
+		 keylen);
+	flow_dump(" key: ", key, keylen);
+
+	/* Validate the rtattr header that precedes the two keys */
+	if (!RTA_OK(rta, keylen))
+		goto badkey;
+	if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM)
+		goto badkey;
+	if (RTA_PAYLOAD(rta) < sizeof(*param))
+		goto badkey;
+
+	param = RTA_DATA(rta);
+	ctx->enckeylen = be32_to_cpu(param->enckeylen);
+
+	/* Step over the header; key now points at the auth key */
+	key += RTA_ALIGN(rta->rta_len);
+	keylen -= RTA_ALIGN(rta->rta_len);
+
+	if (keylen < ctx->enckeylen)
+		goto badkey;
+	if (ctx->enckeylen > MAX_KEY_SIZE)
+		goto badkey;
+
+	/* Whatever is not cipher key is auth key */
+	ctx->authkeylen = keylen - ctx->enckeylen;
+
+	if (ctx->authkeylen > MAX_KEY_SIZE)
+		goto badkey;
+
+	memcpy(ctx->enckey, key + ctx->authkeylen, ctx->enckeylen);
+	/* May end up padding auth key. So make sure it's zeroed. 
*/ + memset(ctx->authkey, 0, sizeof(ctx->authkey)); + memcpy(ctx->authkey, key, ctx->authkeylen); + + switch (ctx->alg->cipher_info.alg) { + case CIPHER_ALG_DES: + if (ctx->enckeylen == DES_KEY_SIZE) { + u32 tmp[DES_EXPKEY_WORDS]; + u32 flags = CRYPTO_TFM_RES_WEAK_KEY; + + if (des_ekey(tmp, key) == 0) { + if (crypto_aead_get_flags(cipher) & + CRYPTO_TFM_REQ_WEAK_KEY) { + crypto_aead_set_flags(cipher, flags); + return -EINVAL; + } + } + + ctx->cipher_type = CIPHER_TYPE_DES; + } else { + goto badkey; + } + break; + case CIPHER_ALG_3DES: + if (ctx->enckeylen == (DES_KEY_SIZE * 3)) { + const u32 *K = (const u32 *)key; + u32 flags = CRYPTO_TFM_RES_BAD_KEY_SCHED; + + if (!((K[0] ^ K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5]))) { + crypto_aead_set_flags(cipher, flags); + return -EINVAL; + } + + ctx->cipher_type = CIPHER_TYPE_3DES; + } else { + crypto_aead_set_flags(cipher, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + break; + case CIPHER_ALG_AES: + switch (ctx->enckeylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + goto badkey; + } + break; + case CIPHER_ALG_RC4: + ctx->cipher_type = CIPHER_TYPE_INIT; + break; + default: + pr_err("%s() Error: Unknown cipher alg\n", __func__); + return -EINVAL; + } + + flow_log(" enckeylen:%u authkeylen:%u\n", ctx->enckeylen, + ctx->authkeylen); + flow_dump(" enc: ", ctx->enckey, ctx->enckeylen); + flow_dump(" auth: ", ctx->authkey, ctx->authkeylen); + + /* setkey the fallback just in case we needto use it */ + if (ctx->fallback_cipher) { + flow_log(" running fallback setkey()\n"); + + ctx->fallback_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + ctx->fallback_cipher->base.crt_flags |= + tfm->crt_flags & CRYPTO_TFM_REQ_MASK; + ret = + crypto_aead_setkey(ctx->fallback_cipher, origkey, + origkeylen); + if (ret) { + flow_log(" 
fallback setkey() returned:%d\n", ret); + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + (ctx->fallback_cipher->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + } + + ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, + ctx->enckeylen, + false); + + atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_AEAD]); + + return ret; + +badkey: + ctx->enckeylen = 0; + ctx->authkeylen = 0; + ctx->digestsize = 0; + + crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + +static int aead_gcm_ccm_setkey(struct crypto_aead *cipher, + const u8 *key, unsigned int keylen) +{ + struct spu_hw *spu = &iproc_priv.spu; + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + struct crypto_tfm *tfm = crypto_aead_tfm(cipher); + + int ret = 0; + + flow_log("%s() keylen:%u\n", __func__, keylen); + flow_dump(" key: ", key, keylen); + + if (!ctx->is_esp) + ctx->digestsize = keylen; + + ctx->enckeylen = keylen; + ctx->authkeylen = 0; + memcpy(ctx->enckey, key, ctx->enckeylen); + + switch (ctx->enckeylen) { + case AES_KEYSIZE_128: + ctx->cipher_type = CIPHER_TYPE_AES128; + break; + case AES_KEYSIZE_192: + ctx->cipher_type = CIPHER_TYPE_AES192; + break; + case AES_KEYSIZE_256: + ctx->cipher_type = CIPHER_TYPE_AES256; + break; + default: + goto badkey; + } + + flow_log(" enckeylen:%u authkeylen:%u\n", ctx->enckeylen, + ctx->authkeylen); + flow_dump(" enc: ", ctx->enckey, ctx->enckeylen); + flow_dump(" auth: ", ctx->authkey, ctx->authkeylen); + + /* setkey the fallback just in case we need to use it */ + if (ctx->fallback_cipher) { + flow_log(" running fallback setkey()\n"); + + ctx->fallback_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + ctx->fallback_cipher->base.crt_flags |= + tfm->crt_flags & CRYPTO_TFM_REQ_MASK; + ret = crypto_aead_setkey(ctx->fallback_cipher, key, + keylen + ctx->salt_len); + if (ret) { + flow_log(" fallback setkey() returned:%d\n", ret); + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + 
(ctx->fallback_cipher->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + } + + ctx->spu_resp_hdr_len = spu->spu_response_hdr_len(ctx->authkeylen, + ctx->enckeylen, + false); + + atomic_inc(&iproc_priv.setkey_cnt[SPU_OP_AEAD]); + + flow_log(" enckeylen:%u authkeylen:%u\n", ctx->enckeylen, + ctx->authkeylen); + + return ret; + +badkey: + ctx->enckeylen = 0; + ctx->authkeylen = 0; + ctx->digestsize = 0; + + crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + +/** + * aead_gcm_esp_setkey() - setkey() operation for ESP variant of GCM AES. + * @cipher: AEAD structure + * @key: Key followed by 4 bytes of salt + * @keylen: Length of key plus salt, in bytes + * + * Extracts salt from key and stores it to be prepended to IV on each request. + * Digest is always 16 bytes + * + * Return: Value from generic gcm setkey. + */ +static int aead_gcm_esp_setkey(struct crypto_aead *cipher, + const u8 *key, unsigned int keylen) +{ + struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); + + flow_log("%s\n", __func__); + ctx->salt_len = GCM_ESP_SALT_SIZE; + ctx->salt_offset = GCM_ESP_SALT_OFFSET; + memcpy(ctx->salt, key + keylen - GCM_ESP_SALT_SIZE, GCM_ESP_SALT_SIZE); + keylen -= GCM_ESP_SALT_SIZE; + ctx->digestsize = GCM_ESP_DIGESTSIZE; + ctx->is_esp = true; + flow_dump("salt: ", ctx->salt, GCM_ESP_SALT_SIZE); + + return aead_gcm_ccm_setkey(cipher, key, keylen); +} + +/** + * rfc4543_gcm_esp_setkey() - setkey operation for RFC4543 variant of GCM/GMAC. + * cipher: AEAD structure + * key: Key followed by 4 bytes of salt + * keylen: Length of key plus salt, in bytes + * + * Extracts salt from key and stores it to be prepended to IV on each request. + * Digest is always 16 bytes + * + * Return: Value from generic gcm setkey. 
+ */
+static int rfc4543_gcm_esp_setkey(struct crypto_aead *cipher,
+				  const u8 *key, unsigned int keylen)
+{
+	struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher);
+
+	flow_log("%s\n", __func__);
+
+	/* keylen is unsigned: a key shorter than the salt must be rejected */
+	if (keylen < GCM_ESP_SALT_SIZE) {
+		crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->salt_len = GCM_ESP_SALT_SIZE;
+	ctx->salt_offset = GCM_ESP_SALT_OFFSET;
+	/* The salt is the tail of the key blob */
+	memcpy(ctx->salt, key + keylen - GCM_ESP_SALT_SIZE, GCM_ESP_SALT_SIZE);
+	keylen -= GCM_ESP_SALT_SIZE;
+	ctx->digestsize = GCM_ESP_DIGESTSIZE;
+	ctx->is_esp = true;
+	ctx->is_rfc4543 = true;
+	flow_dump("salt: ", ctx->salt, GCM_ESP_SALT_SIZE);
+
+	return aead_gcm_ccm_setkey(cipher, key, keylen);
+}
+
+/**
+ * aead_ccm_esp_setkey() - setkey() operation for ESP variant of CCM AES.
+ * @cipher: AEAD structure
+ * @key:    Key followed by CCM_ESP_SALT_SIZE bytes of salt
+ * @keylen: Length of key plus salt, in bytes
+ *
+ * Extracts salt from key and stores it to be prepended to IV on each request.
+ * Unlike the GCM variants, the digest size is not set here.
+ *
+ * Return: Value from generic ccm setkey, or -EINVAL (with
+ *         CRYPTO_TFM_RES_BAD_KEY_LEN set) if @keylen is too short to hold
+ *         the salt.
+ */
+static int aead_ccm_esp_setkey(struct crypto_aead *cipher,
+			       const u8 *key, unsigned int keylen)
+{
+	struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher);
+
+	flow_log("%s\n", __func__);
+
+	/* keylen is unsigned: a key shorter than the salt must be rejected */
+	if (keylen < CCM_ESP_SALT_SIZE) {
+		crypto_aead_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->salt_len = CCM_ESP_SALT_SIZE;
+	ctx->salt_offset = CCM_ESP_SALT_OFFSET;
+	memcpy(ctx->salt, key + keylen - CCM_ESP_SALT_SIZE, CCM_ESP_SALT_SIZE);
+	keylen -= CCM_ESP_SALT_SIZE;
+	ctx->is_esp = true;
+	flow_dump("salt: ", ctx->salt, CCM_ESP_SALT_SIZE);
+
+	return aead_gcm_ccm_setkey(cipher, key, keylen);
+}
+
+/**
+ * aead_setauthsize() - setauthsize() for all AEAD algorithms of this driver.
+ * @cipher:   AEAD transform
+ * @authsize: Requested authentication tag length in bytes
+ *
+ * Records the tag length as the digest size and forwards it to the fallback
+ * transform, if there is one.
+ *
+ * Return: 0, or the error returned by the fallback's setauthsize
+ */
+static int aead_setauthsize(struct crypto_aead *cipher, unsigned int authsize)
+{
+	struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher);
+	int ret = 0;
+
+	flow_log("%s() authkeylen:%u authsize:%u\n",
+		 __func__, ctx->authkeylen, authsize);
+
+	ctx->digestsize = authsize;
+
+	/* setkey the fallback just in case we need to use it */
+	if (ctx->fallback_cipher) {
+		flow_log(" running fallback setauth()\n");
+
+		ret = crypto_aead_setauthsize(ctx->fallback_cipher, authsize);
+		if (ret)
+			flow_log(" fallback setauth() returned:%d\n", ret);
+	}
+
+	return 
ret;
+}
+
+/**
+ * aead_encrypt() - encrypt() entry point for the AEAD algorithms.
+ * @req: AEAD request
+ *
+ * Return: value returned by aead_enqueue()
+ */
+static int aead_encrypt(struct aead_request *req)
+{
+	flow_log("%s() cryptlen:%u %08x\n", __func__, req->cryptlen,
+		 req->cryptlen);
+	dump_sg(req->src, 0, req->cryptlen + req->assoclen);
+	flow_log(" assoc_len:%u\n", req->assoclen);
+
+	return aead_enqueue(req, true);
+}
+
+/**
+ * aead_decrypt() - decrypt() entry point for the AEAD algorithms.
+ * @req: AEAD request
+ *
+ * Return: value returned by aead_enqueue()
+ */
+static int aead_decrypt(struct aead_request *req)
+{
+	flow_log("%s() cryptlen:%u\n", __func__, req->cryptlen);
+	dump_sg(req->src, 0, req->cryptlen + req->assoclen);
+	flow_log(" assoc_len:%u\n", req->assoclen);
+
+	return aead_enqueue(req, false);
+}
+
+/* ==================== Supported Cipher Algorithms ==================== */
+
+static struct iproc_alg_s driver_algs[] = {
+	{
+	 .type = CRYPTO_ALG_TYPE_AEAD,
+	 .alg.aead = {
+		 .base = {
+			.cra_name = "gcm(aes)",
+			.cra_driver_name = "gcm-aes-iproc",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_NEED_FALLBACK
+		 },
+		 .setkey = aead_gcm_ccm_setkey,
+		 .ivsize = GCM_AES_IV_SIZE,
+		.maxauthsize = AES_BLOCK_SIZE,
+	 },
+	 .cipher_info = {
+			 .alg = CIPHER_ALG_AES,
+			 .mode = CIPHER_MODE_GCM,
+			 },
+	 .auth_info = {
+		       .alg = HASH_ALG_AES,
+		       .mode = HASH_MODE_GCM,
+		       },
+	 .auth_first = 0,
+	 },
+	{
+	 .type = CRYPTO_ALG_TYPE_AEAD,
+	 .alg.aead = {
+		 .base = {
+			.cra_name = "ccm(aes)",
+			.cra_driver_name = "ccm-aes-iproc",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_NEED_FALLBACK
+		 },
+		 .setkey = aead_gcm_ccm_setkey,
+		 .ivsize = CCM_AES_IV_SIZE,
+		.maxauthsize = AES_BLOCK_SIZE,
+	 },
+	 .cipher_info = {
+			 .alg = CIPHER_ALG_AES,
+			 .mode = CIPHER_MODE_CCM,
+			 },
+	 .auth_info = {
+		       .alg = HASH_ALG_AES,
+		       .mode = HASH_MODE_CCM,
+		       },
+	 .auth_first = 0,
+	 },
+	{
+	 .type = CRYPTO_ALG_TYPE_AEAD,
+	 .alg.aead = {
+		 .base = {
+			.cra_name = "rfc4106(gcm(aes))",
+			.cra_driver_name = "gcm-aes-esp-iproc",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_NEED_FALLBACK
+		 },
+		 .setkey = aead_gcm_esp_setkey,
+		 .ivsize = GCM_ESP_IV_SIZE,
+		 .maxauthsize = AES_BLOCK_SIZE,
+	 },
+	 .cipher_info = {
+			 .alg = CIPHER_ALG_AES,
+			 .mode = 
CIPHER_MODE_GCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_GCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "rfc4309(ccm(aes))", + .cra_driver_name = "ccm-aes-esp-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK + }, + .setkey = aead_ccm_esp_setkey, + .ivsize = CCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_CCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "rfc4543(gcm(aes))", + .cra_driver_name = "gmac-aes-esp-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK + }, + .setkey = rfc4543_gcm_esp_setkey, + .ivsize = GCM_ESP_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_GCM, + }, + .auth_info = { + .alg = HASH_ALG_AES, + .mode = HASH_MODE_GCM, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(aes))", + .cra_driver_name = "authenc-hmac-md5-cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha1-cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + 
}, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha256-cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des))", + .cra_driver_name = "authenc-hmac-md5-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(des))", + .cra_driver_name = "authenc-hmac-sha1-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(des))", + .cra_driver_name = "authenc-hmac-sha224-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + 
.cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(des))", + .cra_driver_name = "authenc-hmac-sha256-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(des))", + .cra_driver_name = "authenc-hmac-sha384-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(des))", + .cra_driver_name = "authenc-hmac-sha512-cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = 
CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-md5-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha1-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha256-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = 
DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha384-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + { + .type = CRYPTO_ALG_TYPE_AEAD, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha512-cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC + }, + .setkey = aead_authenc_setkey, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HMAC, + }, + .auth_first = 0, + }, + +/* ABLKCIPHER algorithms. 
*/ + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(arc4)", + .cra_driver_name = "ecb-arc4-iproc", + .cra_blocksize = ARC4_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = ARC4_MIN_KEY_SIZE, + .max_keysize = ARC4_MAX_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_RC4, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ofb(des)", + .cra_driver_name = "ofb-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_OFB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-iproc", + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_DES, + .mode = CIPHER_MODE_ECB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ofb(des3_ede)", + .cra_driver_name = "ofb-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 
DES3_EDE_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_OFB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3-iproc", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_3DES, + .mode = CIPHER_MODE_ECB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ofb(aes)", + .cra_driver_name = "ofb-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_OFB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CBC, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = 
CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = 0, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_ECB, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + /* .geniv = "chainiv", */ + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_CTR, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, +{ + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .alg.crypto = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-iproc", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_AES, + .mode = CIPHER_MODE_XTS, + }, + .auth_info = { + .alg = HASH_ALG_NONE, + .mode = HASH_MODE_NONE, + }, + }, + +/* AHASH algorithms. 
*/ + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "md5", + .cra_driver_name = "md5-iproc", + .cra_blocksize = MD5_BLOCK_WORDS * 4, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = MD5_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(md5)", + .cra_driver_name = "hmac-md5-iproc", + .cra_blocksize = MD5_BLOCK_WORDS * 4, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_MD5, + .mode = HASH_MODE_HMAC, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-iproc", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HASH, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "hmac-sha1-iproc", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA1, + .mode = HASH_MODE_HMAC, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-iproc", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HASH, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = 
SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "hmac-sha224-iproc", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA224, + .mode = HASH_MODE_HMAC, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-iproc", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HASH, + }, + }, + {.type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "hmac-sha256-iproc", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA256, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-iproc", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "hmac-sha384-iproc", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA384, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = 
"sha512-iproc", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "hmac-sha512-iproc", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA512, + .mode = HASH_MODE_HMAC, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha3-224", + .cra_driver_name = "sha3-224-iproc", + .cra_blocksize = SHA3_224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_224, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_224_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha3-224)", + .cra_driver_name = "hmac-sha3-224-iproc", + .cra_blocksize = SHA3_224_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_224, + .mode = HASH_MODE_HMAC + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha3-256", + .cra_driver_name = "sha3-256-iproc", + .cra_blocksize = SHA3_256_BLOCK_SIZE, + } + }, + .cipher_info = { + .alg = CIPHER_ALG_NONE, + .mode = CIPHER_MODE_NONE, + }, + .auth_info = { + .alg = HASH_ALG_SHA3_256, + .mode = HASH_MODE_HASH, + }, + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .alg.hash = { + .halg.digestsize = SHA3_256_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha3-256)", + .cra_driver_name = "hmac-sha3-256-iproc", + .cra_blocksize = 
SHA3_256_BLOCK_SIZE,
+			}
+		},
+		.cipher_info = {
+			.alg = CIPHER_ALG_NONE,
+			.mode = CIPHER_MODE_NONE,
+		},
+		.auth_info = {
+			.alg = HASH_ALG_SHA3_256,
+			.mode = HASH_MODE_HMAC,
+		},
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.alg.hash = {
+			.halg.digestsize = SHA3_384_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "sha3-384",
+				.cra_driver_name = "sha3-384-iproc",
+				/*
+				 * Was SHA3_224_BLOCK_SIZE; use the SHA3-384
+				 * constant, as the hmac(sha3-384) entry
+				 * below does.
+				 */
+				.cra_blocksize = SHA3_384_BLOCK_SIZE,
+			}
+		},
+		.cipher_info = {
+			.alg = CIPHER_ALG_NONE,
+			.mode = CIPHER_MODE_NONE,
+		},
+		.auth_info = {
+			.alg = HASH_ALG_SHA3_384,
+			.mode = HASH_MODE_HASH,
+		},
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.alg.hash = {
+			.halg.digestsize = SHA3_384_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "hmac(sha3-384)",
+				.cra_driver_name = "hmac-sha3-384-iproc",
+				.cra_blocksize = SHA3_384_BLOCK_SIZE,
+			}
+		},
+		.cipher_info = {
+			.alg = CIPHER_ALG_NONE,
+			.mode = CIPHER_MODE_NONE,
+		},
+		.auth_info = {
+			.alg = HASH_ALG_SHA3_384,
+			.mode = HASH_MODE_HMAC,
+		},
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.alg.hash = {
+			.halg.digestsize = SHA3_512_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "sha3-512",
+				.cra_driver_name = "sha3-512-iproc",
+				.cra_blocksize = SHA3_512_BLOCK_SIZE,
+			}
+		},
+		.cipher_info = {
+			.alg = CIPHER_ALG_NONE,
+			.mode = CIPHER_MODE_NONE,
+		},
+		.auth_info = {
+			.alg = HASH_ALG_SHA3_512,
+			.mode = HASH_MODE_HASH,
+		},
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.alg.hash = {
+			.halg.digestsize = SHA3_512_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "hmac(sha3-512)",
+				.cra_driver_name = "hmac-sha3-512-iproc",
+				.cra_blocksize = SHA3_512_BLOCK_SIZE,
+			}
+		},
+		.cipher_info = {
+			.alg = CIPHER_ALG_NONE,
+			.mode = CIPHER_MODE_NONE,
+		},
+		.auth_info = {
+			.alg = HASH_ALG_SHA3_512,
+			.mode = HASH_MODE_HMAC,
+		},
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.alg.hash = {
+			.halg.digestsize = AES_BLOCK_SIZE,
+			.halg.base = {
+				.cra_name = "xcbc(aes)",
+				.cra_driver_name = "xcbc-aes-iproc",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			}
+		},
+		.cipher_info = {
+			.alg =
CIPHER_ALG_NONE,
+			.mode = CIPHER_MODE_NONE,
+		},
+		.auth_info = {
+			.alg = HASH_ALG_AES,
+			.mode = HASH_MODE_XCBC,
+		},
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.alg.hash = {
+			.halg.digestsize = AES_BLOCK_SIZE,
+			.halg.base = {
+				.cra_name = "cmac(aes)",
+				.cra_driver_name = "cmac-aes-iproc",
+				.cra_blocksize = AES_BLOCK_SIZE,
+			}
+		},
+		.cipher_info = {
+			.alg = CIPHER_ALG_NONE,
+			.mode = CIPHER_MODE_NONE,
+		},
+		.auth_info = {
+			.alg = HASH_ALG_AES,
+			.mode = HASH_MODE_CMAC,
+		},
+	},
+};
+
+/*
+ * Common transform initialisation: bind the context to its driver_algs[]
+ * entry, copy the cipher/auth descriptors, ask the SPU layer for the
+ * maximum payload, clear key lengths and the fallback pointer, and bump the
+ * stream and session counters. Always returns 0.
+ */
+static int generic_cra_init(struct crypto_tfm *tfm,
+			    struct iproc_alg_s *cipher_alg)
+{
+	struct spu_hw *spu = &iproc_priv.spu;
+	struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm);
+	unsigned int blocksize = crypto_tfm_alg_blocksize(tfm);
+
+	flow_log("%s()\n", __func__);
+
+	ctx->alg = cipher_alg;
+	ctx->cipher = cipher_alg->cipher_info;
+	ctx->auth = cipher_alg->auth_info;
+	ctx->auth_first = cipher_alg->auth_first;
+	ctx->max_payload = spu->spu_ctx_max_payload(ctx->cipher.alg,
+						    ctx->cipher.mode,
+						    blocksize);
+	ctx->fallback_cipher = NULL;
+
+	ctx->enckeylen = 0;
+	ctx->authkeylen = 0;
+
+	atomic_inc(&iproc_priv.stream_count);
+	atomic_inc(&iproc_priv.session_count);
+
+	return 0;
+}
+
+/*
+ * ablkcipher init: set the per-request context size, then run the common
+ * initialisation for the driver_algs[] entry that embeds this crypto_alg.
+ */
+static int ablkcipher_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct iproc_alg_s *cipher_alg;
+
+	flow_log("%s()\n", __func__);
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct iproc_reqctx_s);
+
+	cipher_alg = container_of(alg, struct iproc_alg_s, alg.crypto);
+	return generic_cra_init(tfm, cipher_alg);
+}
+
+/*
+ * ahash init: run the common initialisation for the driver_algs[] entry
+ * that embeds this ahash_alg, then set the per-request context size.
+ */
+static int ahash_cra_init(struct crypto_tfm *tfm)
+{
+	int err;
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct iproc_alg_s *cipher_alg;
+
+	cipher_alg = container_of(__crypto_ahash_alg(alg), struct iproc_alg_s,
+				  alg.hash);
+
+	err = generic_cra_init(tfm, cipher_alg);
+	flow_log("%s()\n", __func__);
+
+	/*
+	 * export state size has to be < 512 bytes. So don't include msg bufs
+	 * in state size.
+	 */
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct iproc_reqctx_s));
+
+	return err;
+}
+
+/*
+ * AEAD init: common initialisation plus AEAD-specific context defaults, a
+ * random first IV and, for algorithms flagged CRYPTO_ALG_NEED_FALLBACK, a
+ * fallback transform allocated under the same cra_name.
+ *
+ * Returns 0 on success, the generic_cra_init() error, or the PTR_ERR() of a
+ * failed fallback allocation.
+ */
+static int aead_cra_init(struct crypto_aead *aead)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct aead_alg *aalg = container_of(alg, struct aead_alg, base);
+	struct iproc_alg_s *cipher_alg = container_of(aalg, struct iproc_alg_s,
+						      alg.aead);
+	struct crypto_aead *fallback;
+
+	int err = generic_cra_init(tfm, cipher_alg);
+
+	flow_log("%s()\n", __func__);
+
+	crypto_aead_set_reqsize(aead, sizeof(struct iproc_reqctx_s));
+	ctx->is_esp = false;
+	ctx->salt_len = 0;
+	ctx->salt_offset = 0;
+
+	/* random first IV */
+	get_random_bytes(ctx->iv, MAX_IV_SIZE);
+	flow_dump(" iv: ", ctx->iv, MAX_IV_SIZE);
+
+	if (err)
+		return err;
+
+	if (!(alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK))
+		return 0;
+
+	flow_log("%s() creating fallback cipher\n", __func__);
+
+	fallback = crypto_alloc_aead(alg->cra_name, 0,
+				     CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback)) {
+		/*
+		 * Do not store the ERR_PTR in the context. aead_cra_exit()
+		 * and aead_setauthsize() only test ctx->fallback_cipher for
+		 * non-NULL, so an error pointer there would be treated as a
+		 * live transform. generic_cra_init() already left it NULL.
+		 */
+		pr_err("%s() Error: failed to allocate fallback for %s\n",
+		       __func__, alg->cra_name);
+		return PTR_ERR(fallback);
+	}
+	ctx->fallback_cipher = fallback;
+
+	return 0;
+}
+
+/* Common transform teardown: drop the session counter. */
+static void generic_cra_exit(struct crypto_tfm *tfm)
+{
+	atomic_dec(&iproc_priv.session_count);
+}
+
+/* AEAD teardown: common teardown, then free the fallback if one exists. */
+static void aead_cra_exit(struct crypto_aead *aead)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm);
+
+	generic_cra_exit(tfm);
+
+	if (ctx->fallback_cipher) {
+		crypto_free_aead(ctx->fallback_cipher);
+		ctx->fallback_cipher = NULL;
+	}
+}
+
+/**
+ * spu_functions_register() - Specify hardware-specific SPU functions based on
+ * SPU type read from device tree.
+ * @dev: device structure
+ * @spu_type: SPU hardware generation
+ * @spu_subtype: SPU hardware version
+ */
+static void spu_functions_register(struct device *dev,
+				   enum spu_spu_type spu_type,
+				   enum spu_spu_subtype spu_subtype)
+{
+	struct spu_hw *spu = &iproc_priv.spu;
+
+	/* Anything that is not SPU-M gets the SPU2 callbacks. */
+	if (spu_type != SPU_TYPE_SPUM) {
+		dev_dbg(dev, "Registering SPU2 functions");
+		spu->spu_dump_msg_hdr = spu2_dump_msg_hdr;
+		spu->spu_ctx_max_payload = spu2_ctx_max_payload;
+		spu->spu_payload_length = spu2_payload_length;
+		spu->spu_response_hdr_len = spu2_response_hdr_len;
+		spu->spu_hash_pad_len = spu2_hash_pad_len;
+		spu->spu_gcm_ccm_pad_len = spu2_gcm_ccm_pad_len;
+		spu->spu_assoc_resp_len = spu2_assoc_resp_len;
+		spu->spu_aead_ivlen = spu2_aead_ivlen;
+		spu->spu_hash_type = spu2_hash_type;
+		spu->spu_digest_size = spu2_digest_size;
+		spu->spu_create_request = spu2_create_request;
+		spu->spu_cipher_req_init = spu2_cipher_req_init;
+		spu->spu_cipher_req_finish = spu2_cipher_req_finish;
+		spu->spu_request_pad = spu2_request_pad;
+		spu->spu_tx_status_len = spu2_tx_status_len;
+		spu->spu_rx_status_len = spu2_rx_status_len;
+		spu->spu_status_process = spu2_status_process;
+		spu->spu_xts_tweak_in_payload = spu2_xts_tweak_in_payload;
+		spu->spu_ccm_update_iv = spu2_ccm_update_iv;
+		spu->spu_wordalign_padlen = spu2_wordalign_padlen;
+		return;
+	}
+
+	dev_dbg(dev, "Registering SPUM functions");
+	spu->spu_dump_msg_hdr = spum_dump_msg_hdr;
+	spu->spu_payload_length = spum_payload_length;
+	spu->spu_response_hdr_len = spum_response_hdr_len;
+	spu->spu_hash_pad_len = spum_hash_pad_len;
+	spu->spu_gcm_ccm_pad_len = spum_gcm_ccm_pad_len;
+	spu->spu_assoc_resp_len = spum_assoc_resp_len;
+	spu->spu_aead_ivlen = spum_aead_ivlen;
+	spu->spu_hash_type = spum_hash_type;
+	spu->spu_digest_size = spum_digest_size;
+	spu->spu_create_request = spum_create_request;
+	spu->spu_cipher_req_init = spum_cipher_req_init;
+	spu->spu_cipher_req_finish = spum_cipher_req_finish;
+	spu->spu_request_pad = spum_request_pad;
+	spu->spu_tx_status_len = spum_tx_status_len;
+	spu->spu_rx_status_len = spum_rx_status_len;
+	spu->spu_status_process = spum_status_process;
+	spu->spu_xts_tweak_in_payload = spum_xts_tweak_in_payload;
+	spu->spu_ccm_update_iv = spum_ccm_update_iv;
+	spu->spu_wordalign_padlen = spum_wordalign_padlen;
+
+	/* Only the max-payload callback differs between SPU-M subtypes. */
+	spu->spu_ctx_max_payload = (spu_subtype == SPU_SUBTYPE_SPUM_NS2) ?
+			spum_ns2_ctx_max_payload : spum_nsp_ctx_max_payload;
+}
+
+/**
+ * spu_mb_init() - Set up the mailbox client for the SPU being probed and
+ * request mailbox channel 0 for it.
+ * @dev:  SPU driver device structure
+ *
+ * The client and channel slots used are the ones indexed by the current
+ * iproc_priv.spu.num_spu. On failure the channel slot is left NULL.
+ *
+ * Return: 0 if successful
+ *	   < 0 otherwise
+ */
+static int spu_mb_init(struct device *dev)
+{
+	struct mbox_client *mcl = &iproc_priv.mcl[iproc_priv.spu.num_spu];
+	struct mbox_chan *chan;
+	int err;
+
+	mcl->dev = dev;
+	mcl->tx_block = false;
+	mcl->tx_tout = 0;
+	mcl->knows_txdone = false;
+	mcl->rx_callback = spu_rx_callback;
+	mcl->tx_done = NULL;
+
+	chan = mbox_request_channel(mcl, 0);
+	if (!IS_ERR(chan)) {
+		iproc_priv.mbox[iproc_priv.spu.num_spu] = chan;
+		return 0;
+	}
+
+	err = (int)PTR_ERR(chan);
+	dev_err(dev,
+		"Mbox channel %d request failed with err %d",
+		iproc_priv.spu.num_spu, err);
+	iproc_priv.mbox[iproc_priv.spu.num_spu] = NULL;
+	return err;
+}
+
+/* Free the mailbox channel of every slot below iproc_priv.spu.num_spu. */
+static void spu_mb_release(struct platform_device *pdev)
+{
+	int i = 0;
+
+	while (i < iproc_priv.spu.num_spu) {
+		mbox_free_channel(iproc_priv.mbox[i]);
+		i++;
+	}
+}
+
+static void spu_counters_init(void)
+{
+	int i;
+	int j;
+
+	atomic_set(&iproc_priv.session_count, 0);
+	atomic_set(&iproc_priv.stream_count, 0);
+	atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_spu);
+	atomic64_set(&iproc_priv.bytes_in, 0);
+	atomic64_set(&iproc_priv.bytes_out, 0);
+	for (i = 0; i < SPU_OP_NUM; i++) {
+		atomic_set(&iproc_priv.op_counts[i], 0);
+		atomic_set(&iproc_priv.setkey_cnt[i], 0);
+	}
+	for (i = 0; i < CIPHER_ALG_LAST; i++)
+		for (j =
0; j < CIPHER_MODE_LAST; j++)
+			atomic_set(&iproc_priv.cipher_cnt[i][j], 0);
+
+	for (i = 0; i < HASH_ALG_LAST; i++) {
+		atomic_set(&iproc_priv.hash_cnt[i], 0);
+		atomic_set(&iproc_priv.hmac_cnt[i], 0);
+	}
+	for (i = 0; i < AEAD_TYPE_LAST; i++)
+		atomic_set(&iproc_priv.aead_cnt[i], 0);
+
+	atomic_set(&iproc_priv.mb_no_spc, 0);
+	atomic_set(&iproc_priv.mb_send_fail, 0);
+	atomic_set(&iproc_priv.bad_icv, 0);
+}
+
+/*
+ * Fill in the common crypto_alg fields of an ablkcipher entry and register
+ * it. RC4 entries are silently skipped (return 0, not marked registered)
+ * when the hardware is SPU2.
+ *
+ * Returns 0 on success or skip, otherwise the crypto_register_alg() error.
+ */
+static int spu_register_ablkcipher(struct iproc_alg_s *driver_alg)
+{
+	struct spu_hw *spu = &iproc_priv.spu;
+	struct crypto_alg *crypto = &driver_alg->alg.crypto;
+	int err;
+
+	/* SPU2 does not support RC4 */
+	if ((driver_alg->cipher_info.alg == CIPHER_ALG_RC4) &&
+	    (spu->spu_type == SPU_TYPE_SPU2))
+		return 0;
+
+	crypto->cra_module = THIS_MODULE;
+	crypto->cra_priority = cipher_pri;
+	crypto->cra_alignmask = 0;
+	crypto->cra_ctxsize = sizeof(struct iproc_ctx_s);
+	INIT_LIST_HEAD(&crypto->cra_list);
+
+	crypto->cra_init = ablkcipher_cra_init;
+	crypto->cra_exit = generic_cra_exit;
+	crypto->cra_type = &crypto_ablkcipher_type;
+	crypto->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_KERN_DRIVER_ONLY;
+
+	crypto->cra_ablkcipher.setkey = ablkcipher_setkey;
+	crypto->cra_ablkcipher.encrypt = ablkcipher_encrypt;
+	crypto->cra_ablkcipher.decrypt = ablkcipher_decrypt;
+
+	err = crypto_register_alg(crypto);
+	if (err)
+		return err;
+
+	/* Mark alg as registered and say so only once that is true */
+	driver_alg->registered = true;
+	pr_debug(" registered ablkcipher %s\n", crypto->cra_driver_name);
+	return 0;
+}
+
+/*
+ * Fill in the common ahash_alg fields of a hash entry, pick the plain or
+ * HMAC callback set from auth_info.mode, and register it. Entries the
+ * hardware cannot run are silently skipped (return 0, not marked
+ * registered).
+ *
+ * Returns 0 on success or skip, otherwise the crypto_register_ahash() error.
+ */
+static int spu_register_ahash(struct iproc_alg_s *driver_alg)
+{
+	struct spu_hw *spu = &iproc_priv.spu;
+	struct ahash_alg *hash = &driver_alg->alg.hash;
+	int err;
+
+	/* AES-XCBC is the only AES hash type currently supported on SPU-M */
+	if ((driver_alg->auth_info.alg == HASH_ALG_AES) &&
+	    (driver_alg->auth_info.mode != HASH_MODE_XCBC) &&
+	    (spu->spu_type == SPU_TYPE_SPUM))
+		return 0;
+
+	/* SHA3 algorithm variants are not registered for SPU-M or SPU2. */
+	if ((driver_alg->auth_info.alg >= HASH_ALG_SHA3_224) &&
+	    (spu->spu_subtype != SPU_SUBTYPE_SPU2_V2))
+		return 0;
+
+	hash->halg.base.cra_module = THIS_MODULE;
+	hash->halg.base.cra_priority = hash_pri;
+	hash->halg.base.cra_alignmask = 0;
+	hash->halg.base.cra_ctxsize = sizeof(struct iproc_ctx_s);
+	hash->halg.base.cra_init = ahash_cra_init;
+	hash->halg.base.cra_exit = generic_cra_exit;
+	hash->halg.base.cra_type = &crypto_ahash_type;
+	hash->halg.base.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC;
+	hash->halg.statesize = sizeof(struct spu_hash_export_s);
+
+	if (driver_alg->auth_info.mode != HASH_MODE_HMAC) {
+		hash->setkey = ahash_setkey;
+		hash->init = ahash_init;
+		hash->update = ahash_update;
+		hash->final = ahash_final;
+		hash->finup = ahash_finup;
+		hash->digest = ahash_digest;
+	} else {
+		hash->setkey = ahash_hmac_setkey;
+		hash->init = ahash_hmac_init;
+		hash->update = ahash_hmac_update;
+		hash->final = ahash_hmac_final;
+		hash->finup = ahash_hmac_finup;
+		hash->digest = ahash_hmac_digest;
+	}
+	hash->export = ahash_export;
+	hash->import = ahash_import;
+
+	err = crypto_register_ahash(hash);
+	if (err)
+		return err;
+
+	/* Mark alg as registered and say so only once that is true */
+	driver_alg->registered = true;
+	pr_debug(" registered ahash %s\n",
+		 hash->halg.base.cra_driver_name);
+	return 0;
+}
+
+static int spu_register_aead(struct iproc_alg_s *driver_alg)
+{
+	struct aead_alg *aead = &driver_alg->alg.aead;
+	int err;
+
+	aead->base.cra_module = THIS_MODULE;
+	aead->base.cra_priority = aead_pri;
+	aead->base.cra_alignmask = 0;
+	aead->base.cra_ctxsize = sizeof(struct iproc_ctx_s);
+	INIT_LIST_HEAD(&aead->base.cra_list);
+
+	aead->base.cra_flags |= CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+	/* setkey set in alg initialization */
+	aead->setauthsize = aead_setauthsize;
+	aead->encrypt = aead_encrypt;
+	aead->decrypt = aead_decrypt;
+	aead->init = aead_cra_init;
+	aead->exit = aead_cra_exit;
+
+	err = crypto_register_aead(aead);
+	/*
Mark alg as having been registered, if successful */ + if (err == 0) + driver_alg->registered = true; + pr_debug(" registered aead %s\n", aead->base.cra_driver_name); + return err; +} + +/* register crypto algorithms the device supports */ +static int spu_algs_register(struct device *dev) +{ + int i, j; + int err; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + switch (driver_algs[i].type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + err = spu_register_ablkcipher(&driver_algs[i]); + break; + case CRYPTO_ALG_TYPE_AHASH: + err = spu_register_ahash(&driver_algs[i]); + break; + case CRYPTO_ALG_TYPE_AEAD: + err = spu_register_aead(&driver_algs[i]); + break; + default: + dev_err(dev, + "iproc-crypto: unknown alg type: %d", + driver_algs[i].type); + err = -EINVAL; + } + + if (err) { + dev_err(dev, "alg registration failed with error %d\n", + err); + goto err_algs; + } + } + + return 0; + +err_algs: + for (j = 0; j < i; j++) { + /* Skip any algorithm not registered */ + if (!driver_algs[j].registered) + continue; + switch (driver_algs[j].type) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + crypto_unregister_alg(&driver_algs[j].alg.crypto); + driver_algs[j].registered = false; + break; + case CRYPTO_ALG_TYPE_AHASH: + crypto_unregister_ahash(&driver_algs[j].alg.hash); + driver_algs[j].registered = false; + break; + case CRYPTO_ALG_TYPE_AEAD: + crypto_unregister_aead(&driver_algs[j].alg.aead); + driver_algs[j].registered = false; + break; + } + } + return err; +} + +/* ==================== Kernel Platform API ==================== */ + +static struct spu_type_subtype spum_ns2_types = { + SPU_TYPE_SPUM, SPU_SUBTYPE_SPUM_NS2 +}; + +static struct spu_type_subtype spum_nsp_types = { + SPU_TYPE_SPUM, SPU_SUBTYPE_SPUM_NSP +}; + +static struct spu_type_subtype spu2_types = { + SPU_TYPE_SPU2, SPU_SUBTYPE_SPU2_V1 +}; + +static struct spu_type_subtype spu2_v2_types = { + SPU_TYPE_SPU2, SPU_SUBTYPE_SPU2_V2 +}; + +static const struct of_device_id bcm_spu_dt_ids[] = { + { + .compatible = 
"brcm,spum-crypto", + .data = &spum_ns2_types, + }, + { + .compatible = "brcm,spum-nsp-crypto", + .data = &spum_nsp_types, + }, + { + .compatible = "brcm,spu2-crypto", + .data = &spu2_types, + }, + { + .compatible = "brcm,spu2-v2-crypto", + .data = &spu2_v2_types, + }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(of, bcm_spu_dt_ids); + +static int spu_dt_read(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct spu_hw *spu = &iproc_priv.spu; + struct resource *spu_ctrl_regs; + const struct of_device_id *match; + const struct spu_type_subtype *matched_spu_type; + void __iomem *spu_reg_vbase[MAX_SPUS]; + int err; + + match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev); + matched_spu_type = match->data; + + if (iproc_priv.spu.num_spu > 1) { + /* If this is 2nd or later SPU, make sure it's same type */ + if ((spu->spu_type != matched_spu_type->type) || + (spu->spu_subtype != matched_spu_type->subtype)) { + err = -EINVAL; + dev_err(&pdev->dev, "Multiple SPU types not allowed"); + return err; + } + } else { + /* Record type of first SPU */ + spu->spu_type = matched_spu_type->type; + spu->spu_subtype = matched_spu_type->subtype; + } + + /* Get and map SPU registers */ + spu_ctrl_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!spu_ctrl_regs) { + err = -EINVAL; + dev_err(&pdev->dev, "Invalid/missing registers for SPU\n"); + return err; + } + + spu_reg_vbase[iproc_priv.spu.num_spu] = + devm_ioremap_resource(dev, spu_ctrl_regs); + if (IS_ERR(spu_reg_vbase[iproc_priv.spu.num_spu])) { + err = PTR_ERR(spu_reg_vbase[iproc_priv.spu.num_spu]); + dev_err(&pdev->dev, "Failed to map registers: %d\n", + err); + spu_reg_vbase[iproc_priv.spu.num_spu] = NULL; + return err; + } + + dev_dbg(dev, "SPU %d detected.", iproc_priv.spu.num_spu); + + spu->reg_vbase[iproc_priv.spu.num_spu] = spu_reg_vbase; + + return 0; +} + +int bcm_spu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct spu_hw *spu = &iproc_priv.spu; + 
int err = 0; + + iproc_priv.pdev[iproc_priv.spu.num_spu] = pdev; + platform_set_drvdata(iproc_priv.pdev[iproc_priv.spu.num_spu], + &iproc_priv); + + err = spu_dt_read(pdev); + if (err < 0) + goto failure; + + err = spu_mb_init(&pdev->dev); + if (err < 0) + goto failure; + + iproc_priv.spu.num_spu++; + + /* If already initialized, we've just added another SPU and are done */ + if (iproc_priv.inited) + return 0; + + if (spu->spu_type == SPU_TYPE_SPUM) + iproc_priv.bcm_hdr_len = 8; + else if (spu->spu_type == SPU_TYPE_SPU2) + iproc_priv.bcm_hdr_len = 0; + + spu_functions_register(&pdev->dev, spu->spu_type, spu->spu_subtype); + + spu_counters_init(); + + spu_setup_debugfs(); + + err = spu_algs_register(dev); + if (err < 0) + goto fail_reg; + + iproc_priv.inited = true; + + return 0; + +fail_reg: + spu_free_debugfs(); +failure: + spu_mb_release(pdev); + dev_err(dev, "%s failed with error %d.\n", __func__, err); + + return err; +} + +int bcm_spu_remove(struct platform_device *pdev) +{ + int i; + struct device *dev = &pdev->dev; + char *cdn; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + /* + * Not all algorithms were registered, depending on whether + * hardware is SPU or SPU2. So here we make sure to skip + * those algorithms that were not previously registered. 
+		 *
+		 * NOTE(review): this loop unregisters every algorithm, and the
+		 * calls after it free debugfs, on removal of any single SPU
+		 * device - confirm that is intended when several SPUs are
+		 * bound to the driver.
+		 */
+		if (!driver_algs[i].registered)
+			continue;
+
+		switch (driver_algs[i].type) {	/* pick the matching unregister call */
+		case CRYPTO_ALG_TYPE_ABLKCIPHER:
+			crypto_unregister_alg(&driver_algs[i].alg.crypto);
+			dev_dbg(dev, " unregistered cipher %s\n",
+				driver_algs[i].alg.crypto.cra_driver_name);
+			driver_algs[i].registered = false;
+			break;
+		case CRYPTO_ALG_TYPE_AHASH:
+			crypto_unregister_ahash(&driver_algs[i].alg.hash);
+			cdn = driver_algs[i].alg.hash.halg.base.cra_driver_name;
+			dev_dbg(dev, " unregistered hash %s\n", cdn);
+			driver_algs[i].registered = false;
+			break;
+		case CRYPTO_ALG_TYPE_AEAD:
+			crypto_unregister_aead(&driver_algs[i].alg.aead);
+			dev_dbg(dev, " unregistered aead %s\n",
+				driver_algs[i].alg.aead.base.cra_driver_name);
+			driver_algs[i].registered = false;
+			break;
+		}
+	}
+	spu_free_debugfs();
+	spu_mb_release(pdev);
+	return 0;
+}
+
+/*
+ * ===== Kernel Module API =====
+ *
+ * Platform driver glue: devices matched through bcm_spu_dt_ids are handed to
+ * bcm_spu_probe() and bcm_spu_remove().
+ */
+
+static struct platform_driver bcm_spu_pdriver = {
+	.driver = {
+		   .name = "brcm-spu-crypto",
+		   .of_match_table = of_match_ptr(bcm_spu_dt_ids),
+		   },
+	.probe = bcm_spu_probe,
+	.remove = bcm_spu_remove,
+};
+module_platform_driver(bcm_spu_pdriver);
+
+MODULE_AUTHOR("Rob Rice ");
+MODULE_DESCRIPTION("Broadcom symmetric crypto offload driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h
new file mode 100644
index 000000000000..51dca529ce8f
--- /dev/null
+++ b/drivers/crypto/bcm/cipher.h
@@ -0,0 +1,483 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#ifndef _CIPHER_H
+#define _CIPHER_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "spu.h"
+#include "spum.h"
+#include "spu2.h"
+
+/* Driver supports up to MAX_SPUS SPU blocks */
+#define MAX_SPUS 16
+
+#define ARC4_MIN_KEY_SIZE 1
+#define ARC4_MAX_KEY_SIZE 256
+#define ARC4_BLOCK_SIZE 1
+#define ARC4_STATE_SIZE 4
+
+#define CCM_AES_IV_SIZE 16
+#define GCM_AES_IV_SIZE 12
+#define GCM_ESP_IV_SIZE 8
+#define CCM_ESP_IV_SIZE 8
+#define RFC4543_ICV_SIZE 16
+
+#define MAX_KEY_SIZE ARC4_MAX_KEY_SIZE
+#define MAX_IV_SIZE AES_BLOCK_SIZE
+#define MAX_DIGEST_SIZE SHA3_512_DIGEST_SIZE
+#define MAX_ASSOC_SIZE 512
+
+/* size of salt value for AES-GCM-ESP and AES-CCM-ESP */
+#define GCM_ESP_SALT_SIZE 4
+#define CCM_ESP_SALT_SIZE 3
+#define MAX_SALT_SIZE GCM_ESP_SALT_SIZE
+#define GCM_ESP_SALT_OFFSET 0
+#define CCM_ESP_SALT_OFFSET 1
+
+#define GCM_ESP_DIGESTSIZE 16
+
+#define MAX_HASH_BLOCK_SIZE SHA512_BLOCK_SIZE
+
+/*
+ * Maximum number of bytes from a non-final hash request that can be deferred
+ * until more data is available. With new crypto API framework, this
+ * can be no more than one block of data.
+ */
+#define HASH_CARRY_MAX MAX_HASH_BLOCK_SIZE
+
+/* Force at least 4-byte alignment of all SPU message fields */
+#define SPU_MSG_ALIGN 4
+
+/* Number of times to resend mailbox message if mb queue is full */
+#define SPU_MB_RETRY_MAX 1000
+
+/* op_counts[] and setkey_cnt[] indexes; SPU_OP_NUM sizes those arrays */
+enum op_type {
+	SPU_OP_CIPHER,
+	SPU_OP_HASH,
+	SPU_OP_HMAC,
+	SPU_OP_AEAD,
+	SPU_OP_NUM
+};
+
+enum spu_spu_type {
+	SPU_TYPE_SPUM,
+	SPU_TYPE_SPU2,
+};
+
+/*
+ * SPUM_NS2 and SPUM_NSP are the SPU-M block on Northstar 2 and Northstar Plus,
+ * respectively.
+ */
+enum spu_spu_subtype {
+	SPU_SUBTYPE_SPUM_NS2,
+	SPU_SUBTYPE_SPUM_NSP,
+	SPU_SUBTYPE_SPU2_V1,
+	SPU_SUBTYPE_SPU2_V2
+};
+
+struct spu_type_subtype {
+	enum spu_spu_type type;
+	enum spu_spu_subtype subtype;
+};
+
+struct cipher_op {
+	enum spu_cipher_alg alg;
+	enum spu_cipher_mode mode;
+};
+
+struct auth_op {
+	enum hash_alg alg;
+	enum hash_mode mode;
+};
+
+struct iproc_alg_s {
+	u32 type;	/* CRYPTO_ALG_TYPE_*; selects the member of alg in use */
+	union {
+		struct crypto_alg crypto;	/* CRYPTO_ALG_TYPE_ABLKCIPHER */
+		struct ahash_alg hash;		/* CRYPTO_ALG_TYPE_AHASH */
+		struct aead_alg aead;		/* CRYPTO_ALG_TYPE_AEAD */
+	} alg;
+	struct cipher_op cipher_info;
+	struct auth_op auth_info;
+	bool auth_first;
+	bool registered;	/* true while registered with the crypto API */
+};
+
+/*
+ * Buffers for a SPU request/reply message pair. All part of one structure to
+ * allow a single alloc per request.
+ */
+struct spu_msg_buf {
+	/* Request message fragments */
+
+	/*
+	 * SPU request message header. For SPU-M, holds MH, EMH, SCTX, BDESC,
+	 * and BD header. For SPU2, holds FMD, OMD.
+	 */
+	u8 bcm_spu_req_hdr[ALIGN(SPU2_HEADER_ALLOC_LEN, SPU_MSG_ALIGN)];
+
+	/* IV or counter. Size to include salt. Also used for XTS tweak. */
+	u8 iv_ctr[ALIGN(2 * AES_BLOCK_SIZE, SPU_MSG_ALIGN)];
+
+	/* Hash digest, for both request and response. */
+	u8 digest[ALIGN(MAX_DIGEST_SIZE, SPU_MSG_ALIGN)];
+
+	/* SPU request message padding */
+	u8 spu_req_pad[ALIGN(SPU_PAD_LEN_MAX, SPU_MSG_ALIGN)];
+
+	/* SPU-M request message STATUS field */
+	u8 tx_stat[ALIGN(SPU_TX_STATUS_LEN, SPU_MSG_ALIGN)];
+
+	/* Response message fragments */
+
+	/* SPU response message header */
+	u8 spu_resp_hdr[ALIGN(SPU2_HEADER_ALLOC_LEN, SPU_MSG_ALIGN)];
+
+	/* SPU response message STATUS field padding */
+	u8 rx_stat_pad[ALIGN(SPU_STAT_PAD_MAX, SPU_MSG_ALIGN)];
+
+	/* SPU response message STATUS field */
+	u8 rx_stat[ALIGN(SPU_RX_STATUS_LEN, SPU_MSG_ALIGN)];
+
+	union {
+		/* Buffers only used for ablkcipher */
+		struct {
+			/*
+			 * Field used for either SUPDT when RC4 is used
+			 * -OR- tweak value when XTS/AES is used
+			 */
+			u8 supdt_tweak[ALIGN(SPU_SUPDT_LEN, SPU_MSG_ALIGN)];
+		} c;
+
+		/* Buffers only used for aead */
+		struct {
+			/* SPU response pad for GCM data */
+			u8 gcmpad[ALIGN(AES_BLOCK_SIZE, SPU_MSG_ALIGN)];
+
+			/* SPU request msg padding for GCM AAD */
+			u8 req_aad_pad[ALIGN(SPU_PAD_LEN_MAX, SPU_MSG_ALIGN)];
+
+			/* SPU response data to be discarded */
+			u8 resp_aad[ALIGN(MAX_ASSOC_SIZE + MAX_IV_SIZE,
+					  SPU_MSG_ALIGN)];
+		} a;
+	};
+};
+
+struct iproc_ctx_s {
+	u8 enckey[MAX_KEY_SIZE + ARC4_STATE_SIZE];
+	unsigned int enckeylen;
+
+	u8 authkey[MAX_KEY_SIZE + ARC4_STATE_SIZE];
+	unsigned int authkeylen;
+
+	u8 salt[MAX_SALT_SIZE];
+	unsigned int salt_len;
+	unsigned int salt_offset;
+	u8 iv[MAX_IV_SIZE];
+
+	unsigned int digestsize;
+
+	struct iproc_alg_s *alg;
+	bool is_esp;
+
+	struct cipher_op cipher;
+	enum spu_cipher_type cipher_type;
+
+	struct auth_op auth;
+	bool auth_first;
+
+	/*
+	 * The maximum length in bytes of the payload in a SPU message for this
+	 * context. For SPU-M, the payload is the combination of AAD and data.
+	 * For SPU2, the payload is just data. A value of SPU_MAX_PAYLOAD_INF
+	 * indicates that there is no limit to the length of the SPU message
+	 * payload.
+	 */
+	unsigned int max_payload;
+
+	struct crypto_aead *fallback_cipher;
+
+	/* auth_type is determined during processing of request */
+
+	u8 ipad[MAX_HASH_BLOCK_SIZE];
+	u8 opad[MAX_HASH_BLOCK_SIZE];
+
+	/*
+	 * Buffer to hold SPU message header template. Template is created at
+	 * setkey time for ablkcipher requests, since most of the fields in the
+	 * header are known at that time. At request time, just fill in a few
+	 * missing pieces related to length of data in the request and IVs, etc.
+	 */
+	u8 bcm_spu_req_hdr[ALIGN(SPU2_HEADER_ALLOC_LEN, SPU_MSG_ALIGN)];
+
+	/* Length of SPU request header */
+	u16 spu_req_hdr_len;
+
+	/* Expected length of SPU response header */
+	u16 spu_resp_hdr_len;
+
+	/*
+	 * shash descriptor - needed to perform incremental hashing in
+	 * software, when hw doesn't support it.
+	 */
+	struct shash_desc *shash;
+
+	bool is_rfc4543;	/* RFC 4543 style of GMAC */
+};
+
+/* state from iproc_reqctx_s necessary for hash state export/import */
+struct spu_hash_export_s {
+	unsigned int total_todo;
+	unsigned int total_sent;
+	u8 hash_carry[HASH_CARRY_MAX];
+	unsigned int hash_carry_len;
+	u8 incr_hash[MAX_DIGEST_SIZE];
+	bool is_sw_hmac;
+};
+
+struct iproc_reqctx_s {
+	/* general context */
+	struct crypto_async_request *parent;
+
+	/* only valid after enqueue() */
+	struct iproc_ctx_s *ctx;
+
+	u8 chan_idx;	/* Mailbox channel to be used to submit this request */
+
+	/* total todo, rx'd, and sent for this request */
+	unsigned int total_todo;
+	unsigned int total_received;	/* only valid for ablkcipher */
+	unsigned int total_sent;
+
+	/*
+	 * num bytes sent to hw from the src sg in this request. This can differ
+	 * from total_sent for incremental hashing. total_sent includes previous
+	 * init() and update() data. src_sent does not.
+	 */
+	unsigned int src_sent;
+
+	/*
+	 * For AEAD requests, start of associated data. This will typically
+	 * point to the beginning of the src scatterlist from the request,
+	 * since assoc data is at the beginning of the src scatterlist rather
+	 * than in its own sg.
+	 */
+	struct scatterlist *assoc;
+
+	/*
+	 * scatterlist entry and offset to start of data for next chunk. Crypto
+	 * API src scatterlist for AEAD starts with AAD, if present. For first
+	 * chunk, src_sg is sg entry at beginning of input data (after AAD).
+	 * src_skip begins at the offset in that sg entry where data begins.
+	 */
+	struct scatterlist *src_sg;
+	int src_nents;		/* Number of src entries with data */
+	u32 src_skip;		/* bytes of current sg entry already used */
+
+	/*
+	 * Same for destination. For AEAD, if there is AAD, output data must
+	 * be written at offset following AAD.
+	 */
+	struct scatterlist *dst_sg;
+	int dst_nents;		/* Number of dst entries with data */
+	u32 dst_skip;		/* bytes of current sg entry already written */
+
+	/* Mailbox message used to send this request to PDC driver */
+	struct brcm_message mb_mssg;
+
+	bool bd_suppress;	/* suppress BD field in SPU response? */
+
+	/* cipher context */
+	bool is_encrypt;
+
+	/*
+	 * CBC mode: IV. CTR mode: counter. Else empty. Used as a DMA
+	 * buffer for AEAD requests. So allocate as DMAable memory. If IV
+	 * concatenated with salt, includes the salt.
+	 */
+	u8 *iv_ctr;
+	/* Length of IV or counter, in bytes */
+	unsigned int iv_ctr_len;
+
+	/*
+	 * Hash requests can be of any size, whether initial, update, or final.
+	 * A non-final request must be submitted to the SPU as an integral
+	 * number of blocks. This may leave data at the end of the request
+	 * that is not a full block. Since the request is non-final, it cannot
+	 * be padded. So, we write the remainder to this hash_carry buffer and
+	 * hold it until the next request arrives. The carry data is then
+	 * submitted at the beginning of the data in the next SPU msg.
+	 * hash_carry_len is the number of bytes currently in hash_carry. These
+	 * fields are only used for ahash requests.
+	 */
+	u8 hash_carry[HASH_CARRY_MAX];
+	unsigned int hash_carry_len;
+	unsigned int is_final;	/* is this the final for the hash op? */
+
+	/*
+	 * Digest from incremental hash is saved here to include in next hash
+	 * operation. Cannot be stored in req->result for truncated hashes,
+	 * since result may be sized for final digest. Cannot be saved in
+	 * msg_buf because that gets deleted between incremental hash ops
+	 * and is not saved as part of export().
+	 */
+	u8 incr_hash[MAX_DIGEST_SIZE];
+
+	/* hmac context */
+	bool is_sw_hmac;
+
+	/* aead context */
+	struct crypto_tfm *old_tfm;
+	crypto_completion_t old_complete;
+	void *old_data;
+
+	gfp_t gfp;
+
+	/* Buffers used to build SPU request and response messages */
+	struct spu_msg_buf msg_buf;
+};
+
+/*
+ * Structure encapsulates a set of function pointers specific to the type of
+ * SPU hardware running. These functions handle creation and parsing of
+ * SPU request messages and SPU response messages. Includes hardware-specific
+ * values read from device tree.
+ */
+struct spu_hw {
+	void (*spu_dump_msg_hdr)(u8 *buf, unsigned int buf_len);
+	u32 (*spu_ctx_max_payload)(enum spu_cipher_alg cipher_alg,
+				   enum spu_cipher_mode cipher_mode,
+				   unsigned int blocksize);
+	u32 (*spu_payload_length)(u8 *spu_hdr);
+	u16 (*spu_response_hdr_len)(u16 auth_key_len, u16 enc_key_len,
+				    bool is_hash);
+	u16 (*spu_hash_pad_len)(enum hash_alg hash_alg,
+				enum hash_mode hash_mode, u32 chunksize,
+				u16 hash_block_size);
+	u32 (*spu_gcm_ccm_pad_len)(enum spu_cipher_mode cipher_mode,
+				   unsigned int data_size);
+	u32 (*spu_assoc_resp_len)(enum spu_cipher_mode cipher_mode,
+				  unsigned int assoc_len,
+				  unsigned int iv_len, bool is_encrypt);
+	u8 (*spu_aead_ivlen)(enum spu_cipher_mode cipher_mode,
+			     u16 iv_len);
+	enum hash_type (*spu_hash_type)(u32 src_sent);
+	u32 (*spu_digest_size)(u32 digest_size, enum hash_alg alg,
+			       enum hash_type);
+	u32 (*spu_create_request)(u8 *spu_hdr,
+				  struct spu_request_opts *req_opts,
+				  struct spu_cipher_parms *cipher_parms,
+				  struct spu_hash_parms *hash_parms,
+				  struct spu_aead_parms *aead_parms,
+				  unsigned int data_size);
+	u16 (*spu_cipher_req_init)(u8 *spu_hdr,
+				   struct spu_cipher_parms *cipher_parms);
+	void (*spu_cipher_req_finish)(u8 *spu_hdr,
+				      u16 spu_req_hdr_len,
+				      unsigned int is_inbound,
+				      struct spu_cipher_parms *cipher_parms,
+				      bool update_key,
+				      unsigned int data_size);
+	void (*spu_request_pad)(u8 *pad_start, u32 gcm_padding,
+				u32 hash_pad_len, enum hash_alg auth_alg,
+				enum hash_mode auth_mode,
+				unsigned int total_sent, u32 status_padding);
+	u8 (*spu_xts_tweak_in_payload)(void);
+	u8 (*spu_tx_status_len)(void);
+	u8 (*spu_rx_status_len)(void);
+	int (*spu_status_process)(u8 *statp);
+	void (*spu_ccm_update_iv)(unsigned int digestsize,
+				  struct spu_cipher_parms *cipher_parms,
+				  unsigned int assoclen, unsigned int chunksize,
+				  bool is_encrypt, bool is_esp);
+	u32 (*spu_wordalign_padlen)(u32 data_size);
+
+	/* The base virtual address of the SPU hw registers, one per SPU */
+	void __iomem *reg_vbase[MAX_SPUS];
+
+	/* Version of the SPU hardware */
+	enum spu_spu_type spu_type;
+
+	/* Sub-version of the SPU hardware */
+	enum spu_spu_subtype spu_subtype;
+
+	/* The number of SPUs on this platform; counts the SPUs probed so far */
+	u32 num_spu;
+};
+
+struct device_private {
+	struct platform_device *pdev[MAX_SPUS];	/* filled in probe order */
+
+	struct spu_hw spu;
+
+	atomic_t session_count;	/* number of streams active */
+	atomic_t stream_count;	/* monotonic counter for streamID's */
+
+	/* Length of BCM header. Set to 0 when hw does not expect BCM HEADER. */
+	u8 bcm_hdr_len;
+
+	/* The index of the channel to use for the next crypto request */
+	atomic_t next_chan;
+
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_stats;
+
+	/* Number of request bytes processed and result bytes returned */
+	atomic64_t bytes_in;
+	atomic64_t bytes_out;
+
+	/* Number of operations of each type, indexed by enum op_type */
+	atomic_t op_counts[SPU_OP_NUM];
+
+	atomic_t cipher_cnt[CIPHER_ALG_LAST][CIPHER_MODE_LAST];
+	atomic_t hash_cnt[HASH_ALG_LAST];
+	atomic_t hmac_cnt[HASH_ALG_LAST];
+	atomic_t aead_cnt[AEAD_TYPE_LAST];
+
+	/* Number of calls to setkey() for each operation type */
+	atomic_t setkey_cnt[SPU_OP_NUM];
+
+	/* Number of times request was resubmitted because mb was full */
+	atomic_t mb_no_spc;
+
+	/* Number of mailbox send failures */
+	atomic_t mb_send_fail;
+
+	/* Number of ICV check failures for AEAD messages */
+	atomic_t bad_icv;
+
+	struct mbox_client mcl[MAX_SPUS];
+	/* Array of mailbox channel pointers, one for each channel */
+	struct mbox_chan *mbox[MAX_SPUS];
+
+	/* Driver initialized; set once the algorithms have been registered */
+	bool inited;
+};
+
+extern struct device_private iproc_priv;
+
+#endif
diff --git a/drivers/crypto/bcm/spu.c b/drivers/crypto/bcm/spu.c
new file mode 100644
index 000000000000..dbb5c03dde49
--- /dev/null
+++ b/drivers/crypto/bcm/spu.c
@@ -0,0 +1,1251 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free
Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#include
+#include
+
+#include "util.h"
+#include "spu.h"
+#include "spum.h"
+#include "cipher.h"
+
+/* This array is based on the hash algo type supported in spu.h */
+char *tag_to_hash_idx[] = { "none", "md5", "sha1", "sha224", "sha256" };
+
+char *hash_alg_name[] = { "None", "md5", "sha1", "sha224", "sha256", "aes",
+	"sha384", "sha512", "sha3_224", "sha3_256", "sha3_384", "sha3_512" };
+
+char *aead_alg_name[] = { "ccm(aes)", "gcm(aes)", "authenc" };
+
+/**
+ * spum_dump_msg_hdr() - Log a decoded view of a SPU-M message header.
+ * @buf: start of the SPU message header
+ * @buf_len: length of the header, in bytes
+ *
+ * Walks the header in order - MH and EMH, then the optional SCTX (with its
+ * keys, hash state and IV), BDESC and BD sections - and prints each field
+ * through packet_log() / packet_dump(). A "parsed incorrectly" message is
+ * logged if the walk does not end exactly at @buf + @buf_len.
+ *
+ * Assumes SPU-M messages are in big endian
+ */
+void spum_dump_msg_hdr(u8 *buf, unsigned int buf_len)
+{
+	u8 *ptr = buf;	/* parse position; advanced past each decoded field */
+	struct SPUHEADER *spuh = (struct SPUHEADER *)buf;
+	unsigned int hash_key_len = 0;
+	unsigned int hash_state_len = 0;
+	unsigned int cipher_key_len = 0;
+	unsigned int iv_len;
+	u32 pflags;
+	u32 cflags;
+	u32 ecf;
+	u32 cipher_alg;
+	u32 cipher_mode;
+	u32 cipher_type;
+	u32 hash_alg;
+	u32 hash_mode;
+	u32 hash_type;
+	u32 sctx_size;	/* SCTX length in words */
+	u32 sctx_pl_len;	/* SCTX payload length in bytes */
+
+	packet_log("\n");
+	packet_log("SPU Message header %p len: %u\n", buf, buf_len);
+
+	/* ========== Decode MH ========== */
+	packet_log(" MH 0x%08x\n", be32_to_cpu(*((u32 *)ptr)));
+	if (spuh->mh.flags & MH_SCTX_PRES)
+		packet_log(" SCTX present\n");
+	if (spuh->mh.flags & MH_BDESC_PRES)
+		packet_log(" BDESC present\n");
+	if (spuh->mh.flags & MH_MFM_PRES)
+		packet_log(" MFM present\n");
+	if (spuh->mh.flags & MH_BD_PRES)
+		packet_log(" BD present\n");
+	if (spuh->mh.flags & MH_HASH_PRES)
+		packet_log(" HASH present\n");
+	if (spuh->mh.flags & MH_SUPDT_PRES)
+		packet_log(" SUPDT present\n");
+	packet_log(" Opcode 0x%02x\n", spuh->mh.op_code);
+
+	ptr += sizeof(spuh->mh) + sizeof(spuh->emh);	/* skip emh. unused */
+
+	/* ========== Decode SCTX ========== */
+	if (spuh->mh.flags & MH_SCTX_PRES) {
+		pflags = be32_to_cpu(spuh->sa.proto_flags);
+		packet_log(" SCTX[0] 0x%08x\n", pflags);
+		sctx_size = pflags & SCTX_SIZE;
+		packet_log(" Size %u words\n", sctx_size);
+
+		cflags = be32_to_cpu(spuh->sa.cipher_flags);
+		packet_log(" SCTX[1] 0x%08x\n", cflags);
+		packet_log(" Inbound:%lu (1:decrypt/vrfy 0:encrypt/auth)\n",
+			   (cflags & CIPHER_INBOUND) >> CIPHER_INBOUND_SHIFT);
+		packet_log(" Order:%lu (1:AuthFirst 0:EncFirst)\n",
+			   (cflags & CIPHER_ORDER) >> CIPHER_ORDER_SHIFT);
+		packet_log(" ICV_IS_512:%lx\n",
+			   (cflags & ICV_IS_512) >> ICV_IS_512_SHIFT);
+		cipher_alg = (cflags & CIPHER_ALG) >> CIPHER_ALG_SHIFT;
+		cipher_mode = (cflags & CIPHER_MODE) >> CIPHER_MODE_SHIFT;
+		cipher_type = (cflags & CIPHER_TYPE) >> CIPHER_TYPE_SHIFT;
+		packet_log(" Crypto Alg:%u Mode:%u Type:%u\n",
+			   cipher_alg, cipher_mode, cipher_type);
+		hash_alg = (cflags & HASH_ALG) >> HASH_ALG_SHIFT;
+		hash_mode = (cflags & HASH_MODE) >> HASH_MODE_SHIFT;
+		hash_type = (cflags & HASH_TYPE) >> HASH_TYPE_SHIFT;
+		packet_log(" Hash Alg:%x Mode:%x Type:%x\n",
+			   hash_alg, hash_mode, hash_type);
+		packet_log(" UPDT_Offset:%u\n", cflags & UPDT_OFST);
+
+		ecf = be32_to_cpu(spuh->sa.ecf);
+		packet_log(" SCTX[2] 0x%08x\n", ecf);
+		packet_log(" WriteICV:%lu CheckICV:%lu ICV_SIZE:%u ",
+			   (ecf & INSERT_ICV) >> INSERT_ICV_SHIFT,
+			   (ecf & CHECK_ICV) >> CHECK_ICV_SHIFT,
+			   (ecf & ICV_SIZE) >> ICV_SIZE_SHIFT);
+		packet_log("BD_SUPPRESS:%lu\n",
+			   (ecf & BD_SUPPRESS) >> BD_SUPPRESS_SHIFT);
+		packet_log(" SCTX_IV:%lu ExplicitIV:%lu GenIV:%lu ",
+			   (ecf & SCTX_IV) >> SCTX_IV_SHIFT,
+			   (ecf & EXPLICIT_IV) >> EXPLICIT_IV_SHIFT,
+			   (ecf & GEN_IV) >> GEN_IV_SHIFT);
+		packet_log("IV_OV_OFST:%lu EXP_IV_SIZE:%u\n",
+			   (ecf & IV_OFFSET) >> IV_OFFSET_SHIFT,
+			   ecf & EXP_IV_SIZE);
+
+		ptr += sizeof(struct SCTX);
+
+		if (hash_alg && hash_mode) {	/* auth key follows the SCTX */
+			char *name = "NONE";
+
+			switch (hash_alg) {
+			case HASH_ALG_MD5:
+				hash_key_len = 16;
+				name = "MD5";
+				break;
+			case HASH_ALG_SHA1:
+				hash_key_len = 20;
+				name = "SHA1";
+				break;
+			case HASH_ALG_SHA224:
+				hash_key_len = 28;
+				name = "SHA224";
+				break;
+			case HASH_ALG_SHA256:
+				hash_key_len = 32;
+				name = "SHA256";
+				break;
+			case HASH_ALG_SHA384:
+				hash_key_len = 48;
+				name = "SHA384";
+				break;
+			case HASH_ALG_SHA512:
+				hash_key_len = 64;
+				name = "SHA512";
+				break;
+			case HASH_ALG_AES:
+				hash_key_len = 0;
+				name = "AES";
+				break;
+			case HASH_ALG_NONE:
+				break;
+			}
+
+			packet_log(" Auth Key Type:%s Length:%u Bytes\n",
+				   name, hash_key_len);
+			packet_dump(" KEY: ", ptr, hash_key_len);
+			ptr += hash_key_len;
+		} else if ((hash_alg == HASH_ALG_AES) &&
+			   (hash_mode == HASH_MODE_XCBC)) {
+			char *name = "NONE";
+
+			switch (cipher_type) {	/* XCBC key length follows the AES type */
+			case CIPHER_TYPE_AES128:
+				hash_key_len = 16;
+				name = "AES128-XCBC";
+				break;
+			case CIPHER_TYPE_AES192:
+				hash_key_len = 24;
+				name = "AES192-XCBC";
+				break;
+			case CIPHER_TYPE_AES256:
+				hash_key_len = 32;
+				name = "AES256-XCBC";
+				break;
+			}
+			packet_log(" Auth Key Type:%s Length:%u Bytes\n",
+				   name, hash_key_len);
+			packet_dump(" KEY: ", ptr, hash_key_len);
+			ptr += hash_key_len;
+		}
+
+		if (hash_alg && (hash_mode == HASH_MODE_NONE) &&
+		    (hash_type == HASH_TYPE_UPDT)) {
+			char *name = "NONE";
+
+			switch (hash_alg) {
+			case HASH_ALG_MD5:
+				hash_state_len = 16;
+				name = "MD5";
+				break;
+			case HASH_ALG_SHA1:
+				hash_state_len = 20;
+				name = "SHA1";
+				break;
+			case HASH_ALG_SHA224:
+				hash_state_len = 32;	/* state is 256 bits, unlike the 28-byte key above */
+				name = "SHA224";
+				break;
+			case HASH_ALG_SHA256:
+				hash_state_len = 32;
+				name = "SHA256";
+				break;
+			case HASH_ALG_SHA384:
+				hash_state_len = 48;
+				name = "SHA384";
+				break;
+			case HASH_ALG_SHA512:
+				hash_state_len = 64;
+				name = "SHA512";
+				break;
+			case HASH_ALG_AES:
+				hash_state_len = 0;
+				name = "AES";
+				break;
+			case HASH_ALG_NONE:
+				break;
+			}
+
+			packet_log(" Auth State Type:%s Length:%u Bytes\n",
+				   name, hash_state_len);
+			packet_dump(" State: ", ptr, hash_state_len);
+			ptr += hash_state_len;
+		}
+
+		if (cipher_alg) {
+			char *name = "NONE";
+
+			switch (cipher_alg) {
+			case CIPHER_ALG_DES:
+				cipher_key_len = 8;
+				name = "DES";
+				break;
+			case CIPHER_ALG_3DES:
+				cipher_key_len = 24;
+				name = "3DES";
+				break;
+			case CIPHER_ALG_RC4:
+				cipher_key_len = 260;	/* presumably ARC4_MAX_KEY_SIZE + ARC4_STATE_SIZE - confirm */
+				name = "ARC4";
+				break;
+			case CIPHER_ALG_AES:
+				switch (cipher_type) {
+				case CIPHER_TYPE_AES128:
+					cipher_key_len = 16;
+					name = "AES128";
+					break;
+				case CIPHER_TYPE_AES192:
+					cipher_key_len = 24;
+					name = "AES192";
+					break;
+				case CIPHER_TYPE_AES256:
+					cipher_key_len = 32;
+					name = "AES256";
+					break;
+				}
+				break;
+			case CIPHER_ALG_NONE:
+				break;
+			}
+
+			packet_log(" Cipher Key Type:%s Length:%u Bytes\n",
+				   name, cipher_key_len);
+
+			/* XTS has two keys */
+			if (cipher_mode == CIPHER_MODE_XTS) {
+				packet_dump(" KEY2: ", ptr, cipher_key_len);
+				ptr += cipher_key_len;
+				packet_dump(" KEY1: ", ptr, cipher_key_len);
+				ptr += cipher_key_len;
+
+				cipher_key_len *= 2;	/* so the IV length below accounts for both keys */
+			} else {
+				packet_dump(" KEY: ", ptr, cipher_key_len);
+				ptr += cipher_key_len;
+			}
+
+			if (ecf & SCTX_IV) {	/* IV is what remains of the SCTX payload */
+				sctx_pl_len = sctx_size * sizeof(u32) -
+					sizeof(struct SCTX);
+				iv_len = sctx_pl_len -
+					(hash_key_len + hash_state_len +
+					 cipher_key_len);
+				packet_log(" IV Length:%u Bytes\n", iv_len);
+				packet_dump(" IV: ", ptr, iv_len);
+				ptr += iv_len;
+			}
+		}
+	}
+
+	/*
+	 * ========== Decode BDESC ==========
+	 *
+	 * bdesc (and bd further down) only exist when DEBUG is defined;
+	 * presumably packet_log() discards its arguments otherwise - confirm
+	 * in util.h.
+	 */
+	if (spuh->mh.flags & MH_BDESC_PRES) {
+#ifdef DEBUG
+		struct BDESC_HEADER *bdesc = (struct BDESC_HEADER *)ptr;
+#endif
+		packet_log(" BDESC[0] 0x%08x\n", be32_to_cpu(*((u32 *)ptr)));
+		packet_log(" OffsetMAC:%u LengthMAC:%u\n",
+			   be16_to_cpu(bdesc->offset_mac),
+			   be16_to_cpu(bdesc->length_mac));
+		ptr += sizeof(u32);
+
+		packet_log(" BDESC[1] 0x%08x\n", be32_to_cpu(*((u32 *)ptr)));
+		packet_log(" OffsetCrypto:%u LengthCrypto:%u\n",
+			   be16_to_cpu(bdesc->offset_crypto),
+			   be16_to_cpu(bdesc->length_crypto));
+		ptr += sizeof(u32);
+
+		packet_log(" BDESC[2] 0x%08x\n", be32_to_cpu(*((u32 *)ptr)));
+		packet_log(" OffsetICV:%u OffsetIV:%u\n",
+			   be16_to_cpu(bdesc->offset_icv),
+			   be16_to_cpu(bdesc->offset_iv));
+		ptr += sizeof(u32);
+	}
+
+	/* ========== Decode BD ========== */
+	if (spuh->mh.flags & MH_BD_PRES) {
+#ifdef DEBUG
+		struct BD_HEADER *bd = (struct BD_HEADER *)ptr;
+#endif
+		packet_log(" BD[0] 0x%08x\n", be32_to_cpu(*((u32 *)ptr)));
+		packet_log(" Size:%ubytes PrevLength:%u\n",
+			   be16_to_cpu(bd->size), be16_to_cpu(bd->prev_length));
+		ptr += 4;	/* the single 32-bit BD word logged above */
+	}
+
+	/* Double check sanity: the walk must end exactly at buf + buf_len */
+	if (buf + buf_len != ptr) {
+		packet_log(" Packet parsed incorrectly. ");
+		packet_log("buf:%p buf_len:%u buf+buf_len:%p ptr:%p\n",
+			   buf, buf_len, buf + buf_len, ptr);
+	}
+
+	packet_log("\n");
+}
+
+/**
+ * spum_ns2_ctx_max_payload() - Determine the max length of the payload for a
+ * SPU message for a given cipher and hash alg context.
+ * @cipher_alg: The cipher algorithm
+ * @cipher_mode: The cipher mode
+ * @blocksize: The size of a block of data for this algo; must be non-zero
+ *
+ * The max payload must be a multiple of the blocksize so that if a request is
+ * too large to fit in a single SPU message, the request can be broken into
+ * max_payload sized chunks. Each chunk must be a multiple of blocksize.
+ *
+ * Return: Max payload length in bytes
+ */
+u32 spum_ns2_ctx_max_payload(enum spu_cipher_alg cipher_alg,
+			     enum spu_cipher_mode cipher_mode,
+			     unsigned int blocksize)
+{
+	u32 max_payload = SPUM_NS2_MAX_PAYLOAD;
+	u32 excess;
+
+	/* In XTS on SPU-M, we'll need to insert tweak before input data */
+	if (cipher_mode == CIPHER_MODE_XTS)
+		max_payload -= SPU_XTS_TWEAK_SIZE;
+
+	excess = max_payload % blocksize;	/* bytes beyond the last whole block */
+
+	return max_payload - excess;
+}
+
+/**
+ * spum_nsp_ctx_max_payload() - Determine the max length of the payload for a
+ * SPU message for a given cipher and hash alg context.
+ * @cipher_alg: The cipher algorithm + * @cipher_mode: The cipher mode + * @blocksize: The size of a block of data for this algo + * + * The max payload must be a multiple of the blocksize so that if a request is + * too large to fit in a single SPU message, the request can be broken into + * max_payload sized chunks. Each chunk must be a multiple of blocksize. + * + * Return: Max payload length in bytes + */ +u32 spum_nsp_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize) +{ + u32 max_payload = SPUM_NSP_MAX_PAYLOAD; + u32 excess; + + /* In XTS on SPU-M, we'll need to insert tweak before input data */ + if (cipher_mode == CIPHER_MODE_XTS) + max_payload -= SPU_XTS_TWEAK_SIZE; + + excess = max_payload % blocksize; + + return max_payload - excess; +} + +/** spum_payload_length() - Given a SPU-M message header, extract the payload + * length. + * @spu_hdr: Start of SPU header + * + * Assumes just MH, EMH, BD (no SCTX, BDESC. Works for response frames. + * + * Return: payload length in bytes + */ +u32 spum_payload_length(u8 *spu_hdr) +{ + struct BD_HEADER *bd; + u32 pl_len; + + /* Find BD header. skip MH, EMH */ + bd = (struct BD_HEADER *)(spu_hdr + 8); + pl_len = be16_to_cpu(bd->size); + + return pl_len; +} + +/** + * spum_response_hdr_len() - Given the length of the hash key and encryption + * key, determine the expected length of a SPU response header. + * @auth_key_len: authentication key length (bytes) + * @enc_key_len: encryption key length (bytes) + * @is_hash: true if response message is for a hash operation + * + * Return: length of SPU response header (bytes) + */ +u16 spum_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash) +{ + if (is_hash) + return SPU_HASH_RESP_HDR_LEN; + else + return SPU_RESP_HDR_LEN; +} + +/** + * spum_hash_pad_len() - Calculate the length of hash padding required to extend + * data to a full block size. 
+ * @hash_alg: hash algorithm + * @hash_mode: hash mode + * @chunksize: length of data, in bytes + * @hash_block_size: size of a block of data for hash algorithm + * + * Reserve space for 1 byte (0x80) start of pad and the total length as u64 + * + * Return: length of hash pad in bytes + */ +u16 spum_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode, + u32 chunksize, u16 hash_block_size) +{ + unsigned int length_len; + unsigned int used_space_last_block; + int hash_pad_len; + + /* AES-XCBC hash requires just padding to next block boundary */ + if ((hash_alg == HASH_ALG_AES) && (hash_mode == HASH_MODE_XCBC)) { + used_space_last_block = chunksize % hash_block_size; + hash_pad_len = hash_block_size - used_space_last_block; + if (hash_pad_len >= hash_block_size) + hash_pad_len -= hash_block_size; + return hash_pad_len; + } + + used_space_last_block = chunksize % hash_block_size + 1; + if ((hash_alg == HASH_ALG_SHA384) || (hash_alg == HASH_ALG_SHA512)) + length_len = 2 * sizeof(u64); + else + length_len = sizeof(u64); + + used_space_last_block += length_len; + hash_pad_len = hash_block_size - used_space_last_block; + if (hash_pad_len < 0) + hash_pad_len += hash_block_size; + + hash_pad_len += 1 + length_len; + return hash_pad_len; +} + +/** + * spum_gcm_ccm_pad_len() - Determine the required length of GCM or CCM padding. + * @cipher_mode: Algo type + * @data_size: Length of plaintext (bytes) + * + * @Return: Length of padding, in bytes + */ +u32 spum_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode, + unsigned int data_size) +{ + u32 pad_len = 0; + u32 m1 = SPU_GCM_CCM_ALIGN - 1; + + if ((cipher_mode == CIPHER_MODE_GCM) || + (cipher_mode == CIPHER_MODE_CCM)) + pad_len = ((data_size + m1) & ~m1) - data_size; + + return pad_len; +} + +/** + * spum_assoc_resp_len() - Determine the size of the receive buffer required to + * catch associated data. 
+ * @cipher_mode: cipher mode + * @assoc_len: length of associated data (bytes) + * @iv_len: length of IV (bytes) + * @is_encrypt: true if encrypting. false if decrypting. + * + * Return: length of associated data in response message (bytes) + */ +u32 spum_assoc_resp_len(enum spu_cipher_mode cipher_mode, + unsigned int assoc_len, unsigned int iv_len, + bool is_encrypt) +{ + u32 buflen = 0; + u32 pad; + + if (assoc_len) + buflen = assoc_len; + + if (cipher_mode == CIPHER_MODE_GCM) { + /* AAD needs to be padded in responses too */ + pad = spum_gcm_ccm_pad_len(cipher_mode, buflen); + buflen += pad; + } + if (cipher_mode == CIPHER_MODE_CCM) { + /* + * AAD needs to be padded in responses too + * for CCM, len + 2 needs to be 128-bit aligned. + */ + pad = spum_gcm_ccm_pad_len(cipher_mode, buflen + 2); + buflen += pad; + } + + return buflen; +} + +/** + * spu_aead_ivlen() - Calculate the length of the AEAD IV to be included + * in a SPU request after the AAD and before the payload. + * @cipher_mode: cipher mode + * @iv_ctr_len: initialization vector length in bytes + * + * In Linux ~4.2 and later, the assoc_data sg includes the IV. So no need + * to include the IV as a separate field in the SPU request msg. + * + * Return: Length of AEAD IV in bytes + */ +u8 spum_aead_ivlen(enum spu_cipher_mode cipher_mode, u16 iv_len) +{ + return 0; +} + +/** + * spum_hash_type() - Determine the type of hash operation. + * @src_sent: The number of bytes in the current request that have already + * been sent to the SPU to be hashed. + * + * We do not use HASH_TYPE_FULL for requests that fit in a single SPU message. + * Using FULL causes failures (such as when the string to be hashed is empty). + * For similar reasons, we never use HASH_TYPE_FIN. Instead, submit messages + * as INIT or UPDT and do the hash padding in sw. + */ +enum hash_type spum_hash_type(u32 src_sent) +{ + return src_sent ? 
HASH_TYPE_UPDT : HASH_TYPE_INIT; +} + +/** + * spum_digest_size() - Determine the size of a hash digest to expect the SPU to + * return. + * alg_digest_size: Number of bytes in the final digest for the given algo + * alg: The hash algorithm + * htype: Type of hash operation (init, update, full, etc) + * + * When doing incremental hashing for an algorithm with a truncated hash + * (e.g., SHA224), the SPU returns the full digest so that it can be fed back as + * a partial result for the next chunk. + */ +u32 spum_digest_size(u32 alg_digest_size, enum hash_alg alg, + enum hash_type htype) +{ + u32 digestsize = alg_digest_size; + + /* SPU returns complete digest when doing incremental hash and truncated + * hash algo. + */ + if ((htype == HASH_TYPE_INIT) || (htype == HASH_TYPE_UPDT)) { + if (alg == HASH_ALG_SHA224) + digestsize = SHA256_DIGEST_SIZE; + else if (alg == HASH_ALG_SHA384) + digestsize = SHA512_DIGEST_SIZE; + } + return digestsize; +} + +/** + * spum_create_request() - Build a SPU request message header, up to and + * including the BD header. Construct the message starting at spu_hdr. Caller + * should allocate this buffer in DMA-able memory at least SPU_HEADER_ALLOC_LEN + * bytes long. + * @spu_hdr: Start of buffer where SPU request header is to be written + * @req_opts: SPU request message options + * @cipher_parms: Parameters related to cipher algorithm + * @hash_parms: Parameters related to hash algorithm + * @aead_parms: Parameters related to AEAD operation + * @data_size: Length of data to be encrypted or authenticated. If AEAD, does + * not include length of AAD. + + * Return: the length of the SPU header in bytes. 0 if an error occurs. 
+ */ +u32 spum_create_request(u8 *spu_hdr, + struct spu_request_opts *req_opts, + struct spu_cipher_parms *cipher_parms, + struct spu_hash_parms *hash_parms, + struct spu_aead_parms *aead_parms, + unsigned int data_size) +{ + struct SPUHEADER *spuh; + struct BDESC_HEADER *bdesc; + struct BD_HEADER *bd; + + u8 *ptr; + u32 protocol_bits = 0; + u32 cipher_bits = 0; + u32 ecf_bits = 0; + u8 sctx_words = 0; + unsigned int buf_len = 0; + + /* size of the cipher payload */ + unsigned int cipher_len = hash_parms->prebuf_len + data_size + + hash_parms->pad_len; + + /* offset of prebuf or data from end of BD header */ + unsigned int cipher_offset = aead_parms->assoc_size + + aead_parms->iv_len + aead_parms->aad_pad_len; + + /* total size of the DB data (without STAT word padding) */ + unsigned int real_db_size = spu_real_db_size(aead_parms->assoc_size, + aead_parms->iv_len, + hash_parms->prebuf_len, + data_size, + aead_parms->aad_pad_len, + aead_parms->data_pad_len, + hash_parms->pad_len); + + unsigned int auth_offset = 0; + unsigned int offset_iv = 0; + + /* size/offset of the auth payload */ + unsigned int auth_len; + + auth_len = real_db_size; + + if (req_opts->is_aead && req_opts->is_inbound) + cipher_len -= hash_parms->digestsize; + + if (req_opts->is_aead && req_opts->is_inbound) + auth_len -= hash_parms->digestsize; + + if ((hash_parms->alg == HASH_ALG_AES) && + (hash_parms->mode == HASH_MODE_XCBC)) { + auth_len -= hash_parms->pad_len; + cipher_len -= hash_parms->pad_len; + } + + flow_log("%s()\n", __func__); + flow_log(" in:%u authFirst:%u\n", + req_opts->is_inbound, req_opts->auth_first); + flow_log(" %s. 
cipher alg:%u mode:%u type %u\n", + spu_alg_name(cipher_parms->alg, cipher_parms->mode), + cipher_parms->alg, cipher_parms->mode, cipher_parms->type); + flow_log(" key: %d\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len); + flow_log(" iv: %d\n", cipher_parms->iv_len); + flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len); + flow_log(" auth alg:%u mode:%u type %u\n", + hash_parms->alg, hash_parms->mode, hash_parms->type); + flow_log(" digestsize: %u\n", hash_parms->digestsize); + flow_log(" authkey: %d\n", hash_parms->key_len); + flow_dump(" authkey: ", hash_parms->key_buf, hash_parms->key_len); + flow_log(" assoc_size:%u\n", aead_parms->assoc_size); + flow_log(" prebuf_len:%u\n", hash_parms->prebuf_len); + flow_log(" data_size:%u\n", data_size); + flow_log(" hash_pad_len:%u\n", hash_parms->pad_len); + flow_log(" real_db_size:%u\n", real_db_size); + flow_log(" auth_offset:%u auth_len:%u cipher_offset:%u cipher_len:%u\n", + auth_offset, auth_len, cipher_offset, cipher_len); + flow_log(" aead_iv: %u\n", aead_parms->iv_len); + + /* starting out: zero the header (plus some) */ + ptr = spu_hdr; + memset(ptr, 0, sizeof(struct SPUHEADER)); + + /* format master header word */ + /* Do not set the next bit even though the datasheet says to */ + spuh = (struct SPUHEADER *)ptr; + ptr += sizeof(struct SPUHEADER); + buf_len += sizeof(struct SPUHEADER); + + spuh->mh.op_code = SPU_CRYPTO_OPERATION_GENERIC; + spuh->mh.flags |= (MH_SCTX_PRES | MH_BDESC_PRES | MH_BD_PRES); + + /* Format sctx word 0 (protocol_bits) */ + sctx_words = 3; /* size in words */ + + /* Format sctx word 1 (cipher_bits) */ + if (req_opts->is_inbound) + cipher_bits |= CIPHER_INBOUND; + if (req_opts->auth_first) + cipher_bits |= CIPHER_ORDER; + + /* Set the crypto parameters in the cipher.flags */ + cipher_bits |= cipher_parms->alg << CIPHER_ALG_SHIFT; + cipher_bits |= cipher_parms->mode << CIPHER_MODE_SHIFT; + cipher_bits |= cipher_parms->type << 
CIPHER_TYPE_SHIFT; + + /* Set the auth parameters in the cipher.flags */ + cipher_bits |= hash_parms->alg << HASH_ALG_SHIFT; + cipher_bits |= hash_parms->mode << HASH_MODE_SHIFT; + cipher_bits |= hash_parms->type << HASH_TYPE_SHIFT; + + /* + * Format sctx extensions if required, and update main fields if + * required) + */ + if (hash_parms->alg) { + /* Write the authentication key material if present */ + if (hash_parms->key_len) { + memcpy(ptr, hash_parms->key_buf, hash_parms->key_len); + ptr += hash_parms->key_len; + buf_len += hash_parms->key_len; + sctx_words += hash_parms->key_len / 4; + } + + if ((cipher_parms->mode == CIPHER_MODE_GCM) || + (cipher_parms->mode == CIPHER_MODE_CCM)) + /* unpadded length */ + offset_iv = aead_parms->assoc_size; + + /* if GCM/CCM we need to write ICV into the payload */ + if (!req_opts->is_inbound) { + if ((cipher_parms->mode == CIPHER_MODE_GCM) || + (cipher_parms->mode == CIPHER_MODE_CCM)) + ecf_bits |= 1 << INSERT_ICV_SHIFT; + } else { + ecf_bits |= CHECK_ICV; + } + + /* Inform the SPU of the ICV size (in words) */ + if (hash_parms->digestsize == 64) + cipher_bits |= ICV_IS_512; + else + ecf_bits |= + (hash_parms->digestsize / 4) << ICV_SIZE_SHIFT; + } + + if (req_opts->bd_suppress) + ecf_bits |= BD_SUPPRESS; + + /* copy the encryption keys in the SAD entry */ + if (cipher_parms->alg) { + if (cipher_parms->key_len) { + memcpy(ptr, cipher_parms->key_buf, + cipher_parms->key_len); + ptr += cipher_parms->key_len; + buf_len += cipher_parms->key_len; + sctx_words += cipher_parms->key_len / 4; + } + + /* + * if encrypting then set IV size, use SCTX IV unless no IV + * given here + */ + if (cipher_parms->iv_buf && cipher_parms->iv_len) { + /* Use SCTX IV */ + ecf_bits |= SCTX_IV; + + /* cipher iv provided so put it in here */ + memcpy(ptr, cipher_parms->iv_buf, cipher_parms->iv_len); + + ptr += cipher_parms->iv_len; + buf_len += cipher_parms->iv_len; + sctx_words += cipher_parms->iv_len / 4; + } + } + + /* + * RFC4543 (GMAC/ESP) 
requires data to be sent as part of AAD + * so we need to override the BDESC parameters. + */ + if (req_opts->is_rfc4543) { + if (req_opts->is_inbound) + data_size -= hash_parms->digestsize; + offset_iv = aead_parms->assoc_size + data_size; + cipher_len = 0; + cipher_offset = offset_iv; + auth_len = cipher_offset + aead_parms->data_pad_len; + } + + /* write in the total sctx length now that we know it */ + protocol_bits |= sctx_words; + + /* Endian adjust the SCTX */ + spuh->sa.proto_flags = cpu_to_be32(protocol_bits); + spuh->sa.cipher_flags = cpu_to_be32(cipher_bits); + spuh->sa.ecf = cpu_to_be32(ecf_bits); + + /* === create the BDESC section === */ + bdesc = (struct BDESC_HEADER *)ptr; + + bdesc->offset_mac = cpu_to_be16(auth_offset); + bdesc->length_mac = cpu_to_be16(auth_len); + bdesc->offset_crypto = cpu_to_be16(cipher_offset); + bdesc->length_crypto = cpu_to_be16(cipher_len); + + /* + * CCM in SPU-M requires that ICV not be in same 32-bit word as data or + * padding. So account for padding as necessary. + */ + if (cipher_parms->mode == CIPHER_MODE_CCM) + auth_len += spum_wordalign_padlen(auth_len); + + bdesc->offset_icv = cpu_to_be16(auth_len); + bdesc->offset_iv = cpu_to_be16(offset_iv); + + ptr += sizeof(struct BDESC_HEADER); + buf_len += sizeof(struct BDESC_HEADER); + + /* === no MFM section === */ + + /* === create the BD section === */ + + /* add the BD header */ + bd = (struct BD_HEADER *)ptr; + bd->size = cpu_to_be16(real_db_size); + bd->prev_length = 0; + + ptr += sizeof(struct BD_HEADER); + buf_len += sizeof(struct BD_HEADER); + + packet_dump(" SPU request header: ", spu_hdr, buf_len); + + return buf_len; +} + +/** + * spum_cipher_req_init() - Build a SPU request message header, up to and + * including the BD header. + * @spu_hdr: Start of SPU request header (MH) + * @cipher_parms: Parameters that describe the cipher request + * + * Construct the message starting at spu_hdr. 
Caller should allocate this buffer + * in DMA-able memory at least SPU_HEADER_ALLOC_LEN bytes long. + * + * Return: the length of the SPU header in bytes. 0 if an error occurs. + */ +u16 spum_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms) +{ + struct SPUHEADER *spuh; + u32 protocol_bits = 0; + u32 cipher_bits = 0; + u32 ecf_bits = 0; + u8 sctx_words = 0; + u8 *ptr = spu_hdr; + + flow_log("%s()\n", __func__); + flow_log(" cipher alg:%u mode:%u type %u\n", cipher_parms->alg, + cipher_parms->mode, cipher_parms->type); + flow_log(" cipher_iv_len: %u\n", cipher_parms->iv_len); + flow_log(" key: %d\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len); + + /* starting out: zero the header (plus some) */ + memset(spu_hdr, 0, sizeof(struct SPUHEADER)); + ptr += sizeof(struct SPUHEADER); + + /* format master header word */ + /* Do not set the next bit even though the datasheet says to */ + spuh = (struct SPUHEADER *)spu_hdr; + + spuh->mh.op_code = SPU_CRYPTO_OPERATION_GENERIC; + spuh->mh.flags |= (MH_SCTX_PRES | MH_BDESC_PRES | MH_BD_PRES); + + /* Format sctx word 0 (protocol_bits) */ + sctx_words = 3; /* size in words */ + + /* copy the encryption keys in the SAD entry */ + if (cipher_parms->alg) { + if (cipher_parms->key_len) { + ptr += cipher_parms->key_len; + sctx_words += cipher_parms->key_len / 4; + } + + /* + * if encrypting then set IV size, use SCTX IV unless no IV + * given here + */ + if (cipher_parms->iv_len) { + /* Use SCTX IV */ + ecf_bits |= SCTX_IV; + ptr += cipher_parms->iv_len; + sctx_words += cipher_parms->iv_len / 4; + } + } + + /* Set the crypto parameters in the cipher.flags */ + cipher_bits |= cipher_parms->alg << CIPHER_ALG_SHIFT; + cipher_bits |= cipher_parms->mode << CIPHER_MODE_SHIFT; + cipher_bits |= cipher_parms->type << CIPHER_TYPE_SHIFT; + + /* copy the encryption keys in the SAD entry */ + if (cipher_parms->alg && cipher_parms->key_len) + memcpy(spuh + 1, cipher_parms->key_buf, 
cipher_parms->key_len); + + /* write in the total sctx length now that we know it */ + protocol_bits |= sctx_words; + + /* Endian adjust the SCTX */ + spuh->sa.proto_flags = cpu_to_be32(protocol_bits); + + /* Endian adjust the SCTX */ + spuh->sa.cipher_flags = cpu_to_be32(cipher_bits); + spuh->sa.ecf = cpu_to_be32(ecf_bits); + + packet_dump(" SPU request header: ", spu_hdr, + sizeof(struct SPUHEADER)); + + return sizeof(struct SPUHEADER) + cipher_parms->key_len + + cipher_parms->iv_len + sizeof(struct BDESC_HEADER) + + sizeof(struct BD_HEADER); +} + +/** + * spum_cipher_req_finish() - Finish building a SPU request message header for a + * block cipher request. Assumes much of the header was already filled in at + * setkey() time in spu_cipher_req_init(). + * @spu_hdr: Start of the request message header (MH field) + * @spu_req_hdr_len: Length in bytes of the SPU request header + * @isInbound: 0 encrypt, 1 decrypt + * @cipher_parms: Parameters describing cipher operation to be performed + * @update_key: If true, rewrite the cipher key in SCTX + * @data_size: Length of the data in the BD field + * + * Assumes much of the header was already filled in at setkey() time in + * spum_cipher_req_init(). + * spum_cipher_req_init() fills in the encryption key. For RC4, when submitting + * a request for a non-first chunk, we use the 260-byte SUPDT field from the + * previous response as the key. update_key is true for this case. Unused in all + * other cases. 
+ */ +void spum_cipher_req_finish(u8 *spu_hdr, + u16 spu_req_hdr_len, + unsigned int is_inbound, + struct spu_cipher_parms *cipher_parms, + bool update_key, + unsigned int data_size) +{ + struct SPUHEADER *spuh; + struct BDESC_HEADER *bdesc; + struct BD_HEADER *bd; + u8 *bdesc_ptr = spu_hdr + spu_req_hdr_len - + (sizeof(struct BD_HEADER) + sizeof(struct BDESC_HEADER)); + + u32 cipher_bits; + + flow_log("%s()\n", __func__); + flow_log(" in: %u\n", is_inbound); + flow_log(" cipher alg: %u, cipher_type: %u\n", cipher_parms->alg, + cipher_parms->type); + if (update_key) { + flow_log(" cipher key len: %u\n", cipher_parms->key_len); + flow_dump(" key: ", cipher_parms->key_buf, + cipher_parms->key_len); + } + + /* + * In XTS mode, API puts "i" parameter (block tweak) in IV. For + * SPU-M, should be in start of the BD; tx_sg_create() copies it there. + * IV in SPU msg for SPU-M should be 0, since that's the "j" parameter + * (block ctr within larger data unit) - given we can send entire disk + * block (<= 4KB) in 1 SPU msg, don't need to use this parameter. 
+ */ + if (cipher_parms->mode == CIPHER_MODE_XTS) + memset(cipher_parms->iv_buf, 0, cipher_parms->iv_len); + + flow_log(" iv len: %d\n", cipher_parms->iv_len); + flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len); + flow_log(" data_size: %u\n", data_size); + + /* format master header word */ + /* Do not set the next bit even though the datasheet says to */ + spuh = (struct SPUHEADER *)spu_hdr; + + /* cipher_bits was initialized at setkey time */ + cipher_bits = be32_to_cpu(spuh->sa.cipher_flags); + + /* Format sctx word 1 (cipher_bits) */ + if (is_inbound) + cipher_bits |= CIPHER_INBOUND; + else + cipher_bits &= ~CIPHER_INBOUND; + + /* update encryption key for RC4 on non-first chunk */ + if (update_key) { + spuh->sa.cipher_flags |= + cipher_parms->type << CIPHER_TYPE_SHIFT; + memcpy(spuh + 1, cipher_parms->key_buf, cipher_parms->key_len); + } + + if (cipher_parms->alg && cipher_parms->iv_buf && cipher_parms->iv_len) + /* cipher iv provided so put it in here */ + memcpy(bdesc_ptr - cipher_parms->iv_len, cipher_parms->iv_buf, + cipher_parms->iv_len); + + spuh->sa.cipher_flags = cpu_to_be32(cipher_bits); + + /* === create the BDESC section === */ + bdesc = (struct BDESC_HEADER *)bdesc_ptr; + bdesc->offset_mac = 0; + bdesc->length_mac = 0; + bdesc->offset_crypto = 0; + + /* XTS mode, data_size needs to include tweak parameter */ + if (cipher_parms->mode == CIPHER_MODE_XTS) + bdesc->length_crypto = cpu_to_be16(data_size + + SPU_XTS_TWEAK_SIZE); + else + bdesc->length_crypto = cpu_to_be16(data_size); + + bdesc->offset_icv = 0; + bdesc->offset_iv = 0; + + /* === no MFM section === */ + + /* === create the BD section === */ + /* add the BD header */ + bd = (struct BD_HEADER *)(bdesc_ptr + sizeof(struct BDESC_HEADER)); + bd->size = cpu_to_be16(data_size); + + /* XTS mode, data_size needs to include tweak parameter */ + if (cipher_parms->mode == CIPHER_MODE_XTS) + bd->size = cpu_to_be16(data_size + SPU_XTS_TWEAK_SIZE); + else + bd->size = cpu_to_be16(data_size); 
+ + bd->prev_length = 0; + + packet_dump(" SPU request header: ", spu_hdr, spu_req_hdr_len); +} + +/** + * spum_request_pad() - Create pad bytes at the end of the data. + * @pad_start: Start of buffer where pad bytes are to be written + * @gcm_ccm_padding: length of GCM/CCM padding, in bytes + * @hash_pad_len: Number of bytes of padding extend data to full block + * @auth_alg: authentication algorithm + * @auth_mode: authentication mode + * @total_sent: length inserted at end of hash pad + * @status_padding: Number of bytes of padding to align STATUS word + * + * There may be three forms of pad: + * 1. GCM/CCM pad - for GCM/CCM mode ciphers, pad to 16-byte alignment + * 2. hash pad - pad to a block length, with 0x80 data terminator and + * size at the end + * 3. STAT pad - to ensure the STAT field is 4-byte aligned + */ +void spum_request_pad(u8 *pad_start, + u32 gcm_ccm_padding, + u32 hash_pad_len, + enum hash_alg auth_alg, + enum hash_mode auth_mode, + unsigned int total_sent, u32 status_padding) +{ + u8 *ptr = pad_start; + + /* fix data alignent for GCM/CCM */ + if (gcm_ccm_padding > 0) { + flow_log(" GCM: padding to 16 byte alignment: %u bytes\n", + gcm_ccm_padding); + memset(ptr, 0, gcm_ccm_padding); + ptr += gcm_ccm_padding; + } + + if (hash_pad_len > 0) { + /* clear the padding section */ + memset(ptr, 0, hash_pad_len); + + if ((auth_alg == HASH_ALG_AES) && + (auth_mode == HASH_MODE_XCBC)) { + /* AES/XCBC just requires padding to be 0s */ + ptr += hash_pad_len; + } else { + /* terminate the data */ + *ptr = 0x80; + ptr += (hash_pad_len - sizeof(u64)); + + /* add the size at the end as required per alg */ + if (auth_alg == HASH_ALG_MD5) + *(u64 *)ptr = cpu_to_le64((u64)total_sent * 8); + else /* SHA1, SHA2-224, SHA2-256 */ + *(u64 *)ptr = cpu_to_be64((u64)total_sent * 8); + ptr += sizeof(u64); + } + } + + /* pad to a 4byte alignment for STAT */ + if (status_padding > 0) { + flow_log(" STAT: padding to 4 byte alignment: %u bytes\n", + status_padding); + + 
memset(ptr, 0, status_padding); + ptr += status_padding; + } +} + +/** + * spum_xts_tweak_in_payload() - Indicate that SPUM DOES place the XTS tweak + * field in the packet payload (rather than using IV) + * + * Return: 1 + */ +u8 spum_xts_tweak_in_payload(void) +{ + return 1; +} + +/** + * spum_tx_status_len() - Return the length of the STATUS field in a SPU + * response message. + * + * Return: Length of STATUS field in bytes. + */ +u8 spum_tx_status_len(void) +{ + return SPU_TX_STATUS_LEN; +} + +/** + * spum_rx_status_len() - Return the length of the STATUS field in a SPU + * response message. + * + * Return: Length of STATUS field in bytes. + */ +u8 spum_rx_status_len(void) +{ + return SPU_RX_STATUS_LEN; +} + +/** + * spum_status_process() - Process the status from a SPU response message. + * @statp: start of STATUS word + * Return: + * 0 - if status is good and response should be processed + * !0 - status indicates an error and response is invalid + */ +int spum_status_process(u8 *statp) +{ + u32 status; + + status = __be32_to_cpu(*(__be32 *)statp); + flow_log("SPU response STATUS %#08x\n", status); + if (status & SPU_STATUS_ERROR_FLAG) { + pr_err("%s() Warning: Error result from SPU: %#08x\n", + __func__, status); + if (status & SPU_STATUS_INVALID_ICV) + return SPU_INVALID_ICV; + return -EBADMSG; + } + return 0; +} + +/** + * spum_ccm_update_iv() - Update the IV as per the requirements for CCM mode. 
+ * + * @digestsize: Digest size of this request + * @cipher_parms: (pointer to) cipher parmaeters, includes IV buf & IV len + * @assoclen: Length of AAD data + * @chunksize: length of input data to be sent in this req + * @is_encrypt: true if this is an output/encrypt operation + * @is_esp: true if this is an ESP / RFC4309 operation + * + */ +void spum_ccm_update_iv(unsigned int digestsize, + struct spu_cipher_parms *cipher_parms, + unsigned int assoclen, + unsigned int chunksize, + bool is_encrypt, + bool is_esp) +{ + u8 L; /* L from CCM algorithm, length of plaintext data */ + u8 mprime; /* M' from CCM algo, (M - 2) / 2, where M=authsize */ + u8 adata; + + if (cipher_parms->iv_len != CCM_AES_IV_SIZE) { + pr_err("%s(): Invalid IV len %d for CCM mode, should be %d\n", + __func__, cipher_parms->iv_len, CCM_AES_IV_SIZE); + return; + } + + /* + * IV needs to be formatted as follows: + * + * | Byte 0 | Bytes 1 - N | Bytes (N+1) - 15 | + * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | Bits 7 - 0 | Bits 7 - 0 | + * | 0 |Ad?|(M - 2) / 2| L - 1 | Nonce | Plaintext Length | + * + * Ad? = 1 if AAD present, 0 if not present + * M = size of auth field, 8, 12, or 16 bytes (SPU-M) -or- + * 4, 6, 8, 10, 12, 14, 16 bytes (SPU2) + * L = Size of Plaintext Length field; Nonce size = 15 - L + * + * It appears that the crypto API already expects the L-1 portion + * to be set in the first byte of the IV, which implicitly determines + * the nonce size, and also fills in the nonce. But the other bits + * in byte 0 as well as the plaintext length need to be filled in. 
+ * + * In rfc4309/esp mode, L is not already in the supplied IV and + * we need to fill it in, as well as move the IV data to be after + * the salt + */ + if (is_esp) { + L = CCM_ESP_L_VALUE; /* RFC4309 has fixed L */ + } else { + /* L' = plaintext length - 1 so Plaintext length is L' + 1 */ + L = ((cipher_parms->iv_buf[0] & CCM_B0_L_PRIME) >> + CCM_B0_L_PRIME_SHIFT) + 1; + } + + mprime = (digestsize - 2) >> 1; /* M' = (M - 2) / 2 */ + adata = (assoclen > 0); /* adata = 1 if any associated data */ + + cipher_parms->iv_buf[0] = (adata << CCM_B0_ADATA_SHIFT) | + (mprime << CCM_B0_M_PRIME_SHIFT) | + ((L - 1) << CCM_B0_L_PRIME_SHIFT); + + /* Nonce is already filled in by crypto API, and is 15 - L bytes */ + + /* Don't include digest in plaintext size when decrypting */ + if (!is_encrypt) + chunksize -= digestsize; + + /* Fill in length of plaintext, formatted to be L bytes long */ + format_value_ccm(chunksize, &cipher_parms->iv_buf[15 - L + 1], L); +} + +/** + * spum_wordalign_padlen() - Given the length of a data field, determine the + * padding required to align the data following this field on a 4-byte boundary. + * @data_size: length of data field in bytes + * + * Return: length of status field padding, in bytes + */ +u32 spum_wordalign_padlen(u32 data_size) +{ + return ((data_size + 3) & ~3) - data_size; +} diff --git a/drivers/crypto/bcm/spu.h b/drivers/crypto/bcm/spu.h new file mode 100644 index 000000000000..aa6fc38db263 --- /dev/null +++ b/drivers/crypto/bcm/spu.h @@ -0,0 +1,287 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 (GPLv2) for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +/* + * This file contains the definition of SPU messages. There are currently two + * SPU message formats: SPU-M and SPU2. The hardware uses different values to + * identify the same things in SPU-M vs SPU2. So this file defines values that + * are hardware independent. Software can use these values for any version of + * SPU hardware. These values are used in APIs in spu.c. Functions internal to + * spu.c and spu2.c convert these to hardware-specific values. + */ + +#ifndef _SPU_H +#define _SPU_H + +#include +#include +#include + +enum spu_cipher_alg { + CIPHER_ALG_NONE = 0x0, + CIPHER_ALG_RC4 = 0x1, + CIPHER_ALG_DES = 0x2, + CIPHER_ALG_3DES = 0x3, + CIPHER_ALG_AES = 0x4, + CIPHER_ALG_LAST = 0x5 +}; + +enum spu_cipher_mode { + CIPHER_MODE_NONE = 0x0, + CIPHER_MODE_ECB = 0x0, + CIPHER_MODE_CBC = 0x1, + CIPHER_MODE_OFB = 0x2, + CIPHER_MODE_CFB = 0x3, + CIPHER_MODE_CTR = 0x4, + CIPHER_MODE_CCM = 0x5, + CIPHER_MODE_GCM = 0x6, + CIPHER_MODE_XTS = 0x7, + CIPHER_MODE_LAST = 0x8 +}; + +enum spu_cipher_type { + CIPHER_TYPE_NONE = 0x0, + CIPHER_TYPE_DES = 0x0, + CIPHER_TYPE_3DES = 0x0, + CIPHER_TYPE_INIT = 0x0, /* used for ARC4 */ + CIPHER_TYPE_AES128 = 0x0, + CIPHER_TYPE_AES192 = 0x1, + CIPHER_TYPE_UPDT = 0x1, /* used for ARC4 */ + CIPHER_TYPE_AES256 = 0x2, +}; + +enum hash_alg { + HASH_ALG_NONE = 0x0, + HASH_ALG_MD5 = 0x1, + HASH_ALG_SHA1 = 0x2, + HASH_ALG_SHA224 = 0x3, + HASH_ALG_SHA256 = 0x4, + HASH_ALG_AES = 0x5, + HASH_ALG_SHA384 = 0x6, + HASH_ALG_SHA512 = 0x7, + /* Keep SHA3 algorithms at the end always */ + HASH_ALG_SHA3_224 = 0x8, + HASH_ALG_SHA3_256 = 0x9, + HASH_ALG_SHA3_384 = 0xa, + HASH_ALG_SHA3_512 = 0xb, + HASH_ALG_LAST +}; + +enum hash_mode { + HASH_MODE_NONE = 0x0, + HASH_MODE_HASH = 0x0, + HASH_MODE_XCBC = 0x0, + HASH_MODE_CMAC = 0x1, + HASH_MODE_CTXT 
= 0x1, + HASH_MODE_HMAC = 0x2, + HASH_MODE_RABIN = 0x4, + HASH_MODE_FHMAC = 0x6, + HASH_MODE_CCM = 0x5, + HASH_MODE_GCM = 0x6, +}; + +enum hash_type { + HASH_TYPE_NONE = 0x0, + HASH_TYPE_FULL = 0x0, + HASH_TYPE_INIT = 0x1, + HASH_TYPE_UPDT = 0x2, + HASH_TYPE_FIN = 0x3, + HASH_TYPE_AES128 = 0x0, + HASH_TYPE_AES192 = 0x1, + HASH_TYPE_AES256 = 0x2 +}; + +enum aead_type { + AES_CCM, + AES_GCM, + AUTHENC, + AEAD_TYPE_LAST +}; + +extern char *hash_alg_name[HASH_ALG_LAST]; +extern char *aead_alg_name[AEAD_TYPE_LAST]; + +struct spu_request_opts { + bool is_inbound; + bool auth_first; + bool is_aead; + bool is_esp; + bool bd_suppress; + bool is_rfc4543; +}; + +struct spu_cipher_parms { + enum spu_cipher_alg alg; + enum spu_cipher_mode mode; + enum spu_cipher_type type; + u8 *key_buf; + u16 key_len; + /* iv_buf and iv_len include salt, if applicable */ + u8 *iv_buf; + u16 iv_len; +}; + +struct spu_hash_parms { + enum hash_alg alg; + enum hash_mode mode; + enum hash_type type; + u8 digestsize; + u8 *key_buf; + u16 key_len; + u16 prebuf_len; + /* length of hash pad. signed, needs to handle roll-overs */ + int pad_len; +}; + +struct spu_aead_parms { + u32 assoc_size; + u16 iv_len; /* length of IV field between assoc data and data */ + u8 aad_pad_len; /* For AES GCM/CCM, length of padding after AAD */ + u8 data_pad_len;/* For AES GCM/CCM, length of padding after data */ + bool return_iv; /* True if SPU should return an IV */ + u32 ret_iv_len; /* Length in bytes of returned IV */ + u32 ret_iv_off; /* Offset into full IV if partial IV returned */ +}; + +/************** SPU sizes ***************/ + +#define SPU_RX_STATUS_LEN 4 + +/* Max length of padding for 4-byte alignment of STATUS field */ +#define SPU_STAT_PAD_MAX 4 + +/* Max length of pad fragment. 
4 is for 4-byte alignment of STATUS field */
+#define SPU_PAD_LEN_MAX (SPU_GCM_CCM_ALIGN + MAX_HASH_BLOCK_SIZE + \
+			 SPU_STAT_PAD_MAX)
+
+/* GCM and CCM require 16-byte alignment */
+#define SPU_GCM_CCM_ALIGN 16
+
+/* Length of the SUPDT field in a SPU response message for RC4 */
+#define SPU_SUPDT_LEN 260
+
+/* SPU status error codes. These are used as common error codes across all
+ * SPU variants.
+ */
+#define SPU_INVALID_ICV 1
+
+/* Indicates no limit to the length of the payload in a SPU message */
+#define SPU_MAX_PAYLOAD_INF 0xFFFFFFFF
+
+/* Size of XTS tweak ("i" parameter), in bytes */
+#define SPU_XTS_TWEAK_SIZE 16
+
+/* CCM B_0 field definitions, common for SPU-M and SPU2 */
+#define CCM_B0_ADATA 0x40
+#define CCM_B0_ADATA_SHIFT 6
+#define CCM_B0_M_PRIME 0x38
+#define CCM_B0_M_PRIME_SHIFT 3
+#define CCM_B0_L_PRIME 0x07
+#define CCM_B0_L_PRIME_SHIFT 0
+#define CCM_ESP_L_VALUE 4
+
+/**
+ * spu_req_incl_icv() - Return true if SPU request message should include the
+ * ICV as a separate buffer.
+ * @cipher_mode: the cipher mode being requested
+ * @is_encrypt: true if encrypting. false if decrypting.
+ *
+ * Only GCM and CCM decrypt requests carry the ICV separately; every other
+ * combination returns false.
+ *
+ * Return: true if ICV to be included as separate buffer
+ */
+static __always_inline bool spu_req_incl_icv(enum spu_cipher_mode cipher_mode,
+					      bool is_encrypt)
+{
+	if ((cipher_mode == CIPHER_MODE_GCM) && !is_encrypt)
+		return true;
+	if ((cipher_mode == CIPHER_MODE_CCM) && !is_encrypt)
+		return true;
+
+	return false;
+}
+/*
+ * spu_real_db_size() - Return the plain sum of the seven length arguments.
+ * No alignment or padding is added here; callers pass the pad lengths in.
+ */
+static __always_inline u32 spu_real_db_size(u32 assoc_size,
+					     u32 aead_iv_buf_len,
+					     u32 prebuf_len,
+					     u32 data_size,
+					     u32 aad_pad_len,
+					     u32 gcm_pad_len,
+					     u32 hash_pad_len)
+{
+	return assoc_size + aead_iv_buf_len + prebuf_len + data_size +
+	    aad_pad_len + gcm_pad_len + hash_pad_len;
+}
+
+/************** SPU Functions Prototypes **************/
+
+void spum_dump_msg_hdr(u8 *buf, unsigned int buf_len);
+
+u32 spum_ns2_ctx_max_payload(enum spu_cipher_alg cipher_alg,
+			     enum spu_cipher_mode cipher_mode,
+			     unsigned int blocksize);
+u32 spum_nsp_ctx_max_payload(enum spu_cipher_alg cipher_alg,
+			     enum spu_cipher_mode cipher_mode,
+			     unsigned int blocksize);
+u32 spum_payload_length(u8 *spu_hdr);
+u16 spum_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash);
+u16 spum_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode,
+		      u32 chunksize, u16 hash_block_size);
+u32 spum_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode,
+			 unsigned int data_size);
+u32 spum_assoc_resp_len(enum spu_cipher_mode cipher_mode,
+			unsigned int assoc_len, unsigned int iv_len,
+			bool is_encrypt);
+u8 spum_aead_ivlen(enum spu_cipher_mode cipher_mode, u16 iv_len);
+bool spu_req_incl_icv(enum spu_cipher_mode cipher_mode, bool is_encrypt);	/* NOTE(review): redundant, the function is already defined inline above */
+enum hash_type spum_hash_type(u32 src_sent);
+u32 spum_digest_size(u32 alg_digest_size, enum hash_alg alg,
+		     enum hash_type htype);
+
+u32 spum_create_request(u8 *spu_hdr,
+			struct spu_request_opts *req_opts,
+			struct spu_cipher_parms *cipher_parms,
+			struct spu_hash_parms *hash_parms,
+			struct spu_aead_parms *aead_parms,
+			unsigned int data_size);
+
+u16 spum_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms);
+
+void spum_cipher_req_finish(u8 *spu_hdr,
+			    u16 spu_req_hdr_len,
+			    unsigned int is_inbound,
+			    struct spu_cipher_parms *cipher_parms,
+			    bool update_key,
+			    unsigned int data_size);
+
+void spum_request_pad(u8 *pad_start,
+		      u32 gcm_padding,
+		      u32 hash_pad_len,
+		      enum hash_alg auth_alg,
+		      enum hash_mode auth_mode,
+		      unsigned int total_sent, u32 status_padding);
+
+u8 spum_xts_tweak_in_payload(void);
+u8 spum_tx_status_len(void);
+u8 spum_rx_status_len(void);
+int spum_status_process(u8 *statp);
+
+void spum_ccm_update_iv(unsigned int digestsize,
+			struct spu_cipher_parms *cipher_parms,
+			unsigned int assoclen,
+			unsigned int chunksize,
+			bool is_encrypt,
+			bool is_esp);
+u32 spum_wordalign_padlen(u32 data_size);
+#endif	/* _SPU_H */
diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c
new file mode 100644
index 000000000000..ef04c9748317
--- /dev/null
+++ b/drivers/crypto/bcm/spu2.c
@@ -0,0 +1,1401 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+/*
+ * This file works with the SPU2 version of the SPU. SPU2 has different message
+ * formats than the previous version of the SPU. All SPU message format
+ * differences should be hidden in the spux.c,h files.
+ */
+
+#include	/* NOTE(review): header names are missing on these two lines */
+#include
+
+#include "util.h"
+#include "spu.h"
+#include "spu2.h"
+
+#define SPU2_TX_STATUS_LEN 0 /* SPU2 has no STATUS in input packet */
+
+/*
+ * Controlled by pkt_stat_cnt field in CRYPTO_SS_SPU0_CORE_SPU2_CONTROL0
+ * register. Defaults to 2.
+ */
+#define SPU2_RX_STATUS_LEN 2
+
+enum spu2_proto_sel {	/* protocol selector values written into FMD ctrl0 */
+	SPU2_PROTO_RESV = 0,
+	SPU2_MACSEC_SECTAG8_ECB = 1,
+	SPU2_MACSEC_SECTAG8_SCB = 2,
+	SPU2_MACSEC_SECTAG16 = 3,
+	SPU2_MACSEC_SECTAG16_8_XPN = 4,
+	SPU2_IPSEC = 5,
+	SPU2_IPSEC_ESN = 6,
+	SPU2_TLS_CIPHER = 7,
+	SPU2_TLS_AEAD = 8,
+	SPU2_DTLS_CIPHER = 9,
+	SPU2_DTLS_AEAD = 10
+};
+
+char *spu2_cipher_type_names[] = { "None", "AES128", "AES192", "AES256",	/* indexed by enum spu2_cipher_type */
+	"DES", "3DES"
+};
+
+char *spu2_cipher_mode_names[] = { "ECB", "CBC", "CTR", "CFB", "OFB", "XTS",	/* indexed by enum spu2_cipher_mode */
+	"CCM", "GCM"
+};
+
+char *spu2_hash_type_names[] = { "None", "AES128", "AES192", "AES256",	/* indexed by enum spu2_hash_type */
+	"Reserved", "Reserved", "MD5", "SHA1", "SHA224", "SHA256", "SHA384",
+	"SHA512", "SHA512/224", "SHA512/256", "SHA3-224", "SHA3-256",
+	"SHA3-384", "SHA3-512"
+};
+
+char *spu2_hash_mode_names[] = { "CMAC", "CBC-MAC", "XCBC-MAC", "HMAC",	/* indexed by enum spu2_hash_mode */
+	"Rabin", "CCM", "GCM", "Reserved"
+};
+/*
+ * The four helpers below map a SPU2 enum value to its printable name from the
+ * tables above. Values at or beyond the matching SPU2_*_LAST constant yield
+ * "Reserved" instead of indexing the table.
+ * NOTE(review): assumes each SPU2_*_LAST equals the size of its table; the
+ * enums are defined in spu2.h -- confirm.
+ */
+static char *spu2_ciph_type_name(enum spu2_cipher_type cipher_type)
+{
+	if (cipher_type >= SPU2_CIPHER_TYPE_LAST)
+		return "Reserved";
+	return spu2_cipher_type_names[cipher_type];
+}
+
+static char *spu2_ciph_mode_name(enum spu2_cipher_mode cipher_mode)
+{
+	if (cipher_mode >= SPU2_CIPHER_MODE_LAST)
+		return "Reserved";
+	return spu2_cipher_mode_names[cipher_mode];
+}
+
+static char *spu2_hash_type_name(enum spu2_hash_type hash_type)
+{
+	if (hash_type >= SPU2_HASH_TYPE_LAST)
+		return "Reserved";
+	return spu2_hash_type_names[hash_type];
+}
+
+static char *spu2_hash_mode_name(enum spu2_hash_mode hash_mode)
+{
+	if (hash_mode >= SPU2_HASH_MODE_LAST)
+		return "Reserved";
+	return spu2_hash_mode_names[hash_mode];
+}
+
+/*
+ * Convert from a software cipher mode value to the corresponding value
+ * for SPU2.
+ */ +static int spu2_cipher_mode_xlate(enum spu_cipher_mode cipher_mode, + enum spu2_cipher_mode *spu2_mode) +{ + switch (cipher_mode) { + case CIPHER_MODE_ECB: + *spu2_mode = SPU2_CIPHER_MODE_ECB; + break; + case CIPHER_MODE_CBC: + *spu2_mode = SPU2_CIPHER_MODE_CBC; + break; + case CIPHER_MODE_OFB: + *spu2_mode = SPU2_CIPHER_MODE_OFB; + break; + case CIPHER_MODE_CFB: + *spu2_mode = SPU2_CIPHER_MODE_CFB; + break; + case CIPHER_MODE_CTR: + *spu2_mode = SPU2_CIPHER_MODE_CTR; + break; + case CIPHER_MODE_CCM: + *spu2_mode = SPU2_CIPHER_MODE_CCM; + break; + case CIPHER_MODE_GCM: + *spu2_mode = SPU2_CIPHER_MODE_GCM; + break; + case CIPHER_MODE_XTS: + *spu2_mode = SPU2_CIPHER_MODE_XTS; + break; + default: + return -EINVAL; + } + return 0; +} + +/** + * spu2_cipher_xlate() - Convert a cipher {alg/mode/type} triple to a SPU2 + * cipher type and mode. + * @cipher_alg: [in] cipher algorithm value from software enumeration + * @cipher_mode: [in] cipher mode value from software enumeration + * @cipher_type: [in] cipher type value from software enumeration + * @spu2_type: [out] cipher type value used by spu2 hardware + * @spu2_mode: [out] cipher mode value used by spu2 hardware + * + * Return: 0 if successful + */ +static int spu2_cipher_xlate(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + enum spu_cipher_type cipher_type, + enum spu2_cipher_type *spu2_type, + enum spu2_cipher_mode *spu2_mode) +{ + int err; + + err = spu2_cipher_mode_xlate(cipher_mode, spu2_mode); + if (err) { + flow_log("Invalid cipher mode %d\n", cipher_mode); + return err; + } + + switch (cipher_alg) { + case CIPHER_ALG_NONE: + *spu2_type = SPU2_CIPHER_TYPE_NONE; + break; + case CIPHER_ALG_RC4: + /* SPU2 does not support RC4 */ + err = -EINVAL; + *spu2_type = SPU2_CIPHER_TYPE_NONE; + break; + case CIPHER_ALG_DES: + *spu2_type = SPU2_CIPHER_TYPE_DES; + break; + case CIPHER_ALG_3DES: + *spu2_type = SPU2_CIPHER_TYPE_3DES; + break; + case CIPHER_ALG_AES: + switch (cipher_type) { + case 
CIPHER_TYPE_AES128: + *spu2_type = SPU2_CIPHER_TYPE_AES128; + break; + case CIPHER_TYPE_AES192: + *spu2_type = SPU2_CIPHER_TYPE_AES192; + break; + case CIPHER_TYPE_AES256: + *spu2_type = SPU2_CIPHER_TYPE_AES256; + break; + default: + err = -EINVAL; + } + break; + case CIPHER_ALG_LAST: + default: + err = -EINVAL; + break; + } + + if (err) + flow_log("Invalid cipher alg %d or type %d\n", + cipher_alg, cipher_type); + return err; +} + +/* + * Convert from a software hash mode value to the corresponding value + * for SPU2. Note that HASH_MODE_NONE and HASH_MODE_XCBC have the same value. + */ +static int spu2_hash_mode_xlate(enum hash_mode hash_mode, + enum spu2_hash_mode *spu2_mode) +{ + switch (hash_mode) { + case HASH_MODE_XCBC: + *spu2_mode = SPU2_HASH_MODE_XCBC_MAC; + break; + case HASH_MODE_CMAC: + *spu2_mode = SPU2_HASH_MODE_CMAC; + break; + case HASH_MODE_HMAC: + *spu2_mode = SPU2_HASH_MODE_HMAC; + break; + case HASH_MODE_CCM: + *spu2_mode = SPU2_HASH_MODE_CCM; + break; + case HASH_MODE_GCM: + *spu2_mode = SPU2_HASH_MODE_GCM; + break; + default: + return -EINVAL; + } + return 0; +} + +/** + * spu2_hash_xlate() - Convert a hash {alg/mode/type} triple to a SPU2 hash type + * and mode. 
+ * @hash_alg: [in] hash algorithm value from software enumeration + * @hash_mode: [in] hash mode value from software enumeration + * @hash_type: [in] hash type value from software enumeration + * @ciph_type: [in] cipher type value from software enumeration + * @spu2_type: [out] hash type value used by SPU2 hardware + * @spu2_mode: [out] hash mode value used by SPU2 hardware + * + * Return: 0 if successful + */ +static int +spu2_hash_xlate(enum hash_alg hash_alg, enum hash_mode hash_mode, + enum hash_type hash_type, enum spu_cipher_type ciph_type, + enum spu2_hash_type *spu2_type, enum spu2_hash_mode *spu2_mode) +{ + int err; + + err = spu2_hash_mode_xlate(hash_mode, spu2_mode); + if (err) { + flow_log("Invalid hash mode %d\n", hash_mode); + return err; + } + + switch (hash_alg) { + case HASH_ALG_NONE: + *spu2_type = SPU2_HASH_TYPE_NONE; + break; + case HASH_ALG_MD5: + *spu2_type = SPU2_HASH_TYPE_MD5; + break; + case HASH_ALG_SHA1: + *spu2_type = SPU2_HASH_TYPE_SHA1; + break; + case HASH_ALG_SHA224: + *spu2_type = SPU2_HASH_TYPE_SHA224; + break; + case HASH_ALG_SHA256: + *spu2_type = SPU2_HASH_TYPE_SHA256; + break; + case HASH_ALG_SHA384: + *spu2_type = SPU2_HASH_TYPE_SHA384; + break; + case HASH_ALG_SHA512: + *spu2_type = SPU2_HASH_TYPE_SHA512; + break; + case HASH_ALG_AES: + switch (ciph_type) { + case CIPHER_TYPE_AES128: + *spu2_type = SPU2_HASH_TYPE_AES128; + break; + case CIPHER_TYPE_AES192: + *spu2_type = SPU2_HASH_TYPE_AES192; + break; + case CIPHER_TYPE_AES256: + *spu2_type = SPU2_HASH_TYPE_AES256; + break; + default: + err = -EINVAL; + } + break; + case HASH_ALG_SHA3_224: + *spu2_type = SPU2_HASH_TYPE_SHA3_224; + break; + case HASH_ALG_SHA3_256: + *spu2_type = SPU2_HASH_TYPE_SHA3_256; + break; + case HASH_ALG_SHA3_384: + *spu2_type = SPU2_HASH_TYPE_SHA3_384; + break; + case HASH_ALG_SHA3_512: + *spu2_type = SPU2_HASH_TYPE_SHA3_512; + case HASH_ALG_LAST: + default: + err = -EINVAL; + break; + } + + if (err) + flow_log("Invalid hash alg %d or type %d\n", + 
hash_alg, hash_type); + return err; +} + +/* Dump FMD ctrl0. The ctrl0 input is in host byte order */ +static void spu2_dump_fmd_ctrl0(u64 ctrl0) +{ + enum spu2_cipher_type ciph_type; + enum spu2_cipher_mode ciph_mode; + enum spu2_hash_type hash_type; + enum spu2_hash_mode hash_mode; + char *ciph_name; + char *ciph_mode_name; + char *hash_name; + char *hash_mode_name; + u8 cfb; + u8 proto; + + packet_log(" FMD CTRL0 %#16llx\n", ctrl0); + if (ctrl0 & SPU2_CIPH_ENCRYPT_EN) + packet_log(" encrypt\n"); + else + packet_log(" decrypt\n"); + + ciph_type = (ctrl0 & SPU2_CIPH_TYPE) >> SPU2_CIPH_TYPE_SHIFT; + ciph_name = spu2_ciph_type_name(ciph_type); + packet_log(" Cipher type: %s\n", ciph_name); + + if (ciph_type != SPU2_CIPHER_TYPE_NONE) { + ciph_mode = (ctrl0 & SPU2_CIPH_MODE) >> SPU2_CIPH_MODE_SHIFT; + ciph_mode_name = spu2_ciph_mode_name(ciph_mode); + packet_log(" Cipher mode: %s\n", ciph_mode_name); + } + + cfb = (ctrl0 & SPU2_CFB_MASK) >> SPU2_CFB_MASK_SHIFT; + packet_log(" CFB %#x\n", cfb); + + proto = (ctrl0 & SPU2_PROTO_SEL) >> SPU2_PROTO_SEL_SHIFT; + packet_log(" protocol %#x\n", proto); + + if (ctrl0 & SPU2_HASH_FIRST) + packet_log(" hash first\n"); + else + packet_log(" cipher first\n"); + + if (ctrl0 & SPU2_CHK_TAG) + packet_log(" check tag\n"); + + hash_type = (ctrl0 & SPU2_HASH_TYPE) >> SPU2_HASH_TYPE_SHIFT; + hash_name = spu2_hash_type_name(hash_type); + packet_log(" Hash type: %s\n", hash_name); + + if (hash_type != SPU2_HASH_TYPE_NONE) { + hash_mode = (ctrl0 & SPU2_HASH_MODE) >> SPU2_HASH_MODE_SHIFT; + hash_mode_name = spu2_hash_mode_name(hash_mode); + packet_log(" Hash mode: %s\n", hash_mode_name); + } + + if (ctrl0 & SPU2_CIPH_PAD_EN) { + packet_log(" Cipher pad: %#2llx\n", + (ctrl0 & SPU2_CIPH_PAD) >> SPU2_CIPH_PAD_SHIFT); + } +} + +/* Dump FMD ctrl1. 
The ctrl1 input is in host byte order */
+static void spu2_dump_fmd_ctrl1(u64 ctrl1)
+{
+	u8 hash_key_len;
+	u8 ciph_key_len;
+	u8 ret_iv_len;
+	u8 iv_offset;
+	u8 iv_len;
+	u8 hash_tag_len;
+	u8 ret_md;
+
+	packet_log(" FMD CTRL1 %#16llx\n", ctrl1);
+	if (ctrl1 & SPU2_TAG_LOC)
+		packet_log(" Tag after payload\n");
+	/* One log line listing every optional field flagged as present */
+	packet_log(" Msg includes ");
+	if (ctrl1 & SPU2_HAS_FR_DATA)
+		packet_log("FD ");
+	if (ctrl1 & SPU2_HAS_AAD1)
+		packet_log("AAD1 ");
+	if (ctrl1 & SPU2_HAS_NAAD)
+		packet_log("NAAD ");
+	if (ctrl1 & SPU2_HAS_AAD2)
+		packet_log("AAD2 ");
+	if (ctrl1 & SPU2_HAS_ESN)
+		packet_log("ESN ");
+	packet_log("\n");
+
+	hash_key_len = (ctrl1 & SPU2_HASH_KEY_LEN) >> SPU2_HASH_KEY_LEN_SHIFT;
+	packet_log(" Hash key len %u\n", hash_key_len);
+
+	ciph_key_len = (ctrl1 & SPU2_CIPH_KEY_LEN) >> SPU2_CIPH_KEY_LEN_SHIFT;
+	packet_log(" Cipher key len %u\n", ciph_key_len);
+
+	if (ctrl1 & SPU2_GENIV)
+		packet_log(" Generate IV\n");
+
+	if (ctrl1 & SPU2_HASH_IV)
+		packet_log(" IV included in hash\n");
+
+	if (ctrl1 & SPU2_RET_IV)
+		packet_log(" Return IV in output before payload\n");
+
+	ret_iv_len = (ctrl1 & SPU2_RET_IV_LEN) >> SPU2_RET_IV_LEN_SHIFT;
+	packet_log(" Length of returned IV %u bytes\n",
+		   ret_iv_len ? ret_iv_len : 16);	/* a field value of 0 is reported as 16 */
+
+	iv_offset = (ctrl1 & SPU2_IV_OFFSET) >> SPU2_IV_OFFSET_SHIFT;
+	packet_log(" IV offset %u\n", iv_offset);
+
+	iv_len = (ctrl1 & SPU2_IV_LEN) >> SPU2_IV_LEN_SHIFT;
+	packet_log(" Input IV len %u bytes\n", iv_len);
+
+	hash_tag_len = (ctrl1 & SPU2_HASH_TAG_LEN) >> SPU2_HASH_TAG_LEN_SHIFT;
+	packet_log(" Hash tag length %u bytes\n", hash_tag_len);
+	/* One log line listing everything the response is asked to return */
+	packet_log(" Return ");
+	ret_md = (ctrl1 & SPU2_RETURN_MD) >> SPU2_RETURN_MD_SHIFT;
+	if (ret_md)	/* any non-zero metadata selector includes the FMD */
+		packet_log("FMD ");
+	if (ret_md == SPU2_RET_FMD_OMD)
+		packet_log("OMD ");
+	else if (ret_md == SPU2_RET_FMD_OMD_IV)
+		packet_log("OMD IV ");
+	if (ctrl1 & SPU2_RETURN_FD)
+		packet_log("FD ");
+	if (ctrl1 & SPU2_RETURN_AAD1)
+		packet_log("AAD1 ");
+	if (ctrl1 & SPU2_RETURN_NAAD)
+		packet_log("NAAD ");
+	if (ctrl1 & SPU2_RETURN_AAD2)
+		packet_log("AAD2 ");
+	if (ctrl1 & SPU2_RETURN_PAY)
+		packet_log("Payload");
+	packet_log("\n");
+}
+
+/*
+ * Dump FMD ctrl2. The ctrl2 input is in host byte order.
+ * The AAD1 offset is printed without a shift; the other three fields are
+ * masked and shifted down before printing.
+ */
+static void spu2_dump_fmd_ctrl2(u64 ctrl2)
+{
+	packet_log(" FMD CTRL2 %#16llx\n", ctrl2);
+
+	packet_log(" AAD1 offset %llu length %llu bytes\n",
+		   ctrl2 & SPU2_AAD1_OFFSET,
+		   (ctrl2 & SPU2_AAD1_LEN) >> SPU2_AAD1_LEN_SHIFT);
+	packet_log(" AAD2 offset %llu\n",
+		   (ctrl2 & SPU2_AAD2_OFFSET) >> SPU2_AAD2_OFFSET_SHIFT);
+	packet_log(" Payload offset %llu\n",
+		   (ctrl2 & SPU2_PL_OFFSET) >> SPU2_PL_OFFSET_SHIFT);
+}
+
+/* Dump FMD ctrl3.
The ctrl3 input is in host byte order */ +static void spu2_dump_fmd_ctrl3(u64 ctrl3) +{ + packet_log(" FMD CTRL3 %#16llx\n", ctrl3); + + packet_log(" Payload length %llu bytes\n", ctrl3 & SPU2_PL_LEN); + packet_log(" TLS length %llu bytes\n", + (ctrl3 & SPU2_TLS_LEN) >> SPU2_TLS_LEN_SHIFT); +} + +static void spu2_dump_fmd(struct SPU2_FMD *fmd) +{ + spu2_dump_fmd_ctrl0(le64_to_cpu(fmd->ctrl0)); + spu2_dump_fmd_ctrl1(le64_to_cpu(fmd->ctrl1)); + spu2_dump_fmd_ctrl2(le64_to_cpu(fmd->ctrl2)); + spu2_dump_fmd_ctrl3(le64_to_cpu(fmd->ctrl3)); +} + +static void spu2_dump_omd(u8 *omd, u16 hash_key_len, u16 ciph_key_len, + u16 hash_iv_len, u16 ciph_iv_len) +{ + u8 *ptr = omd; + + packet_log(" OMD:\n"); + + if (hash_key_len) { + packet_log(" Hash Key Length %u bytes\n", hash_key_len); + packet_dump(" KEY: ", ptr, hash_key_len); + ptr += hash_key_len; + } + + if (ciph_key_len) { + packet_log(" Cipher Key Length %u bytes\n", ciph_key_len); + packet_dump(" KEY: ", ptr, ciph_key_len); + ptr += ciph_key_len; + } + + if (hash_iv_len) { + packet_log(" Hash IV Length %u bytes\n", hash_iv_len); + packet_dump(" hash IV: ", ptr, hash_iv_len); + ptr += ciph_key_len; + } + + if (ciph_iv_len) { + packet_log(" Cipher IV Length %u bytes\n", ciph_iv_len); + packet_dump(" cipher IV: ", ptr, ciph_iv_len); + } +} + +/* Dump a SPU2 header for debug */ +void spu2_dump_msg_hdr(u8 *buf, unsigned int buf_len) +{ + struct SPU2_FMD *fmd = (struct SPU2_FMD *)buf; + u8 *omd; + u64 ctrl1; + u16 hash_key_len; + u16 ciph_key_len; + u16 hash_iv_len; + u16 ciph_iv_len; + u16 omd_len; + + packet_log("\n"); + packet_log("SPU2 message header %p len: %u\n", buf, buf_len); + + spu2_dump_fmd(fmd); + omd = (u8 *)(fmd + 1); + + ctrl1 = le64_to_cpu(fmd->ctrl1); + hash_key_len = (ctrl1 & SPU2_HASH_KEY_LEN) >> SPU2_HASH_KEY_LEN_SHIFT; + ciph_key_len = (ctrl1 & SPU2_CIPH_KEY_LEN) >> SPU2_CIPH_KEY_LEN_SHIFT; + hash_iv_len = 0; + ciph_iv_len = (ctrl1 & SPU2_IV_LEN) >> SPU2_IV_LEN_SHIFT; + spu2_dump_omd(omd, hash_key_len, 
ciph_key_len, hash_iv_len, + ciph_iv_len); + + /* Double check sanity */ + omd_len = hash_key_len + ciph_key_len + hash_iv_len + ciph_iv_len; + if (FMD_SIZE + omd_len != buf_len) { + packet_log + (" Packet parsed incorrectly. buf_len %u, sum of MD %zu\n", + buf_len, FMD_SIZE + omd_len); + } + packet_log("\n"); +} + +/** + * spu2_fmd_init() - At setkey time, initialize the fixed meta data for + * subsequent ablkcipher requests for this context. + * @spu2_cipher_type: Cipher algorithm + * @spu2_mode: Cipher mode + * @cipher_key_len: Length of cipher key, in bytes + * @cipher_iv_len: Length of cipher initialization vector, in bytes + * + * Return: 0 (success) + */ +static int spu2_fmd_init(struct SPU2_FMD *fmd, + enum spu2_cipher_type spu2_type, + enum spu2_cipher_mode spu2_mode, + u32 cipher_key_len, u32 cipher_iv_len) +{ + u64 ctrl0; + u64 ctrl1; + u64 ctrl2; + u64 ctrl3; + u32 aad1_offset; + u32 aad2_offset; + u16 aad1_len = 0; + u64 payload_offset; + + ctrl0 = (spu2_type << SPU2_CIPH_TYPE_SHIFT) | + (spu2_mode << SPU2_CIPH_MODE_SHIFT); + + ctrl1 = (cipher_key_len << SPU2_CIPH_KEY_LEN_SHIFT) | + ((u64)cipher_iv_len << SPU2_IV_LEN_SHIFT) | + ((u64)SPU2_RET_FMD_ONLY << SPU2_RETURN_MD_SHIFT) | SPU2_RETURN_PAY; + + /* + * AAD1 offset is from start of FD. FD length is always 0 for this + * driver. So AAD1_offset is always 0. + */ + aad1_offset = 0; + aad2_offset = aad1_offset; + payload_offset = 0; + ctrl2 = aad1_offset | + (aad1_len << SPU2_AAD1_LEN_SHIFT) | + (aad2_offset << SPU2_AAD2_OFFSET_SHIFT) | + (payload_offset << SPU2_PL_OFFSET_SHIFT); + + ctrl3 = 0; + + fmd->ctrl0 = cpu_to_le64(ctrl0); + fmd->ctrl1 = cpu_to_le64(ctrl1); + fmd->ctrl2 = cpu_to_le64(ctrl2); + fmd->ctrl3 = cpu_to_le64(ctrl3); + + return 0; +} + +/** + * spu2_fmd_ctrl0_write() - Write ctrl0 field in fixed metadata (FMD) field of + * SPU request packet. + * @fmd: Start of FMD field to be written + * @is_inbound: true if decrypting. false if encrypting. 
+ * @authFirst: true if alg authenticates before encrypting
+ * @protocol: protocol selector
+ * @cipher_type: cipher algorithm
+ * @cipher_mode: cipher mode
+ * @auth_type: authentication type
+ * @auth_mode: authentication mode
+ */
+static void spu2_fmd_ctrl0_write(struct SPU2_FMD *fmd,
+				 bool is_inbound, bool auth_first,
+				 enum spu2_proto_sel protocol,
+				 enum spu2_cipher_type cipher_type,
+				 enum spu2_cipher_mode cipher_mode,
+				 enum spu2_hash_type auth_type,
+				 enum spu2_hash_mode auth_mode)
+{
+	u64 ctrl0 = 0;	/* built in host order, stored little endian at the end */
+	/* The encrypt bit is set only for outbound requests that use a cipher */
+	if ((cipher_type != SPU2_CIPHER_TYPE_NONE) && !is_inbound)
+		ctrl0 |= SPU2_CIPH_ENCRYPT_EN;
+
+	ctrl0 |= ((u64)cipher_type << SPU2_CIPH_TYPE_SHIFT) |
+	    ((u64)cipher_mode << SPU2_CIPH_MODE_SHIFT);
+
+	if (protocol)	/* SPU2_PROTO_RESV (0) leaves the selector bits clear */
+		ctrl0 |= (u64)protocol << SPU2_PROTO_SEL_SHIFT;
+
+	if (auth_first)
+		ctrl0 |= SPU2_HASH_FIRST;
+	/* Tag checking is requested only for inbound requests that use a hash */
+	if (is_inbound && (auth_type != SPU2_HASH_TYPE_NONE))
+		ctrl0 |= SPU2_CHK_TAG;
+
+	ctrl0 |= (((u64)auth_type << SPU2_HASH_TYPE_SHIFT) |
+		  ((u64)auth_mode << SPU2_HASH_MODE_SHIFT));
+
+	fmd->ctrl0 = cpu_to_le64(ctrl0);
+}
+
+/**
+ * spu2_fmd_ctrl1_write() - Write ctrl1 field in fixed metadata (FMD) field of
+ * SPU request packet.
+ * @fmd: Start of FMD field to be written
+ * @assoc_size: Length of additional associated data, in bytes
+ * @auth_key_len: Length of authentication key, in bytes
+ * @cipher_key_len: Length of cipher key, in bytes
+ * @gen_iv: If true, hw generates IV and returns in response
+ * @hash_iv: IV participates in hash. Used for IPSEC and TLS.
+ * @return_iv: Return IV in output packet before payload
+ * @ret_iv_len: Length of IV returned from SPU, in bytes
+ * @ret_iv_offset: Offset into full IV of start of returned IV
+ * @cipher_iv_len: Length of input cipher IV, in bytes
+ * @digest_size: Length of digest (aka, hash tag or ICV), in bytes
+ * @return_payload: Return payload in SPU response
+ * @return_md : return metadata in SPU response
+ *
+ * Packet can have AAD2 w/o AAD1.
For algorithms currently supported,
+ * associated data goes in AAD2.
+ */
+static void spu2_fmd_ctrl1_write(struct SPU2_FMD *fmd, bool is_inbound,
+				 u64 assoc_size,
+				 u64 auth_key_len, u64 cipher_key_len,
+				 bool gen_iv, bool hash_iv, bool return_iv,
+				 u64 ret_iv_len, u64 ret_iv_offset,
+				 u64 cipher_iv_len, u64 digest_size,
+				 bool return_payload, bool return_md)
+{
+	u64 ctrl1 = 0;	/* built in host order, stored little endian at the end */
+	/* Tag location bit is set only for inbound requests that carry a digest */
+	if (is_inbound && digest_size)
+		ctrl1 |= SPU2_TAG_LOC;
+
+	if (assoc_size) {
+		ctrl1 |= SPU2_HAS_AAD2;
+		ctrl1 |= SPU2_RETURN_AAD2; /* need aad2 for gcm aes esp */
+	}
+
+	if (auth_key_len)
+		ctrl1 |= ((auth_key_len << SPU2_HASH_KEY_LEN_SHIFT) &
+			  SPU2_HASH_KEY_LEN);
+
+	if (cipher_key_len)
+		ctrl1 |= ((cipher_key_len << SPU2_CIPH_KEY_LEN_SHIFT) &
+			  SPU2_CIPH_KEY_LEN);
+
+	if (gen_iv)
+		ctrl1 |= SPU2_GENIV;
+
+	if (hash_iv)
+		ctrl1 |= SPU2_HASH_IV;
+
+	if (return_iv) {
+		/*
+		 * NOTE(review): unlike the other length fields, these two
+		 * values are shifted in without being masked to a field width.
+		 */
+		ctrl1 |= SPU2_RET_IV;
+		ctrl1 |= ret_iv_len << SPU2_RET_IV_LEN_SHIFT;
+		ctrl1 |= ret_iv_offset << SPU2_IV_OFFSET_SHIFT;
+	}
+
+	ctrl1 |= ((cipher_iv_len << SPU2_IV_LEN_SHIFT) & SPU2_IV_LEN);
+
+	if (digest_size)
+		ctrl1 |= ((digest_size << SPU2_HASH_TAG_LEN_SHIFT) &
+			  SPU2_HASH_TAG_LEN);
+
+	/* Let's ask for the output pkt to include FMD, but don't need to
+	 * get keys and IVs back in OMD.
+	 */
+	if (return_md)
+		ctrl1 |= ((u64)SPU2_RET_FMD_ONLY << SPU2_RETURN_MD_SHIFT);
+	else
+		ctrl1 |= ((u64)SPU2_RET_NO_MD << SPU2_RETURN_MD_SHIFT);
+
+	/*
+	 * Crypto API does not get assoc data back.
+	 * NOTE(review): the original remark "So no need for AAD2" does not
+	 * match the code above, which sets SPU2_RETURN_AAD2 whenever
+	 * assoc_size is non-zero.
+	 */
+
+	if (return_payload)
+		ctrl1 |= SPU2_RETURN_PAY;
+
+	fmd->ctrl1 = cpu_to_le64(ctrl1);
+}
+
+/**
+ * spu2_fmd_ctrl2_write() - Set the ctrl2 field in the fixed metadata field of
+ * SPU2 header.
+ * @fmd: Start of FMD field to be written + * @cipher_offset: Number of bytes from Start of Packet (end of FD field) where + * data to be encrypted or decrypted begins + * @auth_key_len: Length of authentication key, in bytes + * @auth_iv_len: Length of authentication initialization vector, in bytes + * @cipher_key_len: Length of cipher key, in bytes + * @cipher_iv_len: Length of cipher IV, in bytes + */ +static void spu2_fmd_ctrl2_write(struct SPU2_FMD *fmd, u64 cipher_offset, + u64 auth_key_len, u64 auth_iv_len, + u64 cipher_key_len, u64 cipher_iv_len) +{ + u64 ctrl2; + u64 aad1_offset; + u64 aad2_offset; + u16 aad1_len = 0; + u64 payload_offset; + + /* AAD1 offset is from start of FD. FD length always 0. */ + aad1_offset = 0; + + aad2_offset = aad1_offset; + payload_offset = cipher_offset; + ctrl2 = aad1_offset | + (aad1_len << SPU2_AAD1_LEN_SHIFT) | + (aad2_offset << SPU2_AAD2_OFFSET_SHIFT) | + (payload_offset << SPU2_PL_OFFSET_SHIFT); + + fmd->ctrl2 = cpu_to_le64(ctrl2); +} + +/** + * spu2_fmd_ctrl3_write() - Set the ctrl3 field in FMD + * @fmd: Fixed meta data. First field in SPU2 msg header. + * @payload_len: Length of payload, in bytes + */ +static void spu2_fmd_ctrl3_write(struct SPU2_FMD *fmd, u64 payload_len) +{ + u64 ctrl3; + + ctrl3 = payload_len & SPU2_PL_LEN; + + fmd->ctrl3 = cpu_to_le64(ctrl3); +} + +/** + * spu2_ctx_max_payload() - Determine the maximum length of the payload for a + * SPU message for a given cipher and hash alg context. + * @cipher_alg: The cipher algorithm + * @cipher_mode: The cipher mode + * @blocksize: The size of a block of data for this algo + * + * For SPU2, the hardware generally ignores the PayloadLen field in ctrl3 of + * FMD and just keeps computing until it receives a DMA descriptor with the EOF + * flag set. So we consider the max payload to be infinite. AES CCM is an + * exception. 
+ * + * Return: Max payload length in bytes + */ +u32 spu2_ctx_max_payload(enum spu_cipher_alg cipher_alg, + enum spu_cipher_mode cipher_mode, + unsigned int blocksize) +{ + if ((cipher_alg == CIPHER_ALG_AES) && + (cipher_mode == CIPHER_MODE_CCM)) { + u32 excess = SPU2_MAX_PAYLOAD % blocksize; + + return SPU2_MAX_PAYLOAD - excess; + } else { + return SPU_MAX_PAYLOAD_INF; + } +} + +/** + * spu_payload_length() - Given a SPU2 message header, extract the payload + * length. + * @spu_hdr: Start of SPU message header (FMD) + * + * Return: payload length, in bytes + */ +u32 spu2_payload_length(u8 *spu_hdr) +{ + struct SPU2_FMD *fmd = (struct SPU2_FMD *)spu_hdr; + u32 pl_len; + u64 ctrl3; + + ctrl3 = le64_to_cpu(fmd->ctrl3); + pl_len = ctrl3 & SPU2_PL_LEN; + + return pl_len; +} + +/** + * spu_response_hdr_len() - Determine the expected length of a SPU response + * header. + * @auth_key_len: Length of authentication key, in bytes + * @enc_key_len: Length of encryption key, in bytes + * + * For SPU2, includes just FMD. OMD is never requested. + * + * Return: Length of FMD, in bytes + */ +u16 spu2_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash) +{ + return FMD_SIZE; +} + +/** + * spu_hash_pad_len() - Calculate the length of hash padding required to extend + * data to a full block size. + * @hash_alg: hash algorithm + * @hash_mode: hash mode + * @chunksize: length of data, in bytes + * @hash_block_size: size of a hash block, in bytes + * + * SPU2 hardware does all hash padding + * + * Return: length of hash pad in bytes + */ +u16 spu2_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode, + u32 chunksize, u16 hash_block_size) +{ + return 0; +} + +/** + * spu2_gcm_ccm_padlen() - Determine the length of GCM/CCM padding for either + * the AAD field or the data. + * + * Return: 0. Unlike SPU-M, SPU2 hardware does any GCM/CCM padding required. 
+ */ +u32 spu2_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode, + unsigned int data_size) +{ + return 0; +} + +/** + * spu_assoc_resp_len() - Determine the size of the AAD2 buffer needed to catch + * associated data in a SPU2 output packet. + * @cipher_mode: cipher mode + * @assoc_len: length of additional associated data, in bytes + * @iv_len: length of initialization vector, in bytes + * @is_encrypt: true if encrypting. false if decrypt. + * + * Return: Length of buffer to catch associated data in response + */ +u32 spu2_assoc_resp_len(enum spu_cipher_mode cipher_mode, + unsigned int assoc_len, unsigned int iv_len, + bool is_encrypt) +{ + u32 resp_len = assoc_len; + + if (is_encrypt) + /* gcm aes esp has to write 8-byte IV in response */ + resp_len += iv_len; + return resp_len; +} + +/* + * spu_aead_ivlen() - Calculate the length of the AEAD IV to be included + * in a SPU request after the AAD and before the payload. + * @cipher_mode: cipher mode + * @iv_ctr_len: initialization vector length in bytes + * + * For SPU2, AEAD IV is included in OMD and does not need to be repeated + * prior to the payload. + * + * Return: Length of AEAD IV in bytes + */ +u8 spu2_aead_ivlen(enum spu_cipher_mode cipher_mode, u16 iv_len) +{ + return 0; +} + +/** + * spu2_hash_type() - Determine the type of hash operation. + * @src_sent: The number of bytes in the current request that have already + * been sent to the SPU to be hashed. + * + * SPU2 always does a FULL hash operation + */ +enum hash_type spu2_hash_type(u32 src_sent) +{ + return HASH_TYPE_FULL; +} + +/** + * spu2_digest_size() - Determine the size of a hash digest to expect the SPU to + * return. 
+ * alg_digest_size: Number of bytes in the final digest for the given algo
+ * alg: The hash algorithm
+ * htype: Type of hash operation (init, update, full, etc)
+ *
+ */
+u32 spu2_digest_size(u32 alg_digest_size, enum hash_alg alg,
+		     enum hash_type htype)
+{
+	return alg_digest_size;
+}
+
+/**
+ * spu2_create_request() - Build a SPU2 request message header, including FMD
+ * and OMD.
+ * @spu_hdr: Start of buffer where SPU request header is to be written
+ * @req_opts: SPU request message options
+ * @cipher_parms: Parameters related to cipher algorithm
+ * @hash_parms: Parameters related to hash algorithm
+ * @aead_parms: Parameters related to AEAD operation
+ * @data_size: Length of data to be encrypted or authenticated. If AEAD, does
+ *	       not include length of AAD.
+ *
+ * Construct the message starting at spu_hdr. Caller should allocate this buffer
+ * in DMA-able memory at least SPU_HEADER_ALLOC_LEN bytes long.
+ *
+ * Side effects on the caller's parameter blocks: req_opts->auth_first is
+ * overwritten for AES-GCM and AES-CCM AEAD requests. For hash-only GCM
+ * (rfc4543, or GCM with an empty payload) the cipher key is copied into
+ * hash_parms->key_buf, hash_parms->key_len is set to the cipher key length and
+ * cipher_parms->key_len is set to 0. None of the key rewriting happens when
+ * the cipher translation fails.
+ *
+ * Return: the length of the SPU header in bytes. 0 if an error occurs.
+ */
+u32 spu2_create_request(u8 *spu_hdr,
+			struct spu_request_opts *req_opts,
+			struct spu_cipher_parms *cipher_parms,
+			struct spu_hash_parms *hash_parms,
+			struct spu_aead_parms *aead_parms,
+			unsigned int data_size)
+{
+	struct SPU2_FMD *fmd;
+	u8 *ptr;
+	unsigned int buf_len;
+	int err;
+	enum spu2_cipher_type spu2_ciph_type = SPU2_CIPHER_TYPE_NONE;
+	enum spu2_cipher_mode spu2_ciph_mode;
+	enum spu2_hash_type spu2_auth_type = SPU2_HASH_TYPE_NONE;
+	enum spu2_hash_mode spu2_auth_mode;
+	bool return_md = true;
+	enum spu2_proto_sel proto = SPU2_PROTO_RESV;
+
+	/* size of the payload */
+	unsigned int payload_len =
+	    hash_parms->prebuf_len + data_size + hash_parms->pad_len -
+	    ((req_opts->is_aead && req_opts->is_inbound) ?
+	     hash_parms->digestsize : 0);
+
+	/* offset of prebuf or data from start of AAD2 */
+	unsigned int cipher_offset = aead_parms->assoc_size +
+		aead_parms->aad_pad_len + aead_parms->iv_len;
+
+#ifdef DEBUG
+	/* total size of the data following OMD (without STAT word padding) */
+	unsigned int real_db_size = spu_real_db_size(aead_parms->assoc_size,
+						 aead_parms->iv_len,
+						 hash_parms->prebuf_len,
+						 data_size,
+						 aead_parms->aad_pad_len,
+						 aead_parms->data_pad_len,
+						 hash_parms->pad_len);
+#endif
+	unsigned int assoc_size = aead_parms->assoc_size;
+
+	if (req_opts->is_aead &&
+	    (cipher_parms->alg == CIPHER_ALG_AES) &&
+	    (cipher_parms->mode == CIPHER_MODE_GCM))
+		/*
+		 * On SPU 2, aes gcm cipher first on encrypt, auth first on
+		 * decrypt
+		 */
+		req_opts->auth_first = req_opts->is_inbound;
+
+	/* and do opposite for ccm (auth 1st on encrypt) */
+	if (req_opts->is_aead &&
+	    (cipher_parms->alg == CIPHER_ALG_AES) &&
+	    (cipher_parms->mode == CIPHER_MODE_CCM))
+		req_opts->auth_first = !req_opts->is_inbound;
+
+	flow_log("%s()\n", __func__);
+	flow_log(" in:%u authFirst:%u\n",
+		 req_opts->is_inbound, req_opts->auth_first);
+	flow_log(" cipher alg:%u mode:%u type %u\n", cipher_parms->alg,
+		 cipher_parms->mode, cipher_parms->type);
+	flow_log(" is_esp: %s\n", req_opts->is_esp ? "yes" : "no");
+	flow_log(" key: %d\n", cipher_parms->key_len);
+	flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len);
+	flow_log(" iv: %d\n", cipher_parms->iv_len);
+	flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len);
+	flow_log(" auth alg:%u mode:%u type %u\n",
+		 hash_parms->alg, hash_parms->mode, hash_parms->type);
+	flow_log(" digestsize: %u\n", hash_parms->digestsize);
+	flow_log(" authkey: %d\n", hash_parms->key_len);
+	flow_dump(" authkey: ", hash_parms->key_buf, hash_parms->key_len);
+	flow_log(" assoc_size:%u\n", assoc_size);
+	flow_log(" prebuf_len:%u\n", hash_parms->prebuf_len);
+	flow_log(" data_size:%u\n", data_size);
+	flow_log(" hash_pad_len:%u\n", hash_parms->pad_len);
+	flow_log(" real_db_size:%u\n", real_db_size);
+	flow_log(" cipher_offset:%u payload_len:%u\n",
+		 cipher_offset, payload_len);
+	flow_log(" aead_iv: %u\n", aead_parms->iv_len);
+
+	/*
+	 * Convert to spu2 values for cipher alg, hash alg. Bail out right
+	 * away on failure: spu2_ciph_mode has no initializer, so it must not
+	 * be examined below, and the caller's key parameters must not be
+	 * rewritten for a request that is going to be rejected anyway.
+	 */
+	err = spu2_cipher_xlate(cipher_parms->alg, cipher_parms->mode,
+				cipher_parms->type,
+				&spu2_ciph_type, &spu2_ciph_mode);
+	if (err)
+		return 0;
+
+	/* If we are doing GCM hashing only - either via rfc4543 transform
+	 * or because we happen to do GCM with AAD only and no payload - we
+	 * need to configure hardware to use hash key rather than cipher key
+	 * and put data into payload. This is because unlike SPU-M, running
+	 * GCM cipher with 0 size payload is not permitted.
+	 */
+	if ((req_opts->is_rfc4543) ||
+	    ((spu2_ciph_mode == SPU2_CIPHER_MODE_GCM) &&
+	    (payload_len == 0))) {
+		/* Use hashing (only) and set up hash key */
+		spu2_ciph_type = SPU2_CIPHER_TYPE_NONE;
+		hash_parms->key_len = cipher_parms->key_len;
+		memcpy(hash_parms->key_buf, cipher_parms->key_buf,
+		       cipher_parms->key_len);
+		cipher_parms->key_len = 0;
+
+		if (req_opts->is_rfc4543)
+			payload_len += assoc_size;
+		else
+			payload_len = assoc_size;
+		cipher_offset = 0;
+		assoc_size = 0;
+	}
+
+	flow_log("spu2 cipher type %s, cipher mode %s\n",
+		 spu2_ciph_type_name(spu2_ciph_type),
+		 spu2_ciph_mode_name(spu2_ciph_mode));
+
+	err = spu2_hash_xlate(hash_parms->alg, hash_parms->mode,
+			      hash_parms->type,
+			      cipher_parms->type,
+			      &spu2_auth_type, &spu2_auth_mode);
+	if (err)
+		return 0;
+
+	flow_log("spu2 hash type %s, hash mode %s\n",
+		 spu2_hash_type_name(spu2_auth_type),
+		 spu2_hash_mode_name(spu2_auth_mode));
+
+	fmd = (struct SPU2_FMD *)spu_hdr;
+
+	spu2_fmd_ctrl0_write(fmd, req_opts->is_inbound, req_opts->auth_first,
+			     proto, spu2_ciph_type, spu2_ciph_mode,
+			     spu2_auth_type, spu2_auth_mode);
+
+	spu2_fmd_ctrl1_write(fmd, req_opts->is_inbound, assoc_size,
+			     hash_parms->key_len, cipher_parms->key_len,
+			     false, false,
+			     aead_parms->return_iv, aead_parms->ret_iv_len,
+			     aead_parms->ret_iv_off,
+			     cipher_parms->iv_len, hash_parms->digestsize,
+			     !req_opts->bd_suppress, return_md);
+
+	spu2_fmd_ctrl2_write(fmd, cipher_offset, hash_parms->key_len, 0,
+			     cipher_parms->key_len, cipher_parms->iv_len);
+
+	spu2_fmd_ctrl3_write(fmd, payload_len);
+
+	/* OMD starts immediately after the fixed FMD words */
+	ptr = (u8 *)(fmd + 1);
+	buf_len = sizeof(struct SPU2_FMD);
+
+	/* Write OMD */
+	if (hash_parms->key_len) {
+		memcpy(ptr, hash_parms->key_buf, hash_parms->key_len);
+		ptr += hash_parms->key_len;
+		buf_len += hash_parms->key_len;
+	}
+	if (cipher_parms->key_len) {
+		memcpy(ptr, cipher_parms->key_buf, cipher_parms->key_len);
+		ptr += cipher_parms->key_len;
+		buf_len += cipher_parms->key_len;
+	}
+	if (cipher_parms->iv_len) {
+		memcpy(ptr, cipher_parms->iv_buf, cipher_parms->iv_len);
+		ptr += cipher_parms->iv_len;
+		buf_len += cipher_parms->iv_len;
+	}
+
+	packet_dump(" SPU request header: ", spu_hdr, buf_len);
+
+	return buf_len;
+}
+
+/**
+ * spu2_cipher_req_init() - Build an ablkcipher SPU2 request message header,
+ * including FMD and OMD.
+ * @spu_hdr: Location of start of SPU request (FMD field)
+ * @cipher_parms: Parameters describing cipher request
+ *
+ * Called at setkey time to initialize a msg header that can be reused for all
+ * subsequent ablkcipher requests. Construct the message starting at spu_hdr.
+ * Caller should allocate this buffer in DMA-able memory at least
+ * SPU_HEADER_ALLOC_LEN bytes long.
+ *
+ * Return: the total length of the SPU header (FMD and OMD) in bytes. 0 if an
+ * error occurs.
+ */
+u16 spu2_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms)
+{
+	struct SPU2_FMD *fmd;
+	u8 *omd;
+	enum spu2_cipher_type spu2_type = SPU2_CIPHER_TYPE_NONE;
+	enum spu2_cipher_mode spu2_mode;
+	int err;
+
+	flow_log("%s()\n", __func__);
+	flow_log(" cipher alg:%u mode:%u type %u\n", cipher_parms->alg,
+		 cipher_parms->mode, cipher_parms->type);
+	flow_log(" cipher_iv_len: %u\n", cipher_parms->iv_len);
+	flow_log(" key: %d\n", cipher_parms->key_len);
+	flow_dump(" key: ", cipher_parms->key_buf, cipher_parms->key_len);
+
+	/* Convert to spu2 values */
+	err = spu2_cipher_xlate(cipher_parms->alg, cipher_parms->mode,
+				cipher_parms->type, &spu2_type, &spu2_mode);
+	if (err)
+		return 0;
+
+	flow_log("spu2 cipher type %s, cipher mode %s\n",
+		 spu2_ciph_type_name(spu2_type),
+		 spu2_ciph_mode_name(spu2_mode));
+
+	/* Construct the FMD header */
+	fmd = (struct SPU2_FMD *)spu_hdr;
+	err = spu2_fmd_init(fmd, spu2_type, spu2_mode, cipher_parms->key_len,
+			    cipher_parms->iv_len);
+	if (err)
+		return 0;
+
+	/* Write cipher key to OMD */
+	omd = (u8 *)(fmd + 1);
+	if (cipher_parms->key_buf && cipher_parms->key_len)
+		memcpy(omd,
cipher_parms->key_buf, cipher_parms->key_len);
+
+	packet_dump(" SPU request header: ", spu_hdr,
+		    FMD_SIZE + cipher_parms->key_len + cipher_parms->iv_len);
+
+	return FMD_SIZE + cipher_parms->key_len + cipher_parms->iv_len;
+}
+
+/**
+ * spu2_cipher_req_finish() - Finish building a SPU2 request message header for
+ * a block cipher request.
+ * @spu_hdr: Start of the request message header (the FMD; the buffer is
+ *	     accessed as a struct SPU2_FMD)
+ * @spu_req_hdr_len: Length in bytes of the SPU request header (only used for
+ *		     the packet dump)
+ * @is_inbound: 0 encrypt, 1 decrypt
+ * @cipher_parms: Parameters describing cipher operation to be performed
+ * @update_key: In this SPU2 implementation only controls whether the cipher
+ *		key is logged; the key in the header is not rewritten here.
+ * @data_size: Length of the payload; masked with SPU2_PL_LEN and OR-ed into
+ *	       FMD ctrl3
+ *
+ * Assumes much of the header was already filled in at setkey() time in
+ * spu2_cipher_req_init(), which writes the encryption key to the OMD.
+ * This function sets or clears the encrypt-enable bit in ctrl0, copies the IV
+ * (if one is supplied) into the OMD right after the key, and merges the
+ * payload length into ctrl3.
+ * NOTE(review): the payload length is OR-ed in without clearing the field
+ * first, so this presumably relies on ctrl3 still holding a zero length from
+ * setkey time - confirm against the caller.
+ */
+void spu2_cipher_req_finish(u8 *spu_hdr,
+			    u16 spu_req_hdr_len,
+			    unsigned int is_inbound,
+			    struct spu_cipher_parms *cipher_parms,
+			    bool update_key,
+			    unsigned int data_size)
+{
+	struct SPU2_FMD *fmd;
+	u8 *omd;		/* start of optional metadata */
+	u64 ctrl0;
+	u64 ctrl3;
+
+	flow_log("%s()\n", __func__);
+	flow_log(" in: %u\n", is_inbound);
+	flow_log(" cipher alg: %u, cipher_type: %u\n", cipher_parms->alg,
+		 cipher_parms->type);
+	if (update_key) {
+		flow_log(" cipher key len: %u\n", cipher_parms->key_len);
+		flow_dump(" key: ", cipher_parms->key_buf,
+			  cipher_parms->key_len);
+	}
+	flow_log(" iv len: %d\n", cipher_parms->iv_len);
+	flow_dump(" iv: ", cipher_parms->iv_buf, cipher_parms->iv_len);
+	flow_log(" data_size: %u\n", data_size);
+
+	fmd = (struct SPU2_FMD *)spu_hdr;
+	omd = (u8 *)(fmd + 1);
+
+	/*
+	 * FMD ctrl0 was initialized at setkey time. update it to indicate
+	 * whether we are encrypting or decrypting.
+	 */
+	ctrl0 = le64_to_cpu(fmd->ctrl0);
+	if (is_inbound)
+		ctrl0 &= ~SPU2_CIPH_ENCRYPT_EN;	/* decrypt */
+	else
+		ctrl0 |= SPU2_CIPH_ENCRYPT_EN;	/* encrypt */
+	fmd->ctrl0 = cpu_to_le64(ctrl0);
+
+	if (cipher_parms->alg && cipher_parms->iv_buf && cipher_parms->iv_len) {
+		/* cipher iv provided so put it in here, just past the key */
+		memcpy(omd + cipher_parms->key_len, cipher_parms->iv_buf,
+		       cipher_parms->iv_len);
+	}
+
+	ctrl3 = le64_to_cpu(fmd->ctrl3);
+	data_size &= SPU2_PL_LEN;
+	ctrl3 |= data_size;
+	fmd->ctrl3 = cpu_to_le64(ctrl3);
+
+	packet_dump(" SPU request header: ", spu_hdr, spu_req_hdr_len);
+}
+
+/**
+ * spu2_request_pad() - Create pad bytes at the end of the data.
+ * @pad_start: Start of buffer where pad bytes are to be written
+ * @gcm_padding: Length of GCM padding, in bytes
+ * @hash_pad_len: Number of bytes of padding to extend data to full block
+ * @auth_alg: Authentication algorithm
+ * @auth_mode: Authentication mode
+ * @total_sent: Length inserted at end of hash pad
+ * @status_padding: Number of bytes of padding to align STATUS word
+ *
+ * There may be three forms of pad:
+ * 1. GCM pad - for GCM mode ciphers, pad to 16-byte alignment
+ * 2. hash pad - pad to a block length, with 0x80 data terminator and
+ * size at the end
+ * 3.
STAT pad - to ensure the STAT field is 4-byte aligned
+ */
+void spu2_request_pad(u8 *pad_start, u32 gcm_padding, u32 hash_pad_len,
+		      enum hash_alg auth_alg, enum hash_mode auth_mode,
+		      unsigned int total_sent, u32 status_padding)
+{
+	u8 *ptr = pad_start;
+
+	/* fix data alignment for GCM */
+	if (gcm_padding > 0) {
+		flow_log(" GCM: padding to 16 byte alignment: %u bytes\n",
+			 gcm_padding);
+		memset(ptr, 0, gcm_padding);
+		ptr += gcm_padding;
+	}
+
+	if (hash_pad_len > 0) {
+		/* clear the padding section */
+		memset(ptr, 0, hash_pad_len);
+
+		/*
+		 * terminate the data, then step to the last 8 bytes of the
+		 * hash pad where the length goes.
+		 * NOTE(review): assumes hash_pad_len >= sizeof(u64); a smaller
+		 * non-zero value would make the subtraction wrap - confirm
+		 * against the callers.
+		 */
+		*ptr = 0x80;
+		ptr += (hash_pad_len - sizeof(u64));
+
+		/*
+		 * add the size at the end as required per alg: the value
+		 * stored is total_sent * 8, little-endian for MD5 and
+		 * big-endian otherwise
+		 */
+		if (auth_alg == HASH_ALG_MD5)
+			*(u64 *)ptr = cpu_to_le64((u64)total_sent * 8);
+		else		/* SHA1, SHA2-224, SHA2-256 */
+			*(u64 *)ptr = cpu_to_be64((u64)total_sent * 8);
+		ptr += sizeof(u64);
+	}
+
+	/* pad to a 4byte alignment for STAT */
+	if (status_padding > 0) {
+		flow_log(" STAT: padding to 4 byte alignment: %u bytes\n",
+			 status_padding);
+
+		memset(ptr, 0, status_padding);
+		ptr += status_padding;
+	}
+}
+
+/**
+ * spu2_xts_tweak_in_payload() - Indicate that SPU2 does NOT place the XTS
+ * tweak field in the packet payload (it uses IV instead)
+ *
+ * Return: 0
+ */
+u8 spu2_xts_tweak_in_payload(void)
+{
+	return 0;
+}
+
+/**
+ * spu2_tx_status_len() - Return the length of the STATUS field in a SPU
+ * response message.
+ *
+ * Return: Length of STATUS field in bytes (SPU2_TX_STATUS_LEN).
+ */
+u8 spu2_tx_status_len(void)
+{
+	return SPU2_TX_STATUS_LEN;
+}
+
+/**
+ * spu2_rx_status_len() - Return the length of the STATUS field in a SPU
+ * response message.
+ *
+ * Return: Length of STATUS field in bytes (SPU2_RX_STATUS_LEN).
+ */
+u8 spu2_rx_status_len(void)
+{
+	return SPU2_RX_STATUS_LEN;
+}
+
+/**
+ * spu2_status_process() - Process the status from a SPU response message.
+ * @statp: start of STATUS word
+ *
+ * Return: 0 - if status is good and response should be processed
+ * !0 - status indicates an error and response is invalid
+ * (SPU_INVALID_ICV for an ICV mismatch, -EBADMSG for any other
+ * non-zero status)
+ */
+int spu2_status_process(u8 *statp)
+{
+	/* SPU2 status is 2 bytes by default - SPU_RX_STATUS_LEN */
+	u16 status = le16_to_cpu(*(__le16 *)statp);
+
+	if (status == 0)
+		return 0;
+
+	flow_log("rx status is %#x\n", status);
+	if (status == SPU2_INVALID_ICV)
+		return SPU_INVALID_ICV;
+
+	return -EBADMSG;
+}
+
+/**
+ * spu2_ccm_update_iv() - Update the IV as per the requirements for CCM mode.
+ *
+ * @digestsize: Digest size of this request (not read by this function)
+ * @cipher_parms: (pointer to) cipher parameters, includes IV buf & IV len.
+ *		  Both the IV buffer and the IV length are modified in place.
+ * @assoclen: Length of AAD data (not read by this function)
+ * @chunksize: length of input data to be sent in this req (not read by this
+ *	       function)
+ * @is_encrypt: true if this is an output/encrypt operation (not read by this
+ *		function)
+ * @is_esp: true if this is an ESP / RFC4309 operation
+ *
+ * Shortens the IV by 1 + L bytes and shifts the remaining bytes down by one,
+ * which drops the first byte of the IV.
+ */
+void spu2_ccm_update_iv(unsigned int digestsize,
+			struct spu_cipher_parms *cipher_parms,
+			unsigned int assoclen, unsigned int chunksize,
+			bool is_encrypt, bool is_esp)
+{
+	int L;		/* size of length field, in bytes */
+
+	/*
+	 * In RFC4309 mode, L is fixed at 4 bytes; otherwise, IV from
+	 * testmgr contains (L-1) in bottom 3 bits of first byte,
+	 * per RFC 3610.
+	 */
+	if (is_esp)
+		L = CCM_ESP_L_VALUE;
+	else
+		L = ((cipher_parms->iv_buf[0] & CCM_B0_L_PRIME) >>
+		     CCM_B0_L_PRIME_SHIFT) + 1;
+
+	/* SPU2 doesn't want these length bytes nor the first byte... */
+	cipher_parms->iv_len -= (1 + L);
+	memmove(cipher_parms->iv_buf, &cipher_parms->iv_buf[1],
+		cipher_parms->iv_len);
+}
+
+/**
+ * spu2_wordalign_padlen() - SPU2 does not require padding.
+ * @data_size: length of data field in bytes (ignored by this implementation)
+ *
+ * Return: length of status field padding, in bytes (always 0 on SPU2)
+ */
+u32 spu2_wordalign_padlen(u32 data_size)
+{
+	return 0;
+}
diff --git a/drivers/crypto/bcm/spu2.h b/drivers/crypto/bcm/spu2.h
new file mode 100644
index 000000000000..ab1f59934828
--- /dev/null
+++ b/drivers/crypto/bcm/spu2.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+/*
+ * This file contains SPU message definitions specific to SPU2.
+ */
+
+#ifndef _SPU2_H
+#define _SPU2_H
+
+enum spu2_cipher_type {
+	SPU2_CIPHER_TYPE_NONE = 0x0,
+	SPU2_CIPHER_TYPE_AES128 = 0x1,
+	SPU2_CIPHER_TYPE_AES192 = 0x2,
+	SPU2_CIPHER_TYPE_AES256 = 0x3,
+	SPU2_CIPHER_TYPE_DES = 0x4,
+	SPU2_CIPHER_TYPE_3DES = 0x5,
+	SPU2_CIPHER_TYPE_LAST
+};
+
+enum spu2_cipher_mode {
+	SPU2_CIPHER_MODE_ECB = 0x0,
+	SPU2_CIPHER_MODE_CBC = 0x1,
+	SPU2_CIPHER_MODE_CTR = 0x2,
+	SPU2_CIPHER_MODE_CFB = 0x3,
+	SPU2_CIPHER_MODE_OFB = 0x4,
+	SPU2_CIPHER_MODE_XTS = 0x5,
+	SPU2_CIPHER_MODE_CCM = 0x6,
+	SPU2_CIPHER_MODE_GCM = 0x7,
+	SPU2_CIPHER_MODE_LAST
+};
+
+enum spu2_hash_type {
+	SPU2_HASH_TYPE_NONE = 0x0,
+	SPU2_HASH_TYPE_AES128 = 0x1,
+	SPU2_HASH_TYPE_AES192 = 0x2,
+	SPU2_HASH_TYPE_AES256 = 0x3,
+	SPU2_HASH_TYPE_MD5 = 0x6,
+	SPU2_HASH_TYPE_SHA1 = 0x7,
+	SPU2_HASH_TYPE_SHA224 = 0x8,
+	SPU2_HASH_TYPE_SHA256 = 0x9,
+	SPU2_HASH_TYPE_SHA384 = 0xa,
+	SPU2_HASH_TYPE_SHA512 = 0xb,
+	SPU2_HASH_TYPE_SHA512_224 = 0xc,
+	SPU2_HASH_TYPE_SHA512_256 = 0xd,
+	SPU2_HASH_TYPE_SHA3_224 = 0xe,
+	SPU2_HASH_TYPE_SHA3_256 = 0xf,
+	SPU2_HASH_TYPE_SHA3_384 = 0x10,
+	SPU2_HASH_TYPE_SHA3_512 = 0x11,
+	SPU2_HASH_TYPE_LAST
+};
+
+enum spu2_hash_mode {
+	SPU2_HASH_MODE_CMAC = 0x0,
+	SPU2_HASH_MODE_CBC_MAC = 0x1,
+	SPU2_HASH_MODE_XCBC_MAC = 0x2,
+	SPU2_HASH_MODE_HMAC = 0x3,
+	SPU2_HASH_MODE_RABIN = 0x4,
+	SPU2_HASH_MODE_CCM = 0x5,
+	SPU2_HASH_MODE_GCM = 0x6,
+	SPU2_HASH_MODE_RESERVED = 0x7,
+	SPU2_HASH_MODE_LAST
+};
+
+enum spu2_ret_md_opts {
+	SPU2_RET_NO_MD = 0,	/* return no metadata */
+	SPU2_RET_FMD_OMD = 1,	/* return both FMD and OMD */
+	SPU2_RET_FMD_ONLY = 2,	/* return only FMD */
+	SPU2_RET_FMD_OMD_IV = 3,	/* return FMD and OMD with just IVs */
+};
+
+/*
+ * Fixed Metadata format: four 64-bit control words at the start of a request
+ * header. spu2_cipher_req_finish() reads and writes ctrl0 and ctrl3 through
+ * le64_to_cpu()/cpu_to_le64().
+ */
+struct SPU2_FMD {
+	u64 ctrl0;
+	u64 ctrl1;
+	u64 ctrl2;
+	u64 ctrl3;
+};
+
+#define FMD_SIZE sizeof(struct SPU2_FMD)
+
+/* Fixed part of request message header length in bytes. Just FMD.
*/ +#define SPU2_REQ_FIXED_LEN FMD_SIZE +#define SPU2_HEADER_ALLOC_LEN (SPU_REQ_FIXED_LEN + \ + 2 * MAX_KEY_SIZE + 2 * MAX_IV_SIZE) + +/* FMD ctrl0 field masks */ +#define SPU2_CIPH_ENCRYPT_EN 0x1 /* 0: decrypt, 1: encrypt */ +#define SPU2_CIPH_TYPE 0xF0 /* one of spu2_cipher_type */ +#define SPU2_CIPH_TYPE_SHIFT 4 +#define SPU2_CIPH_MODE 0xF00 /* one of spu2_cipher_mode */ +#define SPU2_CIPH_MODE_SHIFT 8 +#define SPU2_CFB_MASK 0x7000 /* cipher feedback mask */ +#define SPU2_CFB_MASK_SHIFT 12 +#define SPU2_PROTO_SEL 0xF00000 /* MACsec, IPsec, TLS... */ +#define SPU2_PROTO_SEL_SHIFT 20 +#define SPU2_HASH_FIRST 0x1000000 /* 1: hash input is input pkt + * data + */ +#define SPU2_CHK_TAG 0x2000000 /* 1: check digest provided */ +#define SPU2_HASH_TYPE 0x1F0000000 /* one of spu2_hash_type */ +#define SPU2_HASH_TYPE_SHIFT 28 +#define SPU2_HASH_MODE 0xF000000000 /* one of spu2_hash_mode */ +#define SPU2_HASH_MODE_SHIFT 36 +#define SPU2_CIPH_PAD_EN 0x100000000000 /* 1: Add pad to end of payload for + * enc + */ +#define SPU2_CIPH_PAD 0xFF000000000000 /* cipher pad value */ +#define SPU2_CIPH_PAD_SHIFT 48 + +/* FMD ctrl1 field masks */ +#define SPU2_TAG_LOC 0x1 /* 1: end of payload, 0: undef */ +#define SPU2_HAS_FR_DATA 0x2 /* 1: msg has frame data */ +#define SPU2_HAS_AAD1 0x4 /* 1: msg has AAD1 field */ +#define SPU2_HAS_NAAD 0x8 /* 1: msg has NAAD field */ +#define SPU2_HAS_AAD2 0x10 /* 1: msg has AAD2 field */ +#define SPU2_HAS_ESN 0x20 /* 1: msg has ESN field */ +#define SPU2_HASH_KEY_LEN 0xFF00 /* len of hash key in bytes. + * HMAC only. + */ +#define SPU2_HASH_KEY_LEN_SHIFT 8 +#define SPU2_CIPH_KEY_LEN 0xFF00000 /* len of cipher key in bytes */ +#define SPU2_CIPH_KEY_LEN_SHIFT 20 +#define SPU2_GENIV 0x10000000 /* 1: hw generates IV */ +#define SPU2_HASH_IV 0x20000000 /* 1: IV incl in hash */ +#define SPU2_RET_IV 0x40000000 /* 1: return IV in output msg + * b4 payload + */ +#define SPU2_RET_IV_LEN 0xF00000000 /* length in bytes of IV returned. 
+				     * 0 = 16 bytes
+				     */
+#define SPU2_RET_IV_LEN_SHIFT 32
+#define SPU2_IV_OFFSET 0xF000000000 /* gen IV offset */
+#define SPU2_IV_OFFSET_SHIFT 36
+#define SPU2_IV_LEN 0x1F0000000000 /* length of input IV in bytes */
+#define SPU2_IV_LEN_SHIFT 40
+#define SPU2_HASH_TAG_LEN 0x7F000000000000 /* hash tag length in bytes */
+#define SPU2_HASH_TAG_LEN_SHIFT 48
+#define SPU2_RETURN_MD 0x300000000000000 /* return metadata; presumably one of
+					  * spu2_ret_md_opts - TODO confirm
+					  */
+#define SPU2_RETURN_MD_SHIFT 56
+#define SPU2_RETURN_FD 0x400000000000000
+#define SPU2_RETURN_AAD1 0x800000000000000
+#define SPU2_RETURN_NAAD 0x1000000000000000
+#define SPU2_RETURN_AAD2 0x2000000000000000
+#define SPU2_RETURN_PAY 0x4000000000000000 /* return payload */
+
+/* FMD ctrl2 field masks */
+#define SPU2_AAD1_OFFSET 0xFFF /* byte offset of AAD1 field */
+#define SPU2_AAD1_LEN 0xFF000 /* length of AAD1 in bytes */
+#define SPU2_AAD1_LEN_SHIFT 12
+#define SPU2_AAD2_OFFSET 0xFFF00000 /* byte offset of AAD2 field */
+#define SPU2_AAD2_OFFSET_SHIFT 20
+#define SPU2_PL_OFFSET 0xFFFFFFFF00000000 /* payload offset from AAD2 */
+#define SPU2_PL_OFFSET_SHIFT 32
+
+/*
+ * FMD ctrl3 field masks. spu2_cipher_req_finish() masks the request's
+ * data_size with SPU2_PL_LEN before merging it into ctrl3.
+ */
+#define SPU2_PL_LEN 0xFFFFFFFF /* payload length in bytes */
+#define SPU2_TLS_LEN 0xFFFF00000000 /* TLS encrypt: cipher len
+				     * TLS decrypt: compressed len
+				     */
+#define SPU2_TLS_LEN_SHIFT 32
+
+/*
+ * Max value that can be represented in the Payload Length field of the
+ * ctrl3 word of FMD.
+ */
+#define SPU2_MAX_PAYLOAD SPU2_PL_LEN
+
+/* Error values returned in STATUS field of response messages */
+#define SPU2_INVALID_ICV 1
+
+void spu2_dump_msg_hdr(u8 *buf, unsigned int buf_len);
+u32 spu2_ctx_max_payload(enum spu_cipher_alg cipher_alg,
+			 enum spu_cipher_mode cipher_mode,
+			 unsigned int blocksize);
+u32 spu2_payload_length(u8 *spu_hdr);
+u16 spu2_response_hdr_len(u16 auth_key_len, u16 enc_key_len, bool is_hash);
+u16 spu2_hash_pad_len(enum hash_alg hash_alg, enum hash_mode hash_mode,
+		      u32 chunksize, u16 hash_block_size);
+u32 spu2_gcm_ccm_pad_len(enum spu_cipher_mode cipher_mode,
+			 unsigned int data_size);
+u32 spu2_assoc_resp_len(enum spu_cipher_mode cipher_mode,
+			unsigned int assoc_len, unsigned int iv_len,
+			bool is_encrypt);
+u8 spu2_aead_ivlen(enum spu_cipher_mode cipher_mode,
+		   u16 iv_len);
+enum hash_type spu2_hash_type(u32 src_sent);
+u32 spu2_digest_size(u32 alg_digest_size, enum hash_alg alg,
+		     enum hash_type htype);
+u32 spu2_create_request(u8 *spu_hdr,
+			struct spu_request_opts *req_opts,
+			struct spu_cipher_parms *cipher_parms,
+			struct spu_hash_parms *hash_parms,
+			struct spu_aead_parms *aead_parms,
+			unsigned int data_size);
+u16 spu2_cipher_req_init(u8 *spu_hdr, struct spu_cipher_parms *cipher_parms);
+void spu2_cipher_req_finish(u8 *spu_hdr,
+			    u16 spu_req_hdr_len,
+			    unsigned int is_inbound,
+			    struct spu_cipher_parms *cipher_parms,
+			    bool update_key,
+			    unsigned int data_size);
+void spu2_request_pad(u8 *pad_start, u32 gcm_padding, u32 hash_pad_len,
+		      enum hash_alg auth_alg, enum hash_mode auth_mode,
+		      unsigned int total_sent, u32 status_padding);
+u8 spu2_xts_tweak_in_payload(void);
+u8 spu2_tx_status_len(void);
+u8 spu2_rx_status_len(void);
+int spu2_status_process(u8 *statp);
+void spu2_ccm_update_iv(unsigned int digestsize,
+			struct spu_cipher_parms *cipher_parms,
+			unsigned int assoclen, unsigned int chunksize,
+			bool is_encrypt, bool is_esp);
+u32 spu2_wordalign_padlen(u32 data_size);
+#endif
diff --git a/drivers/crypto/bcm/spum.h b/drivers/crypto/bcm/spum.h
new file mode 100644
index 000000000000..d0a5b5828638
--- /dev/null
+++ b/drivers/crypto/bcm/spum.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+/*
+ * This file contains SPU message definitions specific to SPU-M.
+ */
+
+#ifndef _SPUM_H_
+#define _SPUM_H_
+
+#define SPU_CRYPTO_OPERATION_GENERIC 0x1
+
+/* Length of STATUS field in tx and rx packets */
+#define SPU_TX_STATUS_LEN 4
+
+/* SPU-M error codes */
+#define SPU_STATUS_MASK 0x0000FF00
+#define SPU_STATUS_SUCCESS 0x00000000
+#define SPU_STATUS_INVALID_ICV 0x00000100
+
+#define SPU_STATUS_ERROR_FLAG 0x00020000
+
+/* Request message. MH + EMH + BDESC + BD header */
+#define SPU_REQ_FIXED_LEN 24
+
+/*
+ * Max length of a SPU message header. Used to allocate a buffer where
+ * the SPU message header is constructed. Can be used for either a SPU-M
+ * header or a SPU2 header.
+ * For SPU-M, sum of the following:
+ * MH - 4 bytes
+ * EMH - 4
+ * SCTX - 3 +
+ * max auth key len - 64
+ * max cipher key len - 264 (RC4)
+ * max IV len - 16
+ * BDESC - 12
+ * BD header - 4
+ * Total: 371
+ *
+ * For SPU2, FMD_SIZE (32) plus lengths of hash and cipher keys,
+ * hash and cipher IVs. If SPU2 does not support RC4, then
+ * (TODO(review): the sentence above was left unfinished in the original;
+ * presumably the SPU2 requirement is then smaller than the RC4-sized bound
+ * used here - confirm.)
+ */
+#define SPU_HEADER_ALLOC_LEN (SPU_REQ_FIXED_LEN + MAX_KEY_SIZE + \
+				MAX_KEY_SIZE + MAX_IV_SIZE)
+
+/*
+ * Response message header length.
Normally MH, EMH, BD header, but when
+ * BD_SUPPRESS is used for hash requests, there is no BD header.
+ */
+#define SPU_RESP_HDR_LEN 12
+#define SPU_HASH_RESP_HDR_LEN 8
+
+/*
+ * Max value that can be represented in the Payload Length field of the BD
+ * header. This is a 16-bit field.
+ */
+#define SPUM_NS2_MAX_PAYLOAD (BIT(16) - 1)
+
+/*
+ * NSP SPU is limited to ~9KB because of FA2 FIFO size limitations;
+ * Set MAX_PAYLOAD to 8k to allow for addition of header, digest, etc.
+ * and stay within that limit.
+ */
+
+#define SPUM_NSP_MAX_PAYLOAD 8192
+
+/* Buffer Descriptor Header [BDESC]. SPU in big-endian mode. */
+struct BDESC_HEADER {
+	u16 offset_mac;		/* word 0 [31-16] */
+	u16 length_mac;		/* word 0 [15-0] */
+	u16 offset_crypto;	/* word 1 [31-16] */
+	u16 length_crypto;	/* word 1 [15-0] */
+	u16 offset_icv;		/* word 2 [31-16] */
+	u16 offset_iv;		/* word 2 [15-0] */
+};
+
+/* Buffer Data Header [BD]. SPU in big-endian mode. */
+struct BD_HEADER {
+	u16 size;
+	u16 prev_length;
+};
+
+/* Command Context Header. SPU-M in big endian mode.
*/
+struct MHEADER {
+	u8 flags;	/* [31:24] */
+	u8 op_code;	/* [23:16] */
+	u16 reserved;	/* [15:0] */
+};
+
+/* MH header flag bits */
+#define MH_SUPDT_PRES BIT(0)
+#define MH_HASH_PRES BIT(2)
+#define MH_BD_PRES BIT(3)
+#define MH_MFM_PRES BIT(4)
+#define MH_BDESC_PRES BIT(5)
+#define MH_SCTX_PRES BIT(7)
+
+/* SCTX word 0 bit offsets and fields masks */
+#define SCTX_SIZE 0x000000FF
+
+/* SCTX word 1 bit shifts and field masks */
+#define UPDT_OFST 0x000000FF /* offset of SCTX updateable fld */
+#define HASH_TYPE 0x00000300 /* hash alg operation type */
+#define HASH_TYPE_SHIFT 8
+#define HASH_MODE 0x00001C00 /* one of spu2_hash_mode */
+#define HASH_MODE_SHIFT 10
+#define HASH_ALG 0x0000E000 /* hash algorithm */
+#define HASH_ALG_SHIFT 13
+#define CIPHER_TYPE 0x00030000 /* encryption operation type */
+#define CIPHER_TYPE_SHIFT 16
+#define CIPHER_MODE 0x001C0000 /* encryption mode */
+#define CIPHER_MODE_SHIFT 18
+#define CIPHER_ALG 0x00E00000 /* encryption algo */
+#define CIPHER_ALG_SHIFT 21
+#define ICV_IS_512 BIT(27)
+#define ICV_IS_512_SHIFT 27
+#define CIPHER_ORDER BIT(30)
+#define CIPHER_ORDER_SHIFT 30
+#define CIPHER_INBOUND BIT(31)
+#define CIPHER_INBOUND_SHIFT 31
+
+/* SCTX word 2 bit shifts and field masks */
+#define EXP_IV_SIZE 0x7
+#define IV_OFFSET BIT(3)
+#define IV_OFFSET_SHIFT 3
+#define GEN_IV BIT(5)
+#define GEN_IV_SHIFT 5
+#define EXPLICIT_IV BIT(6)
+#define EXPLICIT_IV_SHIFT 6
+#define SCTX_IV BIT(7)
+#define SCTX_IV_SHIFT 7
+#define ICV_SIZE 0x0F00
+#define ICV_SIZE_SHIFT 8
+#define CHECK_ICV BIT(12)
+#define CHECK_ICV_SHIFT 12
+#define INSERT_ICV BIT(13)
+#define INSERT_ICV_SHIFT 13
+#define BD_SUPPRESS BIT(19)
+#define BD_SUPPRESS_SHIFT 19
+
+/* Generic Mode Security Context Structure [SCTX] */
+struct SCTX {
+/* word 0: protocol flags */
+	u32 proto_flags;
+
+/* word 1: cipher flags */
+	u32 cipher_flags;
+
+/* word 2: Extended cipher flags */
+	u32 ecf;
+
+};
+
+struct SPUHEADER {
+	struct MHEADER mh;
+	u32 emh;
+	struct SCTX sa;
+};
+
+#endif /* _SPUM_H_ */
diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c
new file mode 100644
index 000000000000..0502f460dacd
--- /dev/null
+++ b/drivers/crypto/bcm/util.c
@@ -0,0 +1,581 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#include
+
+#include "cipher.h"
+#include "util.h"
+
+/* offset of SPU_OFIFO_CTRL register */
+#define SPU_OFIFO_CTRL 0x40
+#define SPU_FIFO_WATERMARK 0x1FF
+
+/**
+ * spu_sg_at_offset() - Find the scatterlist entry at a given distance from the
+ * start of a scatterlist.
+ * @sg: [in] Start of a scatterlist
+ * @skip: [in] Distance from the start of the scatterlist, in bytes
+ * @sge: [out] Scatterlist entry at skip bytes from start
+ * @sge_offset: [out] Number of bytes from start of sge buffer to get to
+ * requested distance.
+ *
+ * Return: 0 if entry found at requested distance
+ * < 0 otherwise (-EINVAL when the scatterlist ends before skip bytes;
+ * *sge and *sge_offset are not written in that case)
+ */
+int spu_sg_at_offset(struct scatterlist *sg, unsigned int skip,
+		     struct scatterlist **sge, unsigned int *sge_offset)
+{
+	/* byte index from start of sg to the end of the previous entry */
+	unsigned int index = 0;
+	/* byte index from start of sg to the end of the current entry */
+	unsigned int next_index;
+
+	next_index = sg->length;
+	while (next_index <= skip) {
+		sg = sg_next(sg);
+		index = next_index;
+		if (!sg)
+			return -EINVAL;
+		next_index += sg->length;
+	}
+
+	*sge_offset = skip - index;
+	*sge = sg;
+	return 0;
+}
+
+/*
+ * Copy len bytes of sg data, starting at offset skip, to a dest buffer.
+ * A short copy is only reported through flow_log(); nothing is returned to
+ * the caller.
+ */
+void sg_copy_part_to_buf(struct scatterlist *src, u8 *dest,
+			 unsigned int len, unsigned int skip)
+{
+	size_t copied;
+	unsigned int nents = sg_nents(src);
+
+	copied = sg_pcopy_to_buffer(src, nents, dest, len, skip);
+	if (copied != len) {
+		flow_log("%s copied %u bytes of %u requested. ",
+			 __func__, (u32)copied, len);
+		flow_log("sg with %u entries and skip %u\n", nents, skip);
+	}
+}
+
+/*
+ * Copy data into a scatterlist starting at a specified offset in the
+ * scatterlist. Specifically, copy len bytes of data in the buffer src
+ * into the scatterlist dest, starting skip bytes into the scatterlist.
+ * A short copy is only reported through flow_log(); nothing is returned to
+ * the caller.
+ */
+void sg_copy_part_from_buf(struct scatterlist *dest, u8 *src,
+			   unsigned int len, unsigned int skip)
+{
+	size_t copied;
+	unsigned int nents = sg_nents(dest);
+
+	copied = sg_pcopy_from_buffer(dest, nents, src, len, skip);
+	if (copied != len) {
+		flow_log("%s copied %u bytes of %u requested. ",
+			 __func__, (u32)copied, len);
+		flow_log("sg with %u entries and skip %u\n", nents, skip);
+	}
+}
+
+/**
+ * spu_sg_count() - Determine number of elements in scatterlist to provide a
+ * specified number of bytes.
+ * @sg_list: scatterlist to examine
+ * @skip: index of starting point
+ * @nbytes: consider elements of scatterlist until reaching this number of
+ * bytes
+ *
+ * Return: the number of sg entries contributing to nbytes of data. 0 if
+ * sg_list is NULL or no entry exists at skip bytes.
+ */
+int spu_sg_count(struct scatterlist *sg_list, unsigned int skip, int nbytes)
+{
+	struct scatterlist *cur;
+	unsigned int first_off;
+	int count;
+
+	/* nothing to count without a list or when skip runs past its end */
+	if (!sg_list ||
+	    spu_sg_at_offset(sg_list, skip, &cur, &first_off) < 0)
+		return 0;
+
+	/* only the first entry is entered part way through */
+	for (count = 0; cur && (nbytes > 0); cur = sg_next(cur)) {
+		count++;
+		nbytes -= (cur->length - first_off);
+		first_off = 0;
+	}
+
+	return count;
+}
+
+/**
+ * spu_msg_sg_add() - Copy scatterlist entries from one sg to another, up to a
+ * given length.
+ * @to_sg: scatterlist to copy to
+ * @from_sg: scatterlist to copy from
+ * @from_skip: number of bytes to skip in from_sg. Non-zero when previous
+ * request included part of the buffer in entry in from_sg.
+ * Assumes from_skip < from_sg->length.
+ * @from_nents: number of entries in from_sg
+ * @length: number of bytes to copy. may reach this limit before exhausting
+ * from_sg.
+ *
+ * Copies the entries themselves, not the data in the entries. Assumes to_sg has
+ * enough entries. Does not limit the size of an individual buffer in to_sg.
+ * + * to_sg, from_sg, skip are all updated to end of copy + * + * Return: Number of bytes copied + */ +u32 spu_msg_sg_add(struct scatterlist **to_sg, + struct scatterlist **from_sg, u32 *from_skip, + u8 from_nents, u32 length) +{ + struct scatterlist *sg; /* an entry in from_sg */ + struct scatterlist *to = *to_sg; + struct scatterlist *from = *from_sg; + u32 skip = *from_skip; + u32 offset; + int i; + u32 entry_len = 0; + u32 frag_len = 0; /* length of entry added to to_sg */ + u32 copied = 0; /* number of bytes copied so far */ + + if (length == 0) + return 0; + + for_each_sg(from, sg, from_nents, i) { + /* number of bytes in this from entry not yet used */ + entry_len = sg->length - skip; + frag_len = min(entry_len, length - copied); + offset = sg->offset + skip; + if (frag_len) + sg_set_page(to++, sg_page(sg), frag_len, offset); + copied += frag_len; + if (copied == entry_len) { + /* used up all of from entry */ + skip = 0; /* start at beginning of next entry */ + } + if (copied == length) + break; + } + *to_sg = to; + *from_sg = sg; + if (frag_len < entry_len) + *from_skip = skip + frag_len; + else + *from_skip = 0; + + return copied; +} + +void add_to_ctr(u8 *ctr_pos, unsigned int increment) +{ + __be64 *high_be = (__be64 *)ctr_pos; + __be64 *low_be = high_be + 1; + u64 orig_low = __be64_to_cpu(*low_be); + u64 new_low = orig_low + (u64)increment; + + *low_be = __cpu_to_be64(new_low); + if (new_low < orig_low) + /* there was a carry from the low 8 bytes */ + *high_be = __cpu_to_be64(__be64_to_cpu(*high_be) + 1); +} + +struct sdesc { + struct shash_desc shash; + char ctx[]; +}; + +/* do a synchronous decrypt operation */ +int do_decrypt(char *alg_name, + void *key_ptr, unsigned int key_len, + void *iv_ptr, void *src_ptr, void *dst_ptr, + unsigned int block_len) +{ + struct scatterlist sg_in[1], sg_out[1]; + struct crypto_blkcipher *tfm = + crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + struct blkcipher_desc desc = {.tfm = tfm, .flags = 0 }; + int 
ret = 0; + void *iv; + int ivsize; + + flow_log("%s() name:%s block_len:%u\n", __func__, alg_name, block_len); + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + crypto_blkcipher_setkey((void *)tfm, key_ptr, key_len); + + sg_init_table(sg_in, 1); + sg_set_buf(sg_in, src_ptr, block_len); + + sg_init_table(sg_out, 1); + sg_set_buf(sg_out, dst_ptr, block_len); + + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); + memcpy(iv, iv_ptr, ivsize); + + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, block_len); + crypto_free_blkcipher(tfm); + + if (ret < 0) + pr_err("aes_decrypt failed %d\n", ret); + + return ret; +} + +/** + * do_shash() - Do a synchronous hash operation in software + * @name: The name of the hash algorithm + * @result: Buffer where digest is to be written + * @data1: First part of data to hash. May be NULL. + * @data1_len: Length of data1, in bytes + * @data2: Second part of data to hash. May be NULL. + * @data2_len: Length of data2, in bytes + * @key: Key (if keyed hash) + * @key_len: Length of key, in bytes (or 0 if non-keyed hash) + * + * Note that the crypto API will not select this driver's own transform because + * this driver only registers asynchronous algos. 
+ * + * Return: 0 if hash successfully stored in result + * < 0 otherwise + */ +int do_shash(unsigned char *name, unsigned char *result, + const u8 *data1, unsigned int data1_len, + const u8 *data2, unsigned int data2_len, + const u8 *key, unsigned int key_len) +{ + int rc; + unsigned int size; + struct crypto_shash *hash; + struct sdesc *sdesc; + + hash = crypto_alloc_shash(name, 0, 0); + if (IS_ERR(hash)) { + rc = PTR_ERR(hash); + pr_err("%s: Crypto %s allocation error %d", __func__, name, rc); + return rc; + } + + size = sizeof(struct shash_desc) + crypto_shash_descsize(hash); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) { + rc = -ENOMEM; + pr_err("%s: Memory allocation failure", __func__); + goto do_shash_err; + } + sdesc->shash.tfm = hash; + sdesc->shash.flags = 0x0; + + if (key_len > 0) { + rc = crypto_shash_setkey(hash, key, key_len); + if (rc) { + pr_err("%s: Could not setkey %s shash", __func__, name); + goto do_shash_err; + } + } + + rc = crypto_shash_init(&sdesc->shash); + if (rc) { + pr_err("%s: Could not init %s shash", __func__, name); + goto do_shash_err; + } + rc = crypto_shash_update(&sdesc->shash, data1, data1_len); + if (rc) { + pr_err("%s: Could not update1", __func__); + goto do_shash_err; + } + if (data2 && data2_len) { + rc = crypto_shash_update(&sdesc->shash, data2, data2_len); + if (rc) { + pr_err("%s: Could not update2", __func__); + goto do_shash_err; + } + } + rc = crypto_shash_final(&sdesc->shash, result); + if (rc) + pr_err("%s: Could not genereate %s hash", __func__, name); + +do_shash_err: + crypto_free_shash(hash); + kfree(sdesc); + + return rc; +} + +/* Dump len bytes of a scatterlist starting at skip bytes into the sg */ +void __dump_sg(struct scatterlist *sg, unsigned int skip, unsigned int len) +{ + u8 dbuf[16]; + unsigned int idx = skip; + unsigned int num_out = 0; /* number of bytes dumped so far */ + unsigned int count; + + if (packet_debug_logging) { + while (num_out < len) { + count = (len - num_out > 16) ? 
16 : len - num_out; + sg_copy_part_to_buf(sg, dbuf, count, idx); + num_out += count; + print_hex_dump(KERN_ALERT, " sg: ", DUMP_PREFIX_NONE, + 4, 1, dbuf, count, false); + idx += 16; + } + } + if (debug_logging_sleep) + msleep(debug_logging_sleep); +} + +/* Returns the name for a given cipher alg/mode */ +char *spu_alg_name(enum spu_cipher_alg alg, enum spu_cipher_mode mode) +{ + switch (alg) { + case CIPHER_ALG_RC4: + return "rc4"; + case CIPHER_ALG_AES: + switch (mode) { + case CIPHER_MODE_CBC: + return "cbc(aes)"; + case CIPHER_MODE_ECB: + return "ecb(aes)"; + case CIPHER_MODE_OFB: + return "ofb(aes)"; + case CIPHER_MODE_CFB: + return "cfb(aes)"; + case CIPHER_MODE_CTR: + return "ctr(aes)"; + case CIPHER_MODE_XTS: + return "xts(aes)"; + case CIPHER_MODE_GCM: + return "gcm(aes)"; + default: + return "aes"; + } + break; + case CIPHER_ALG_DES: + switch (mode) { + case CIPHER_MODE_CBC: + return "cbc(des)"; + case CIPHER_MODE_ECB: + return "ecb(des)"; + case CIPHER_MODE_CTR: + return "ctr(des)"; + default: + return "des"; + } + break; + case CIPHER_ALG_3DES: + switch (mode) { + case CIPHER_MODE_CBC: + return "cbc(des3_ede)"; + case CIPHER_MODE_ECB: + return "ecb(des3_ede)"; + case CIPHER_MODE_CTR: + return "ctr(des3_ede)"; + default: + return "3des"; + } + break; + default: + return "other"; + } +} + +static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct device_private *ipriv; + char *buf; + ssize_t ret, out_offset, out_count; + int i; + u32 fifo_len; + u32 spu_ofifo_ctrl; + u32 alg; + u32 mode; + u32 op_cnt; + + out_count = 2048; + + buf = kmalloc(out_count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ipriv = filp->private_data; + out_offset = 0; + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Number of SPUs.........%u\n", + ipriv->spu.num_spu); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Current sessions.......%u\n", + atomic_read(&ipriv->session_count)); + 
out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Session count..........%u\n", + atomic_read(&ipriv->stream_count)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Cipher setkey..........%u\n", + atomic_read(&ipriv->setkey_cnt[SPU_OP_CIPHER])); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Cipher Ops.............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_CIPHER])); + for (alg = 0; alg < CIPHER_ALG_LAST; alg++) { + for (mode = 0; mode < CIPHER_MODE_LAST; mode++) { + op_cnt = atomic_read(&ipriv->cipher_cnt[alg][mode]); + if (op_cnt) { + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + spu_alg_name(alg, mode), op_cnt); + } + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Hash Ops...............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_HASH])); + for (alg = 0; alg < HASH_ALG_LAST; alg++) { + op_cnt = atomic_read(&ipriv->hash_cnt[alg]); + if (op_cnt) { + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + hash_alg_name[alg], op_cnt); + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "HMAC setkey............%u\n", + atomic_read(&ipriv->setkey_cnt[SPU_OP_HMAC])); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "HMAC Ops...............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_HMAC])); + for (alg = 0; alg < HASH_ALG_LAST; alg++) { + op_cnt = atomic_read(&ipriv->hmac_cnt[alg]); + if (op_cnt) { + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + hash_alg_name[alg], op_cnt); + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "AEAD setkey............%u\n", + atomic_read(&ipriv->setkey_cnt[SPU_OP_AEAD])); + + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "AEAD Ops...............%u\n", + atomic_read(&ipriv->op_counts[SPU_OP_AEAD])); + for (alg = 0; alg < AEAD_TYPE_LAST; 
alg++) { + op_cnt = atomic_read(&ipriv->aead_cnt[alg]); + if (op_cnt) { + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + " %-13s%11u\n", + aead_alg_name[alg], op_cnt); + } + } + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Bytes of req data......%llu\n", + (u64)atomic64_read(&ipriv->bytes_out)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Bytes of resp data.....%llu\n", + (u64)atomic64_read(&ipriv->bytes_in)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Mailbox full...........%u\n", + atomic_read(&ipriv->mb_no_spc)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Mailbox send failures..%u\n", + atomic_read(&ipriv->mb_send_fail)); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "Check ICV errors.......%u\n", + atomic_read(&ipriv->bad_icv)); + if (ipriv->spu.spu_type == SPU_TYPE_SPUM) + for (i = 0; i < ipriv->spu.num_spu; i++) { + spu_ofifo_ctrl = ioread32(ipriv->spu.reg_vbase[i] + + SPU_OFIFO_CTRL); + fifo_len = spu_ofifo_ctrl & SPU_FIFO_WATERMARK; + out_offset += snprintf(buf + out_offset, + out_count - out_offset, + "SPU %d output FIFO high water.....%u\n", + i, fifo_len); + } + + if (out_offset > out_count) + out_offset = out_count; + + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + kfree(buf); + return ret; +} + +static const struct file_operations spu_debugfs_stats = { + .owner = THIS_MODULE, + .open = simple_open, + .read = spu_debugfs_read, +}; + +/* + * Create the debug FS directories. If the top-level directory has not yet + * been created, create it now. Create a stats file in this directory for + * a SPU. 
+ */ +void spu_setup_debugfs(void) +{ + if (!debugfs_initialized()) + return; + + if (!iproc_priv.debugfs_dir) + iproc_priv.debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, + NULL); + + if (!iproc_priv.debugfs_stats) + /* Create file with permissions S_IRUSR */ + debugfs_create_file("stats", 0400, iproc_priv.debugfs_dir, + &iproc_priv, &spu_debugfs_stats); +} + +void spu_free_debugfs(void) +{ + debugfs_remove_recursive(iproc_priv.debugfs_dir); + iproc_priv.debugfs_dir = NULL; +} + +/** + * format_value_ccm() - Format a value into a buffer, using a specified number + * of bytes (i.e. maybe writing value X into a 4 byte + * buffer, or maybe into a 12 byte buffer), as per the + * SPU CCM spec. + * + * @val: value to write (up to max of unsigned int) + * @buf: (pointer to) buffer to write the value + * @len: number of bytes to use (0 to 255) + * + */ +void format_value_ccm(unsigned int val, u8 *buf, u8 len) +{ + int i; + + /* First clear full output buffer */ + memset(buf, 0, len); + + /* Then, starting from right side, fill in with data */ + for (i = 0; i < len; i++) { + buf[len - i - 1] = (val >> (8 * i)) & 0xff; + if (i >= 3) + break; /* Only handle up to 32 bits of 'val' */ + } +} diff --git a/drivers/crypto/bcm/util.h b/drivers/crypto/bcm/util.h new file mode 100644 index 000000000000..712e029795f8 --- /dev/null +++ b/drivers/crypto/bcm/util.h @@ -0,0 +1,116 @@ +/* + * Copyright 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation (the "GPL"). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 (GPLv2) for more details. 
+ * + * You should have received a copy of the GNU General Public License + * version 2 (GPLv2) along with this source code. + */ + +#ifndef _UTIL_H +#define _UTIL_H + +#include +#include + +#include "spu.h" + +extern int flow_debug_logging; +extern int packet_debug_logging; +extern int debug_logging_sleep; + +#ifdef DEBUG +#define flow_log(...) \ + do { \ + if (flow_debug_logging) { \ + printk(__VA_ARGS__); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) +#define flow_dump(msg, var, var_len) \ + do { \ + if (flow_debug_logging) { \ + print_hex_dump(KERN_ALERT, msg, DUMP_PREFIX_NONE, \ + 16, 1, var, var_len, false); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) + +#define packet_log(...) \ + do { \ + if (packet_debug_logging) { \ + printk(__VA_ARGS__); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) +#define packet_dump(msg, var, var_len) \ + do { \ + if (packet_debug_logging) { \ + print_hex_dump(KERN_ALERT, msg, DUMP_PREFIX_NONE, \ + 16, 1, var, var_len, false); \ + if (debug_logging_sleep) \ + msleep(debug_logging_sleep); \ + } \ + } while (0) + +void __dump_sg(struct scatterlist *sg, unsigned int skip, unsigned int len); + +#define dump_sg(sg, skip, len) __dump_sg(sg, skip, len) + +#else /* !DEBUG */ + +#define flow_log(...) do {} while (0) +#define flow_dump(msg, var, var_len) do {} while (0) +#define packet_log(...) 
do {} while (0) +#define packet_dump(msg, var, var_len) do {} while (0) + +#define dump_sg(sg, skip, len) do {} while (0) + +#endif /* DEBUG */ + +int spu_sg_at_offset(struct scatterlist *sg, unsigned int skip, + struct scatterlist **sge, unsigned int *sge_offset); + +/* Copy sg data, from skip, length len, to dest */ +void sg_copy_part_to_buf(struct scatterlist *src, u8 *dest, + unsigned int len, unsigned int skip); +/* Copy src into scatterlist from offset, length len */ +void sg_copy_part_from_buf(struct scatterlist *dest, u8 *src, + unsigned int len, unsigned int skip); + +int spu_sg_count(struct scatterlist *sg_list, unsigned int skip, int nbytes); +u32 spu_msg_sg_add(struct scatterlist **to_sg, + struct scatterlist **from_sg, u32 *skip, + u8 from_nents, u32 tot_len); + +void add_to_ctr(u8 *ctr_pos, unsigned int increment); + +/* do a synchronous decrypt operation */ +int do_decrypt(char *alg_name, + void *key_ptr, unsigned int key_len, + void *iv_ptr, void *src_ptr, void *dst_ptr, + unsigned int block_len); + +/* produce a message digest from data of length n bytes */ +int do_shash(unsigned char *name, unsigned char *result, + const u8 *data1, unsigned int data1_len, + const u8 *data2, unsigned int data2_len, + const u8 *key, unsigned int key_len); + +char *spu_alg_name(enum spu_cipher_alg alg, enum spu_cipher_mode mode); + +void spu_setup_debugfs(void); +void spu_free_debugfs(void); +void format_value_ccm(unsigned int val, u8 *buf, u8 len); + +#endif From f7cc02b3c3a33a10dd5bb9e5dfd22e47e09503a2 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Wed, 8 Feb 2017 13:07:06 -0600 Subject: [PATCH 114/142] crypto: ccp - Set the AES size field for all modes Ensure that the size field is correctly populated for all AES modes. 
Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev-v5.c | 3 +-- drivers/crypto/ccp/ccp-dev.h | 1 + drivers/crypto/ccp/ccp-ops.c | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index 612898b4aaad..9c6ff8b813d9 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -284,8 +284,7 @@ static int ccp5_perform_aes(struct ccp_op *op) CCP_AES_ENCRYPT(&function) = op->u.aes.action; CCP_AES_MODE(&function) = op->u.aes.mode; CCP_AES_TYPE(&function) = op->u.aes.type; - if (op->u.aes.mode == CCP_AES_MODE_CFB) - CCP_AES_SIZE(&function) = 0x7f; + CCP_AES_SIZE(&function) = op->u.aes.size; CCP5_CMD_FUNCTION(&desc) = function.raw; diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 649e5610a5ce..2b5c01fade05 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -467,6 +467,7 @@ struct ccp_aes_op { enum ccp_aes_type type; enum ccp_aes_mode mode; enum ccp_aes_action action; + unsigned int size; }; struct ccp_xts_aes_op { diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 50fae4442801..6878160234b5 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -692,6 +692,14 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_ctx; } } + switch (aes->mode) { + case CCP_AES_MODE_CFB: /* CFB128 only */ + case CCP_AES_MODE_CTR: + op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1; + break; + default: + op.u.aes.size = 0; + } /* Prepare the input and output data workareas. 
For in-place * operations we need to set the dma direction to BIDIRECTIONAL From dd3f9f40b58168f91f27ab686c7bae1f35edd3d4 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 9 Feb 2017 17:51:20 +0100 Subject: [PATCH 115/142] crypto: atmel-sha - fix missing "return" instructions This patch fixes a previous patch: "crypto: atmel-sha - update request queue management to make it more generic". Indeed the patch above should have replaced the "return -EINVAL;" lines by "return atmel_sha_complete(dd, -EINVAL);" but instead replaced them by a simple call of "atmel_sha_complete(dd, -EINVAL);". Hence all "return" instructions were missing. Reported-by: Dan Carpenter Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 50a1dcd50c46..bc033178d0e7 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -668,7 +668,7 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); } if (!in_desc) - atmel_sha_complete(dd, -EINVAL); + return atmel_sha_complete(dd, -EINVAL); in_desc->callback = atmel_sha_dma_callback; in_desc->callback_param = dd; @@ -725,7 +725,7 @@ static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd, if (dma_mapping_error(dd->dev, ctx->dma_addr)) { dev_err(dd->dev, "dma %zu bytes error\n", ctx->buflen + ctx->block_size); - atmel_sha_complete(dd, -EINVAL); + return atmel_sha_complete(dd, -EINVAL); } ctx->flags &= ~SHA_FLAGS_SG; @@ -816,7 +816,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (dma_mapping_error(dd->dev, ctx->dma_addr)) { dev_err(dd->dev, "dma %zu bytes error\n", ctx->buflen + ctx->block_size); - atmel_sha_complete(dd, -EINVAL); + return atmel_sha_complete(dd, -EINVAL); } if (length == 0) { @@ -830,7 +830,7 @@ static int atmel_sha_update_dma_start(struct 
atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - atmel_sha_complete(dd, -EINVAL); + return atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; @@ -844,7 +844,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - atmel_sha_complete(dd, -EINVAL); + return atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; From 19998acb0ff67cb8843668f3b94bdbe6018fa7d8 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 9 Feb 2017 17:51:21 +0100 Subject: [PATCH 116/142] crypto: atmel-sha - fix error management in atmel_sha_start() This patch clarifies and fixes how errors should be handled by atmel_sha_start(). For update operations, the previous code wrongly assumed that (err != -EINPROGRESS) implies (err == 0). It's wrong because that doesn't take the error cases (err < 0) into account. This patch also adds many comments to detail all the possible returned values and what should be done in each case. Especially, when an error occurs, since atmel_sha_complete() has already been called, hence releasing the hardware, atmel_sha_start() must not call atmel_sha_finish_req() later otherwise atmel_sha_complete() would be called a second time. 
Signed-off-by: Cyrille Pitchen Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index bc033178d0e7..a9482023d7d3 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -1106,22 +1106,39 @@ static int atmel_sha_start(struct atmel_sha_dev *dd) ctx->op, req->nbytes); err = atmel_sha_hw_init(dd); - if (err) - goto err1; + return atmel_sha_complete(dd, err); + + /* + * atmel_sha_update_req() and atmel_sha_final_req() can return either: + * -EINPROGRESS: the hardware is busy and the SHA driver will resume + * its job later in the done_task. + * This is the main path. + * + * 0: the SHA driver can continue its job then release the hardware + * later, if needed, with atmel_sha_finish_req(). + * This is the alternate path. + * + * < 0: an error has occurred so atmel_sha_complete(dd, err) has already + * been called, hence the hardware has been released. + * The SHA driver must stop its job without calling + * atmel_sha_finish_req(), otherwise atmel_sha_complete() would be + * called a second time. + * + * Please note that currently, atmel_sha_final_req() never returns 0. 
+ */ dd->resume = atmel_sha_done; if (ctx->op == SHA_OP_UPDATE) { err = atmel_sha_update_req(dd); - if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) + if (!err && (ctx->flags & SHA_FLAGS_FINUP)) /* no final() after finup() */ err = atmel_sha_final_req(dd); } else if (ctx->op == SHA_OP_FINAL) { err = atmel_sha_final_req(dd); } -err1: - if (err != -EINPROGRESS) + if (!err) /* done_task will not finish it, so do it here */ atmel_sha_finish_req(req, err); From a60496a0ca0d34a3ae92e426138eab35f0f45612 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 9 Feb 2017 15:49:48 -0600 Subject: [PATCH 117/142] crypto: ccp - Change mode for detailed CCP init messages The CCP initialization messages only need to be sent to syslog in debug mode. Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev-v5.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index 9c6ff8b813d9..e0dfb6a05c48 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -531,7 +531,7 @@ static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status) status >>= LSB_REGION_WIDTH; } queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT); - dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n", + dev_dbg(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n", cmd_q->id, queues); return queues ? 
0 : -EINVAL; @@ -573,7 +573,7 @@ static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp, */ cmd_q->lsb = bitno; bitmap_clear(lsb_pub, bitno, 1); - dev_info(ccp->dev, + dev_dbg(ccp->dev, "Queue %d gets LSB %d\n", i, bitno); break; @@ -731,7 +731,6 @@ static int ccp5_init(struct ccp_device *ccp) ret = -EIO; goto e_pool; } - dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); /* Turn off the queues and disable interrupts until ready */ for (i = 0; i < ccp->cmd_q_count; i++) { From 4cdf101ef444e47bc8869ef3e90396e828fd9b61 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 9 Feb 2017 15:49:57 -0600 Subject: [PATCH 118/142] crypto: ccp - Update the command queue on errors When an error is detected, flush the command queue by moving its head pointer up to the current tail. Always return the error. Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev-v5.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index e0dfb6a05c48..41cc853f8569 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -250,17 +250,20 @@ static int ccp5_do_cmd(struct ccp5_desc *desc, ret = wait_event_interruptible(cmd_q->int_queue, cmd_q->int_rcvd); if (ret || cmd_q->cmd_error) { + /* Log the error and flush the queue by + * moving the head pointer + */ if (cmd_q->cmd_error) ccp_log_error(cmd_q->ccp, cmd_q->cmd_error); - /* A version 5 device doesn't use Job IDs... */ + iowrite32(tail, cmd_q->reg_head_lo); if (!ret) ret = -EIO; } cmd_q->int_rcvd = 0; } - return 0; + return ret; } static int ccp5_perform_aes(struct ccp_op *op) From 83d650ab78c7185da815e16d03fb579d3fde0140 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 9 Feb 2017 15:50:08 -0600 Subject: [PATCH 119/142] crypto: ccp - Simplify some buffer management routines The reverse-get/set functions can be simplified by eliminating unused code. 
Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-ops.c | 136 ++++++++++++++--------------------- 1 file changed, 53 insertions(+), 83 deletions(-) diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 6878160234b5..f1396c3aedac 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -184,62 +184,46 @@ static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, } static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, + unsigned int wa_offset, struct scatterlist *sg, - unsigned int len, unsigned int se_len, - bool sign_extend) + unsigned int sg_offset, + unsigned int len) { - unsigned int nbytes, sg_offset, dm_offset, sb_len, i; - u8 buffer[CCP_REVERSE_BUF_SIZE]; + u8 *p, *q; - if (WARN_ON(se_len > sizeof(buffer))) - return -EINVAL; + ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len); - sg_offset = len; - dm_offset = 0; - nbytes = len; - while (nbytes) { - sb_len = min_t(unsigned int, nbytes, se_len); - sg_offset -= sb_len; - - scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 0); - for (i = 0; i < sb_len; i++) - wa->address[dm_offset + i] = buffer[sb_len - i - 1]; - - dm_offset += sb_len; - nbytes -= sb_len; - - if ((sb_len != se_len) && sign_extend) { - /* Must sign-extend to nearest sign-extend length */ - if (wa->address[dm_offset - 1] & 0x80) - memset(wa->address + dm_offset, 0xff, - se_len - sb_len); - } + p = wa->address + wa_offset; + q = p + len - 1; + while (p < q) { + *p = *p ^ *q; + *q = *p ^ *q; + *p = *p ^ *q; + p++; + q--; } - return 0; } static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, + unsigned int wa_offset, struct scatterlist *sg, + unsigned int sg_offset, unsigned int len) { - unsigned int nbytes, sg_offset, dm_offset, sb_len, i; - u8 buffer[CCP_REVERSE_BUF_SIZE]; + u8 *p, *q; - sg_offset = 0; - dm_offset = len; - nbytes = len; - while (nbytes) { - sb_len = min_t(unsigned int, nbytes, sizeof(buffer)); - dm_offset -= 
sb_len; - - for (i = 0; i < sb_len; i++) - buffer[sb_len - i - 1] = wa->address[dm_offset + i]; - scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 1); - - sg_offset += sb_len; - nbytes -= sb_len; + p = wa->address + wa_offset; + q = p + len - 1; + while (p < q) { + *p = *p ^ *q; + *q = *p ^ *q; + *p = *p ^ *q; + p++; + q--; } + + ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len); } static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q) @@ -1269,8 +1253,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) goto e_sb; - ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, - CCP_SB_BYTES, false); + ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len); if (ret) goto e_exp; ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key, @@ -1288,16 +1271,12 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) goto e_exp; - ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, - CCP_SB_BYTES, false); + ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len); if (ret) goto e_src; - src.address += o_len; /* Adjust the address for the copy operation */ - ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, - CCP_SB_BYTES, false); + ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len); if (ret) goto e_src; - src.address -= o_len; /* Reset the address to original value */ /* Prepare the output area for the operation */ ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len, @@ -1322,7 +1301,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_dst; } - ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len); + ccp_reverse_get_dm_area(&dst.dm_wa, 0, rsa->dst, 0, rsa->mod_len); e_dst: ccp_free_data(&dst, cmd_q); @@ -1574,25 +1553,22 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) save = src.address; /* Copy the ECC modulus */ - ret = 
ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; /* Copy the first operand */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1, - ecc->u.mm.operand_1_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0, + ecc->u.mm.operand_1_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) { /* Copy the second operand */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2, - ecc->u.mm.operand_2_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0, + ecc->u.mm.operand_2_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1631,7 +1607,8 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) } /* Save the ECC result */ - ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES); + ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0, + CCP_ECC_MODULUS_BYTES); e_dst: ccp_dm_free(&dst); @@ -1699,22 +1676,19 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) save = src.address; /* Copy the ECC modulus */ - ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; /* Copy the first point X and Y coordinate */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x, - ecc->u.pm.point_1.x_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0, + ecc->u.pm.point_1.x_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y, - ecc->u.pm.point_1.y_len, - CCP_ECC_OPERAND_SIZE, false); + ret = 
ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0, + ecc->u.pm.point_1.y_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1725,15 +1699,13 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { /* Copy the second point X and Y coordinate */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x, - ecc->u.pm.point_2.x_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0, + ecc->u.pm.point_2.x_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y, - ecc->u.pm.point_2.y_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0, + ecc->u.pm.point_2.y_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1743,19 +1715,17 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) src.address += CCP_ECC_OPERAND_SIZE; } else { /* Copy the Domain "a" parameter */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a, - ecc->u.pm.domain_a_len, - CCP_ECC_OPERAND_SIZE, false); + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0, + ecc->u.pm.domain_a_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) { /* Copy the scalar value */ - ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar, - ecc->u.pm.scalar_len, - CCP_ECC_OPERAND_SIZE, - false); + ret = ccp_reverse_set_dm_area(&src, 0, + ecc->u.pm.scalar, 0, + ecc->u.pm.scalar_len); if (ret) goto e_src; src.address += CCP_ECC_OPERAND_SIZE; @@ -1800,10 +1770,10 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) save = dst.address; /* Save the ECC result X and Y coordinates */ - ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x, + ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0, CCP_ECC_MODULUS_BYTES); dst.address += 
CCP_ECC_OUTPUT_SIZE; - ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y, + ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0, CCP_ECC_MODULUS_BYTES); dst.address += CCP_ECC_OUTPUT_SIZE; From 78fd0fff7fbb55b352bd7058bf51caa46dd3a0f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:14 +0200 Subject: [PATCH 120/142] crypto: caam - don't include unneeded headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intern.h, jr.h are not needed in error.c error.h is not needed in ctrl.c Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 1 - drivers/crypto/caam/error.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 755109841cfd..8957ec952212 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -13,7 +13,6 @@ #include "intern.h" #include "jr.h" #include "desc_constr.h" -#include "error.h" #include "ctrl.h" bool caam_little_end; diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 79a0cc70717f..6f44ccb55c63 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c @@ -6,9 +6,7 @@ #include "compat.h" #include "regs.h" -#include "intern.h" #include "desc.h" -#include "jr.h" #include "error.h" static const struct { From b3b5fce7364b674b66c58adea1dd2a2f85b61dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:15 +0200 Subject: [PATCH 121/142] crypto: caam - check return code of dma_set_mask_and_coherent() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the dma mask could fail, thus make sure it succeeds before going further. 
Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 15 ++++++++++----- drivers/crypto/caam/jr.c | 19 ++++++++++++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 8957ec952212..f825e3765a4b 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -586,13 +586,18 @@ static int caam_probe(struct platform_device *pdev) JRSTART_JR1_START | JRSTART_JR2_START | JRSTART_JR3_START); - if (sizeof(dma_addr_t) == sizeof(u64)) + if (sizeof(dma_addr_t) == sizeof(u64)) { if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); else - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); - else - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); + } else { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + } + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto iounmap_ctrl; + } /* * Detect and enable JobRs diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index c8604dfadbf5..27631000b9f8 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -498,13 +498,22 @@ static int caam_jr_probe(struct platform_device *pdev) jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl; - if (sizeof(dma_addr_t) == sizeof(u64)) + if (sizeof(dma_addr_t) == sizeof(u64)) { if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring")) - dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(40)); + error = dma_set_mask_and_coherent(jrdev, + DMA_BIT_MASK(40)); else - dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(36)); - else - dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(32)); + error = dma_set_mask_and_coherent(jrdev, + DMA_BIT_MASK(36)); + } else { + error = dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(32)); + } + if (error) { + 
dev_err(jrdev, "dma_set_mask_and_coherent failed (%d)\n", + error); + iounmap(ctrl); + return error; + } /* Identify the interrupt */ jrpriv->irq = irq_of_parse_and_map(nprop, 0); From 4d8348d8e3bfc3e37f3cbaaff3966a1fca8909ed Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 10 Feb 2017 14:07:16 +0200 Subject: [PATCH 122/142] crypto: caam - fix JR IO mapping if one fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If one of the JRs failed at init, the next JR used the failed JR's IO space. The patch fixes this bug. Signed-off-by: Tudor Ambarus Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index f825e3765a4b..579f8263c479 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -308,10 +308,8 @@ static int caam_remove(struct platform_device *pdev) ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; /* Remove platform devices for JobRs */ - for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { - if (ctrlpriv->jrpdev[ring]) - of_device_unregister(ctrlpriv->jrpdev[ring]); - } + for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) + of_device_unregister(ctrlpriv->jrpdev[ring]); /* De-initialize RNG state handles initialized by this driver. 
*/ if (ctrlpriv->rng4_sh_init) @@ -423,7 +421,7 @@ DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { - int ret, ring, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; + int ret, ring, ridx, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; u64 caam_id; struct device *dev; struct device_node *nprop, *np; @@ -618,6 +616,7 @@ static int caam_probe(struct platform_device *pdev) } ring = 0; + ridx = 0; ctrlpriv->total_jobrs = 0; for_each_available_child_of_node(nprop, np) if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || @@ -625,17 +624,19 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->jrpdev[ring] = of_platform_device_create(np, NULL, dev); if (!ctrlpriv->jrpdev[ring]) { - pr_warn("JR%d Platform device creation error\n", - ring); + pr_warn("JR physical index %d: Platform device creation error\n", + ridx); + ridx++; continue; } ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *) ((__force uint8_t *)ctrl + - (ring + JR_BLOCK_NUMBER) * + (ridx + JR_BLOCK_NUMBER) * BLOCK_OFFSET ); ctrlpriv->total_jobrs++; ring++; + ridx++; } /* Check to see if QI present. If so, enable */ From fd88aac93e4dc7810940e854be1c3dc5adb20120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:17 +0200 Subject: [PATCH 123/142] crypto: caam - fix HW S/G in ablkcipher_giv_edesc_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HW S/G generation does not work properly when the following conditions are met: -src == dst -src/dst is S/G -IV is right before (contiguous with) the first src/dst S/G entry since "iv_contig" is set to true (iv_contig is a misnomer here and it actually refers to the whole output being contiguous) Fix this by setting dst S/G nents equal to src S/G nents, instead of leaving it set to init value (0). 
Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 662fe94cb2f8..05d4690351b9 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1798,7 +1798,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents = 0, sec4_sg_bytes; + int src_nents, dst_nents, sec4_sg_bytes; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; bool iv_contig = false; @@ -1808,9 +1808,6 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( src_nents = sg_count(req->src, req->nbytes); - if (unlikely(req->dst != req->src)) - dst_nents = sg_count(req->dst, req->nbytes); - if (likely(req->src == req->dst)) { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_BIDIRECTIONAL); @@ -1818,6 +1815,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } + + dst_nents = src_nents; } else { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE); @@ -1826,6 +1825,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( return ERR_PTR(-ENOMEM); } + dst_nents = sg_count(req->dst, req->nbytes); sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, DMA_FROM_DEVICE); if (unlikely(!sgc)) { From fd144d83cc42cfe6c82cba76bc0113dacd53a4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:18 +0200 Subject: [PATCH 124/142] crypto: caam - check sg_count() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sg_count() internally calls sg_nents_for_len(), which could fail in case the required number of bytes is larger than the total bytes in the S/G. 
Thus, add checks to validate the input. Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 44 +++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 05d4690351b9..ed8a04412767 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1335,13 +1335,31 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, if (unlikely(req->dst != req->src)) { src_nents = sg_count(req->src, req->assoclen + req->cryptlen); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->assoclen + req->cryptlen); + return ERR_PTR(src_nents); + } + dst_nents = sg_count(req->dst, req->assoclen + req->cryptlen + (encrypt ? authsize : (-authsize))); + if (unlikely(dst_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", + req->assoclen + req->cryptlen + + (encrypt ? authsize : (-authsize))); + return ERR_PTR(dst_nents); + } } else { src_nents = sg_count(req->src, req->assoclen + req->cryptlen + (encrypt ? authsize : 0)); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->assoclen + req->cryptlen + + (encrypt ? authsize : 0)); + return ERR_PTR(src_nents); + } } /* Check if data are contiguous. 
*/ @@ -1609,9 +1627,20 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request int sec4_sg_index; src_nents = sg_count(req->src, req->nbytes); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->nbytes); + return ERR_PTR(src_nents); + } - if (req->dst != req->src) + if (req->dst != req->src) { dst_nents = sg_count(req->dst, req->nbytes); + if (unlikely(dst_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", + req->nbytes); + return ERR_PTR(dst_nents); + } + } if (likely(req->src == req->dst)) { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, @@ -1807,6 +1836,11 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( int sec4_sg_index; src_nents = sg_count(req->src, req->nbytes); + if (unlikely(src_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", + req->nbytes); + return ERR_PTR(src_nents); + } if (likely(req->src == req->dst)) { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, @@ -1826,6 +1860,12 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( } dst_nents = sg_count(req->dst, req->nbytes); + if (unlikely(dst_nents < 0)) { + dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", + req->nbytes); + return ERR_PTR(dst_nents); + } + sgc = dma_map_sg(jrdev, req->dst, dst_nents ? 
: 1, DMA_FROM_DEVICE); if (unlikely(!sgc)) { @@ -1914,7 +1954,7 @@ static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *creq) struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); struct device *jrdev = ctx->jrdev; - bool iv_contig; + bool iv_contig = false; u32 *desc; int ret = 0; From fa0c92db1eb818a36c1b837b5f6da7e95b181247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:19 +0200 Subject: [PATCH 125/142] crypto: caam - replace sg_count() with sg_nents_for_len() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace internal sg_count() function and the convoluted logic around it with the standard sg_nents_for_len() function. src_nents, dst_nents now hold the number of SW S/G entries, instead of the HW S/G table entries. With this change, null (zero length) input data for AEAD case needs to be handled in a visible way. req->src is no longer (un)mapped, pointer address is set to 0 in SEQ IN PTR command. 
Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 189 ++++++++++++++----------------- drivers/crypto/caam/sg_sw_sec4.h | 11 -- 2 files changed, 88 insertions(+), 112 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ed8a04412767..14b7dc8d5dcb 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -887,8 +887,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, /* * aead_edesc - s/w-extended aead descriptor - * @src_nents: number of segments in input scatterlist - * @dst_nents: number of segments in output scatterlist + * @src_nents: number of segments in input s/w scatterlist + * @dst_nents: number of segments in output s/w scatterlist * @sec4_sg_bytes: length of dma mapped sec4_sg space * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table @@ -905,8 +905,8 @@ struct aead_edesc { /* * ablkcipher_edesc - s/w-extended ablkcipher descriptor - * @src_nents: number of segments in input scatterlist - * @dst_nents: number of segments in output scatterlist + * @src_nents: number of segments in input s/w scatterlist + * @dst_nents: number of segments in output s/w scatterlist * @iv_dma: dma address of iv for checking continuity and link table * @sec4_sg_bytes: length of dma mapped sec4_sg space * @sec4_sg_dma: bus physical mapped address of h/w link table @@ -930,10 +930,11 @@ static void caam_unmap(struct device *dev, struct scatterlist *src, int sec4_sg_bytes) { if (dst != src) { - dma_unmap_sg(dev, src, src_nents ? : 1, DMA_TO_DEVICE); - dma_unmap_sg(dev, dst, dst_nents ? : 1, DMA_FROM_DEVICE); + if (src_nents) + dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); + dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); } else { - dma_unmap_sg(dev, src, src_nents ? 
: 1, DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); } if (iv_dma) @@ -1102,7 +1103,7 @@ static void init_aead_job(struct aead_request *req, init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); if (all_contig) { - src_dma = sg_dma_address(req->src); + src_dma = edesc->src_nents ? sg_dma_address(req->src) : 0; in_options = 0; } else { src_dma = edesc->sec4_sg_dma; @@ -1117,7 +1118,7 @@ static void init_aead_job(struct aead_request *req, out_options = in_options; if (unlikely(req->src != req->dst)) { - if (!edesc->dst_nents) { + if (edesc->dst_nents == 1) { dst_dma = sg_dma_address(req->dst); } else { dst_dma = edesc->sec4_sg_dma + @@ -1227,10 +1228,11 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, ivsize, 1); - printk(KERN_ERR "asked=%d, nbytes%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes); + pr_err("asked=%d, nbytes%d\n", + (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes); dbg_dump_sg(KERN_ERR, "src @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents ? 100 : req->nbytes, 1); + edesc->src_nents > 1 ? 
100 : req->nbytes, 1); #endif len = desc_len(sh_desc); @@ -1247,7 +1249,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options); if (likely(req->src == req->dst)) { - if (!edesc->src_nents && iv_contig) { + if (edesc->src_nents == 1 && iv_contig) { dst_dma = sg_dma_address(req->src); } else { dst_dma = edesc->sec4_sg_dma + @@ -1255,7 +1257,7 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, out_options = LDST_SGF; } } else { - if (!edesc->dst_nents) { + if (edesc->dst_nents == 1) { dst_dma = sg_dma_address(req->dst); } else { dst_dma = edesc->sec4_sg_dma + @@ -1287,13 +1289,13 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr, ivsize, 1); dbg_dump_sg(KERN_ERR, "src @" __stringify(__LINE__) ": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents ? 100 : req->nbytes, 1); + edesc->src_nents > 1 ? 100 : req->nbytes, 1); #endif len = desc_len(sh_desc); init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); - if (!edesc->src_nents) { + if (edesc->src_nents == 1) { src_dma = sg_dma_address(req->src); in_options = 0; } else { @@ -1329,21 +1331,22 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, int src_nents, dst_nents = 0; struct aead_edesc *edesc; int sgc; - bool all_contig = true; - int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + int sec4_sg_index, sec4_sg_len, sec4_sg_bytes; unsigned int authsize = ctx->authsize; if (unlikely(req->dst != req->src)) { - src_nents = sg_count(req->src, req->assoclen + req->cryptlen); + src_nents = sg_nents_for_len(req->src, req->assoclen + + req->cryptlen); if (unlikely(src_nents < 0)) { dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", req->assoclen + req->cryptlen); return ERR_PTR(src_nents); } - dst_nents = sg_count(req->dst, - req->assoclen + req->cryptlen + - (encrypt ? 
authsize : (-authsize))); + dst_nents = sg_nents_for_len(req->dst, req->assoclen + + req->cryptlen + + (encrypt ? authsize : + (-authsize))); if (unlikely(dst_nents < 0)) { dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", req->assoclen + req->cryptlen + @@ -1351,9 +1354,9 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return ERR_PTR(dst_nents); } } else { - src_nents = sg_count(req->src, - req->assoclen + req->cryptlen + - (encrypt ? authsize : 0)); + src_nents = sg_nents_for_len(req->src, req->assoclen + + req->cryptlen + + (encrypt ? authsize : 0)); if (unlikely(src_nents < 0)) { dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", req->assoclen + req->cryptlen + @@ -1362,13 +1365,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, } } - /* Check if data are contiguous. */ - all_contig = !src_nents; - if (!all_contig) - sec4_sg_len = src_nents; - - sec4_sg_len += dst_nents; - + sec4_sg_len = src_nents > 1 ? src_nents : 0; + sec4_sg_len += dst_nents > 1 ? dst_nents : 0; sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ @@ -1380,28 +1378,28 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_BIDIRECTIONAL); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map source\n"); kfree(edesc); return ERR_PTR(-ENOMEM); } } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? 
: 1, - DMA_TO_DEVICE); - if (unlikely(!sgc)) { - dev_err(jrdev, "unable to map source\n"); - kfree(edesc); - return ERR_PTR(-ENOMEM); + /* Cover also the case of null (zero length) input data */ + if (src_nents) { + sgc = dma_map_sg(jrdev, req->src, src_nents, + DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map source\n"); + kfree(edesc); + return ERR_PTR(-ENOMEM); + } } - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, - DMA_FROM_DEVICE); + sgc = dma_map_sg(jrdev, req->dst, dst_nents, DMA_FROM_DEVICE); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map destination\n"); - dma_unmap_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); + dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); kfree(edesc); return ERR_PTR(-ENOMEM); } @@ -1411,15 +1409,15 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->dst_nents = dst_nents; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + desc_bytes; - *all_contig_ptr = all_contig; + *all_contig_ptr = !(src_nents > 1); sec4_sg_index = 0; - if (!all_contig) { + if (src_nents > 1) { sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg + sec4_sg_index, 0); sec4_sg_index += src_nents; } - if (dst_nents) { + if (dst_nents > 1) { sg_to_sec4_sg_last(req->dst, dst_nents, edesc->sec4_sg + sec4_sg_index, 0); } @@ -1621,12 +1619,12 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request int src_nents, dst_nents = 0, sec4_sg_bytes; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; - bool iv_contig = false; + bool in_contig; int sgc; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); - int sec4_sg_index; + int dst_sg_idx, sec4_sg_ents; - src_nents = sg_count(req->src, req->nbytes); + src_nents = sg_nents_for_len(req->src, req->nbytes); if (unlikely(src_nents < 0)) { dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", req->nbytes); @@ -1634,7 +1632,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request } if 
(req->dst != req->src) { - dst_nents = sg_count(req->dst, req->nbytes); + dst_nents = sg_nents_for_len(req->dst, req->nbytes); if (unlikely(dst_nents < 0)) { dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", req->nbytes); @@ -1643,26 +1641,22 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_BIDIRECTIONAL); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); + sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, - DMA_FROM_DEVICE); + sgc = dma_map_sg(jrdev, req->dst, dst_nents, DMA_FROM_DEVICE); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map destination\n"); - dma_unmap_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); + dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return ERR_PTR(-ENOMEM); } } @@ -1675,16 +1669,16 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request return ERR_PTR(-ENOMEM); } - /* - * Check if iv can be contiguous with source and destination. - * If so, include it. If not, create scatterlist. - */ - if (!src_nents && iv_dma + ivsize == sg_dma_address(req->src)) - iv_contig = true; - else - src_nents = src_nents ? : 1; - sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) * - sizeof(struct sec4_sg_entry); + if (src_nents == 1 && iv_dma + ivsize == sg_dma_address(req->src)) { + in_contig = true; + sec4_sg_ents = 0; + } else { + in_contig = false; + sec4_sg_ents = 1 + src_nents; + } + dst_sg_idx = sec4_sg_ents; + sec4_sg_ents += dst_nents > 1 ? 
dst_nents : 0; + sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, @@ -1702,17 +1696,15 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; - sec4_sg_index = 0; - if (!iv_contig) { + if (!in_contig) { dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0); sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg + 1, 0); - sec4_sg_index += 1 + src_nents; } - if (dst_nents) { + if (dst_nents > 1) { sg_to_sec4_sg_last(req->dst, dst_nents, - edesc->sec4_sg + sec4_sg_index, 0); + edesc->sec4_sg + dst_sg_idx, 0); } edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -1733,7 +1725,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request sec4_sg_bytes, 1); #endif - *iv_contig_out = iv_contig; + *iv_contig_out = in_contig; return edesc; } @@ -1830,12 +1822,12 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( int src_nents, dst_nents, sec4_sg_bytes; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; - bool iv_contig = false; + bool out_contig; int sgc; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); - int sec4_sg_index; + int dst_sg_idx, sec4_sg_ents; - src_nents = sg_count(req->src, req->nbytes); + src_nents = sg_nents_for_len(req->src, req->nbytes); if (unlikely(src_nents < 0)) { dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n", req->nbytes); @@ -1843,8 +1835,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents ? 
: 1, - DMA_BIDIRECTIONAL); + sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_BIDIRECTIONAL); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); @@ -1852,26 +1843,23 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( dst_nents = src_nents; } else { - sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); + sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } - dst_nents = sg_count(req->dst, req->nbytes); + dst_nents = sg_nents_for_len(req->dst, req->nbytes); if (unlikely(dst_nents < 0)) { dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n", req->nbytes); return ERR_PTR(dst_nents); } - sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, - DMA_FROM_DEVICE); + sgc = dma_map_sg(jrdev, req->dst, dst_nents, DMA_FROM_DEVICE); if (unlikely(!sgc)) { dev_err(jrdev, "unable to map destination\n"); - dma_unmap_sg(jrdev, req->src, src_nents ? : 1, - DMA_TO_DEVICE); + dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return ERR_PTR(-ENOMEM); } } @@ -1888,14 +1876,17 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( return ERR_PTR(-ENOMEM); } - if (!dst_nents && iv_dma + ivsize == sg_dma_address(req->dst)) - iv_contig = true; - else - dst_nents = dst_nents ? : 1; - sec4_sg_bytes = ((iv_contig ? 0 : 1) + src_nents + dst_nents) * - sizeof(struct sec4_sg_entry); + sec4_sg_ents = src_nents > 1 ? 
src_nents : 0; + dst_sg_idx = sec4_sg_ents; + if (dst_nents == 1 && iv_dma + ivsize == sg_dma_address(req->dst)) { + out_contig = true; + } else { + out_contig = false; + sec4_sg_ents += 1 + dst_nents; + } /* allocate space for base edesc and hw desc commands, link tables */ + sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry); edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, GFP_DMA | flags); if (!edesc) { @@ -1911,18 +1902,14 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; - sec4_sg_index = 0; - if (src_nents) { + if (src_nents > 1) sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); - sec4_sg_index += src_nents; - } - if (!iv_contig) { - dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index, + if (!out_contig) { + dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx, iv_dma, ivsize, 0); - sec4_sg_index += 1; sg_to_sec4_sg_last(req->dst, dst_nents, - edesc->sec4_sg + sec4_sg_index, 0); + edesc->sec4_sg + dst_sg_idx + 1, 0); } edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -1943,7 +1930,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( sec4_sg_bytes, 1); #endif - *iv_contig_out = iv_contig; + *iv_contig_out = out_contig; return edesc; } diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 6afa20c4a013..c6adad09c972 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -73,14 +73,3 @@ static inline struct sec4_sg_entry *sg_to_sec4_sg_len( } while (total); return sec4_sg_ptr - 1; } - -/* derive number of elements in scatterlist, but return 0 for 1 */ -static inline int sg_count(struct scatterlist *sg_list, int nbytes) -{ - int sg_nents = sg_nents_for_len(sg_list, nbytes); - - if (likely(sg_nents == 1)) - return 0; - - return sg_nents; -} From 838e0a89e33a6e15492b8e4d700fc64c21ca3587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 
Feb 2017 14:07:20 +0200 Subject: [PATCH 126/142] crypto: caam - use dma_map_sg() return code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dma_map_sg() might coalesce S/G entries, so use the number of S/G entries returned by it instead of what sg_nents_for_len() initially returns. Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 151 ++++++++++++++++++---------------- 1 file changed, 80 insertions(+), 71 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 14b7dc8d5dcb..71d09e896d48 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1328,9 +1328,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents = 0; + int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; struct aead_edesc *edesc; - int sgc; int sec4_sg_index, sec4_sg_len, sec4_sg_bytes; unsigned int authsize = ctx->authsize; @@ -1365,60 +1364,62 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, } } - sec4_sg_len = src_nents > 1 ? src_nents : 0; - sec4_sg_len += dst_nents > 1 ? 
dst_nents : 0; + if (likely(req->src == req->dst)) { + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_BIDIRECTIONAL); + if (unlikely(!mapped_src_nents)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + } else { + /* Cover also the case of null (zero length) input data */ + if (src_nents) { + mapped_src_nents = dma_map_sg(jrdev, req->src, + src_nents, DMA_TO_DEVICE); + if (unlikely(!mapped_src_nents)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + } else { + mapped_src_nents = 0; + } + + mapped_dst_nents = dma_map_sg(jrdev, req->dst, dst_nents, + DMA_FROM_DEVICE); + if (unlikely(!mapped_dst_nents)) { + dev_err(jrdev, "unable to map destination\n"); + dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); + return ERR_PTR(-ENOMEM); + } + } + + sec4_sg_len = mapped_src_nents > 1 ? mapped_src_nents : 0; + sec4_sg_len += mapped_dst_nents > 1 ? mapped_dst_nents : 0; sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, GFP_DMA | flags); if (!edesc) { - dev_err(jrdev, "could not allocate extended descriptor\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, + 0, 0, 0); return ERR_PTR(-ENOMEM); } - if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_BIDIRECTIONAL); - if (unlikely(!sgc)) { - dev_err(jrdev, "unable to map source\n"); - kfree(edesc); - return ERR_PTR(-ENOMEM); - } - } else { - /* Cover also the case of null (zero length) input data */ - if (src_nents) { - sgc = dma_map_sg(jrdev, req->src, src_nents, - DMA_TO_DEVICE); - if (unlikely(!sgc)) { - dev_err(jrdev, "unable to map source\n"); - kfree(edesc); - return ERR_PTR(-ENOMEM); - } - } - - sgc = dma_map_sg(jrdev, req->dst, dst_nents, DMA_FROM_DEVICE); - if (unlikely(!sgc)) { - dev_err(jrdev, "unable to map destination\n"); - dma_unmap_sg(jrdev, 
req->src, src_nents, DMA_TO_DEVICE); - kfree(edesc); - return ERR_PTR(-ENOMEM); - } - } - edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + desc_bytes; - *all_contig_ptr = !(src_nents > 1); + *all_contig_ptr = !(mapped_src_nents > 1); sec4_sg_index = 0; - if (src_nents > 1) { - sg_to_sec4_sg_last(req->src, src_nents, - edesc->sec4_sg + sec4_sg_index, 0); - sec4_sg_index += src_nents; + if (mapped_src_nents > 1) { + sg_to_sec4_sg_last(req->src, mapped_src_nents, + edesc->sec4_sg + sec4_sg_index, 0); + sec4_sg_index += mapped_src_nents; } - if (dst_nents > 1) { - sg_to_sec4_sg_last(req->dst, dst_nents, + if (mapped_dst_nents > 1) { + sg_to_sec4_sg_last(req->dst, mapped_dst_nents, edesc->sec4_sg + sec4_sg_index, 0); } @@ -1616,13 +1617,12 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents = 0, sec4_sg_bytes; + int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; bool in_contig; - int sgc; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); - int dst_sg_idx, sec4_sg_ents; + int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes; src_nents = sg_nents_for_len(req->src, req->nbytes); if (unlikely(src_nents < 0)) { @@ -1641,20 +1641,23 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_BIDIRECTIONAL); - if (unlikely(!sgc)) { + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_BIDIRECTIONAL); + if (unlikely(!mapped_src_nents)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } } else { - sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); - if (unlikely(!sgc)) { + mapped_src_nents = dma_map_sg(jrdev, req->src, 
src_nents, + DMA_TO_DEVICE); + if (unlikely(!mapped_src_nents)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } - sgc = dma_map_sg(jrdev, req->dst, dst_nents, DMA_FROM_DEVICE); - if (unlikely(!sgc)) { + mapped_dst_nents = dma_map_sg(jrdev, req->dst, dst_nents, + DMA_FROM_DEVICE); + if (unlikely(!mapped_dst_nents)) { dev_err(jrdev, "unable to map destination\n"); dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return ERR_PTR(-ENOMEM); @@ -1669,15 +1672,16 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request return ERR_PTR(-ENOMEM); } - if (src_nents == 1 && iv_dma + ivsize == sg_dma_address(req->src)) { + if (mapped_src_nents == 1 && + iv_dma + ivsize == sg_dma_address(req->src)) { in_contig = true; sec4_sg_ents = 0; } else { in_contig = false; - sec4_sg_ents = 1 + src_nents; + sec4_sg_ents = 1 + mapped_src_nents; } dst_sg_idx = sec4_sg_ents; - sec4_sg_ents += dst_nents > 1 ? dst_nents : 0; + sec4_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0; sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ @@ -1698,13 +1702,13 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request if (!in_contig) { dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0); - sg_to_sec4_sg_last(req->src, src_nents, + sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg + 1, 0); } - if (dst_nents > 1) { - sg_to_sec4_sg_last(req->dst, dst_nents, - edesc->sec4_sg + dst_sg_idx, 0); + if (mapped_dst_nents > 1) { + sg_to_sec4_sg_last(req->dst, mapped_dst_nents, + edesc->sec4_sg + dst_sg_idx, 0); } edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -1819,13 +1823,12 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; - int src_nents, dst_nents, sec4_sg_bytes; + int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents; struct ablkcipher_edesc *edesc; dma_addr_t iv_dma = 0; bool out_contig; - int sgc; int ivsize = crypto_ablkcipher_ivsize(ablkcipher); - int dst_sg_idx, sec4_sg_ents; + int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes; src_nents = sg_nents_for_len(req->src, req->nbytes); if (unlikely(src_nents < 0)) { @@ -1835,16 +1838,19 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( } if (likely(req->src == req->dst)) { - sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_BIDIRECTIONAL); - if (unlikely(!sgc)) { + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_BIDIRECTIONAL); + if (unlikely(!mapped_src_nents)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } dst_nents = src_nents; + mapped_dst_nents = src_nents; } else { - sgc = dma_map_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); - if (unlikely(!sgc)) { + mapped_src_nents = dma_map_sg(jrdev, req->src, src_nents, + DMA_TO_DEVICE); + if (unlikely(!mapped_src_nents)) { dev_err(jrdev, "unable to map source\n"); return ERR_PTR(-ENOMEM); } @@ -1856,8 +1862,9 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( return ERR_PTR(dst_nents); } - sgc = dma_map_sg(jrdev, req->dst, dst_nents, DMA_FROM_DEVICE); - if (unlikely(!sgc)) { + mapped_dst_nents = dma_map_sg(jrdev, req->dst, dst_nents, + DMA_FROM_DEVICE); + if (unlikely(!mapped_dst_nents)) { dev_err(jrdev, "unable to map destination\n"); dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return ERR_PTR(-ENOMEM); @@ -1876,13 +1883,14 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( return ERR_PTR(-ENOMEM); } - sec4_sg_ents = src_nents > 1 ? src_nents : 0; + sec4_sg_ents = mapped_src_nents > 1 ? 
mapped_src_nents : 0; dst_sg_idx = sec4_sg_ents; - if (dst_nents == 1 && iv_dma + ivsize == sg_dma_address(req->dst)) { + if (mapped_dst_nents == 1 && + iv_dma + ivsize == sg_dma_address(req->dst)) { out_contig = true; } else { out_contig = false; - sec4_sg_ents += 1 + dst_nents; + sec4_sg_ents += 1 + mapped_dst_nents; } /* allocate space for base edesc and hw desc commands, link tables */ @@ -1902,13 +1910,14 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; - if (src_nents > 1) - sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); + if (mapped_src_nents > 1) + sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg, + 0); if (!out_contig) { dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx, iv_dma, ivsize, 0); - sg_to_sec4_sg_last(req->dst, dst_nents, + sg_to_sec4_sg_last(req->dst, mapped_dst_nents, edesc->sec4_sg + dst_sg_idx + 1, 0); } From cfb725f6d3d31355fa4510da7d7bdce807045b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:21 +0200 Subject: [PATCH 127/142] crypto: caam - don't dma_map key for hash algorithms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shared descriptors for hash algorithms are small enough for (split) keys to be inlined in all cases. Since the driver already does this, all that's left is to remove the unused ctx->key_dma.
Fixes: 045e36780f115 ("crypto: caam - ahash hmac support") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index e58639ea53b1..117bbd8c08d4 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -109,7 +109,6 @@ struct caam_hash_ctx { dma_addr_t sh_desc_digest_dma; struct device *jrdev; u8 key[CAAM_MAX_HASH_KEY_SIZE]; - dma_addr_t key_dma; int ctx_len; struct alginfo adata; }; @@ -420,7 +419,6 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct device *jrdev = ctx->jrdev; int blocksize = crypto_tfm_alg_blocksize(&ahash->base); int digestsize = crypto_ahash_digestsize(ahash); int ret; @@ -448,28 +446,14 @@ static int ahash_setkey(struct crypto_ahash *ahash, if (ret) goto bad_free_key; - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->adata.keylen_pad, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - ret = -ENOMEM; - goto error_free_key; - } #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, ctx->adata.keylen_pad, 1); #endif - ret = ahash_set_sh_desc(ahash); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->adata.keylen_pad, - DMA_TO_DEVICE); - } - - error_free_key: kfree(hashed_key); - return ret; + return ahash_set_sh_desc(ahash); bad_free_key: kfree(hashed_key); crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN); From bbf2234494afd14a720d61a233c21b95e4261326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:22 +0200 Subject: [PATCH 128/142] crypto: caam - fix DMA API leaks for multiple setkey() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit setkey() callback may be invoked multiple times for the same tfm. In this case, DMA API leaks are caused by shared descriptors (and key for caamalg) being mapped several times and unmapped only once. Fix this by performing mapping / unmapping only in crypto algorithm's cra_init() / cra_exit() callbacks and sync_for_device in the setkey() tfm callback. Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 275 +++++++++------------------------ drivers/crypto/caam/caamhash.c | 79 ++++------ 2 files changed, 102 insertions(+), 252 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 71d09e896d48..9bc80eb06934 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -134,15 +134,15 @@ struct caam_aead_alg { * per-session context */ struct caam_ctx { - struct device *jrdev; u32 sh_desc_enc[DESC_MAX_USED_LEN]; u32 sh_desc_dec[DESC_MAX_USED_LEN]; u32 sh_desc_givenc[DESC_MAX_USED_LEN]; + u8 key[CAAM_MAX_KEY_SIZE]; dma_addr_t sh_desc_enc_dma; dma_addr_t sh_desc_dec_dma; dma_addr_t sh_desc_givenc_dma; - u8 key[CAAM_MAX_KEY_SIZE]; dma_addr_t key_dma; + struct device *jrdev; struct alginfo adata; struct alginfo cdata; unsigned int authsize; @@ -171,13 +171,8 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) /* aead_encrypt shared descriptor */ desc = ctx->sh_desc_enc; cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors @@ -194,13 +189,8 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) /* aead_decrypt shared descriptor */ desc = ctx->sh_desc_dec; 
cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -278,13 +268,8 @@ static int aead_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_enc; cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ctx->authsize, is_rfc3686, nonce, ctx1_iv_off); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); skip_enc: /* @@ -315,13 +300,8 @@ skip_enc: cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, alg->caam.geniv, is_rfc3686, nonce, ctx1_iv_off); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); if (!alg->caam.geniv) goto skip_givenc; @@ -354,13 +334,8 @@ skip_enc: cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, is_rfc3686, nonce, ctx1_iv_off); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); skip_givenc: return 0; @@ -403,13 +378,8 @@ static int gcm_set_sh_desc(struct crypto_aead 
*aead) desc = ctx->sh_desc_enc; cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors @@ -425,13 +395,8 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_dec; cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -472,13 +437,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_enc; cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors @@ -494,13 +454,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_dec; cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -542,13 +497,8 @@ static int 
rfc4543_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_enc; cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* * Job Descriptor and Shared Descriptors @@ -564,13 +514,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_dec; cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -614,28 +559,15 @@ static int aead_setkey(struct crypto_aead *aead, /* postpend encryption key to auth split key */ memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen); - - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->adata.keylen_pad + - keys.enckeylen, DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->adata.keylen_pad + + keys.enckeylen, DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, ctx->adata.keylen_pad + keys.enckeylen, 1); #endif - ctx->cdata.keylen = keys.enckeylen; - - ret = aead_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->adata.keylen_pad + - keys.enckeylen, DMA_TO_DEVICE); - } - - return ret; + return aead_set_sh_desc(aead); badkey: crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; @@ 
-646,7 +578,6 @@ static int gcm_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - int ret = 0; #ifdef DEBUG print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", @@ -654,21 +585,10 @@ static int gcm_setkey(struct crypto_aead *aead, #endif memcpy(ctx->key, key, keylen); - ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE); ctx->cdata.keylen = keylen; - ret = gcm_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen, - DMA_TO_DEVICE); - } - - return ret; + return gcm_set_sh_desc(aead); } static int rfc4106_setkey(struct crypto_aead *aead, @@ -676,7 +596,6 @@ static int rfc4106_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - int ret = 0; if (keylen < 4) return -EINVAL; @@ -693,21 +612,9 @@ static int rfc4106_setkey(struct crypto_aead *aead, * in the nonce. Update the AES key length. 
*/ ctx->cdata.keylen = keylen - 4; - - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->cdata.keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } - - ret = rfc4106_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen, - DMA_TO_DEVICE); - } - - return ret; + dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen, + DMA_TO_DEVICE); + return rfc4106_set_sh_desc(aead); } static int rfc4543_setkey(struct crypto_aead *aead, @@ -715,7 +622,6 @@ static int rfc4543_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - int ret = 0; if (keylen < 4) return -EINVAL; @@ -732,21 +638,9 @@ static int rfc4543_setkey(struct crypto_aead *aead, * in the nonce. Update the AES key length. */ ctx->cdata.keylen = keylen - 4; - - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->cdata.keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } - - ret = rfc4543_set_sh_desc(aead); - if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen, - DMA_TO_DEVICE); - } - - return ret; + dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen, + DMA_TO_DEVICE); + return rfc4543_set_sh_desc(aead); } static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, @@ -787,12 +681,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, keylen -= CTR_RFC3686_NONCE_SIZE; } - ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE); ctx->cdata.keylen = keylen; ctx->cdata.key_virt = ctx->key; ctx->cdata.key_inline = true; @@ -801,37 +690,22 @@ static int 
ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, desc = ctx->sh_desc_enc; cnstr_shdsc_ablkcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686, ctx1_iv_off); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* ablkcipher_decrypt shared descriptor */ desc = ctx->sh_desc_dec; cnstr_shdsc_ablkcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686, ctx1_iv_off); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* ablkcipher_givencrypt shared descriptor */ desc = ctx->sh_desc_givenc; cnstr_shdsc_ablkcipher_givencap(desc, &ctx->cdata, ivsize, is_rfc3686, ctx1_iv_off); - ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_givenc_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -851,11 +725,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, } memcpy(ctx->key, key, keylen); - ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->key_dma)) { - dev_err(jrdev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE); ctx->cdata.keylen = keylen; ctx->cdata.key_virt = ctx->key; ctx->cdata.key_inline = true; @@ -863,24 +733,14 @@ static int xts_ablkcipher_setkey(struct 
crypto_ablkcipher *ablkcipher, /* xts_ablkcipher_encrypt shared descriptor */ desc = ctx->sh_desc_enc; cnstr_shdsc_xts_ablkcipher_encap(desc, &ctx->cdata); - ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, + desc_bytes(desc), DMA_TO_DEVICE); /* xts_ablkcipher_decrypt shared descriptor */ desc = ctx->sh_desc_dec; cnstr_shdsc_xts_ablkcipher_decap(desc, &ctx->cdata); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { - dma_unmap_single(jrdev, ctx->sh_desc_enc_dma, - desc_bytes(ctx->sh_desc_enc), DMA_TO_DEVICE); - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, + desc_bytes(desc), DMA_TO_DEVICE); return 0; } @@ -3391,12 +3251,31 @@ struct caam_crypto_alg { static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam) { + dma_addr_t dma_addr; + ctx->jrdev = caam_jr_alloc(); if (IS_ERR(ctx->jrdev)) { pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(ctx->jrdev); } + dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc, + offsetof(struct caam_ctx, + sh_desc_enc_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(ctx->jrdev, dma_addr)) { + dev_err(ctx->jrdev, "unable to map key, shared descriptors\n"); + caam_jr_free(ctx->jrdev); + return -ENOMEM; + } + + ctx->sh_desc_enc_dma = dma_addr; + ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx, + sh_desc_dec); + ctx->sh_desc_givenc_dma = dma_addr + offsetof(struct caam_ctx, + sh_desc_givenc); + ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key); + /* copy descriptor header template value */ ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | 
caam->class1_alg_type; ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam->class2_alg_type; @@ -3426,25 +3305,9 @@ static int caam_aead_init(struct crypto_aead *tfm) static void caam_exit_common(struct caam_ctx *ctx) { - if (ctx->sh_desc_enc_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_enc_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_enc_dma, - desc_bytes(ctx->sh_desc_enc), DMA_TO_DEVICE); - if (ctx->sh_desc_dec_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_dec_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_dec_dma, - desc_bytes(ctx->sh_desc_dec), DMA_TO_DEVICE); - if (ctx->sh_desc_givenc_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_givenc_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_givenc_dma, - desc_bytes(ctx->sh_desc_givenc), - DMA_TO_DEVICE); - if (ctx->key_dma && - !dma_mapping_error(ctx->jrdev, ctx->key_dma)) - dma_unmap_single(ctx->jrdev, ctx->key_dma, - ctx->cdata.keylen + ctx->adata.keylen_pad, - DMA_TO_DEVICE); - + dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma, + offsetof(struct caam_ctx, sh_desc_enc_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 117bbd8c08d4..2ad83a8dc0fe 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -276,12 +276,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_update shared descriptor */ desc = ctx->sh_desc_update; ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true); - ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash update shdesc@"__stringify(__LINE__)": ", @@ -291,13 +287,8 @@ static int 
ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_update_first shared descriptor */ desc = ctx->sh_desc_update_first; ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false); - ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_update_first_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash update first shdesc@"__stringify(__LINE__)": ", @@ -307,12 +298,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_final shared descriptor */ desc = ctx->sh_desc_fin; ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true); - ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash final shdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, desc, @@ -322,13 +309,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_digest shared descriptor */ desc = ctx->sh_desc_digest; ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false); - ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_digest_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma, + desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG print_hex_dump(KERN_ERR, "ahash digest shdesc@"__stringify(__LINE__)": ", @@ -1716,6 +1698,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) HASH_MSG_LEN + 
SHA256_DIGEST_SIZE, HASH_MSG_LEN + 64, HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + dma_addr_t dma_addr; /* * Get a Job ring from Job Ring driver to ensure in-order @@ -1726,6 +1709,26 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(ctx->jrdev); } + + dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update, + offsetof(struct caam_hash_ctx, + sh_desc_update_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(ctx->jrdev, dma_addr)) { + dev_err(ctx->jrdev, "unable to map shared descriptors\n"); + caam_jr_free(ctx->jrdev); + return -ENOMEM; + } + + ctx->sh_desc_update_dma = dma_addr; + ctx->sh_desc_update_first_dma = dma_addr + + offsetof(struct caam_hash_ctx, + sh_desc_update_first); + ctx->sh_desc_fin_dma = dma_addr + offsetof(struct caam_hash_ctx, + sh_desc_fin); + ctx->sh_desc_digest_dma = dma_addr + offsetof(struct caam_hash_ctx, + sh_desc_digest); + /* copy descriptor header template value */ ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; @@ -1742,26 +1745,10 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm) { struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->sh_desc_update_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_dma, - desc_bytes(ctx->sh_desc_update), - DMA_TO_DEVICE); - if (ctx->sh_desc_update_first_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_update_first_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_update_first_dma, - desc_bytes(ctx->sh_desc_update_first), - DMA_TO_DEVICE); - if (ctx->sh_desc_fin_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_fin_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_fin_dma, - desc_bytes(ctx->sh_desc_fin), DMA_TO_DEVICE); - if (ctx->sh_desc_digest_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_digest_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_digest_dma, - 
desc_bytes(ctx->sh_desc_digest), - DMA_TO_DEVICE); - + dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, + offsetof(struct caam_hash_ctx, + sh_desc_update_dma), + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } From 87ec02e7409d787348c244039aa3536a812dfa8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:23 +0200 Subject: [PATCH 129/142] crypto: caam - fix error path for ctx_dma mapping failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case ctx_dma dma mapping fails, ahash_unmap_ctx() tries to dma unmap an invalid address: map_seq_out_ptr_ctx() / ctx_map_to_sec4_sg() -> goto unmap_ctx -> -> ahash_unmap_ctx() -> dma unmap ctx_dma It is also possible to reach ahash_unmap_ctx() with ctx_dma uninitialized or to try to unmap the same address twice. Fix these by setting ctx_dma = 0 where needed: -initialize ctx_dma in ahash_init() -clear ctx_dma in case of mapping error (instead of holding the error code returned by the dma map function) -clear ctx_dma after each unmapping Fixes: 32686d34f8fb6 ("crypto: caam - ensure that we clean up after an error") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 2ad83a8dc0fe..6c6c005f417b 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -148,6 +148,7 @@ static inline int map_seq_out_ptr_ctx(u32 *desc, struct device *jrdev, ctx_len, DMA_FROM_DEVICE); if (dma_mapping_error(jrdev, state->ctx_dma)) { dev_err(jrdev, "unable to map ctx\n"); + state->ctx_dma = 0; return -ENOMEM; } @@ -208,6 +209,7 @@ static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, state->ctx_dma = dma_map_single(jrdev, state->caam_ctx, ctx_len, flag); if (dma_mapping_error(jrdev, state->ctx_dma)) { dev_err(jrdev,
"unable to map ctx\n"); + state->ctx_dma = 0; return -ENOMEM; } @@ -482,8 +484,10 @@ static inline void ahash_unmap_ctx(struct device *dev, struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); - if (state->ctx_dma) + if (state->ctx_dma) { dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag); + state->ctx_dma = 0; + } ahash_unmap(dev, edesc, req, dst_len); } @@ -1463,6 +1467,7 @@ static int ahash_init(struct ahash_request *req) state->finup = ahash_finup_first; state->final = ahash_final_no_ctx; + state->ctx_dma = 0; state->current_buf = 0; state->buf_dma = 0; state->buflen_0 = 0; From 0355d23d4034f42b28db19520bc8865e26053404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:24 +0200 Subject: [PATCH 130/142] crypto: caam - abstract ahash request double buffering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit caamhash uses double buffering for holding previous/current and next chunks (data smaller than block size) to be hashed. Add (inline) functions to abstract this mechanism. Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 77 +++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 6c6c005f417b..b37d555a80d0 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -137,6 +137,31 @@ struct caam_export_state { int (*finup)(struct ahash_request *req); }; +static inline void switch_buf(struct caam_hash_state *state) +{ + state->current_buf ^= 1; +} + +static inline u8 *current_buf(struct caam_hash_state *state) +{ + return state->current_buf ? state->buf_1 : state->buf_0; +} + +static inline u8 *alt_buf(struct caam_hash_state *state) +{ + return state->current_buf ? 
state->buf_0 : state->buf_1; +} + +static inline int *current_buflen(struct caam_hash_state *state) +{ + return state->current_buf ? &state->buflen_1 : &state->buflen_0; +} + +static inline int *alt_buflen(struct caam_hash_state *state) +{ + return state->current_buf ? &state->buflen_0 : &state->buflen_1; +} + /* Common job descriptor seq in/out ptr routines */ /* Map state->caam_ctx, and append seq_out_ptr command that points to it */ @@ -695,11 +720,10 @@ static int ahash_update_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0; - u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1; - int *next_buflen = state->current_buf ? &state->buflen_0 : - &state->buflen_1, last_buflen; + u8 *buf = current_buf(state); + int *buflen = current_buflen(state); + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state), last_buflen; int in_len = *buflen + req->nbytes, to_hash; u32 *desc; int src_nents, mapped_nents, sec4_sg_bytes, sec4_sg_src_index; @@ -771,7 +795,7 @@ static int ahash_update_ctx(struct ahash_request *req) cpu_to_caam32(SEC4_SG_LEN_FIN); } - state->current_buf = !state->current_buf; + switch_buf(state); desc = edesc->hw_desc; @@ -829,10 +853,9 @@ static int ahash_final_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; - int last_buflen = state->current_buf ? 
state->buflen_0 : - state->buflen_1; + u8 *buf = current_buf(state); + int buflen = *current_buflen(state); + int last_buflen = *alt_buflen(state); u32 *desc; int sec4_sg_bytes, sec4_sg_src_index; int digestsize = crypto_ahash_digestsize(ahash); @@ -908,10 +931,9 @@ static int ahash_finup_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; - int last_buflen = state->current_buf ? state->buflen_0 : - state->buflen_1; + u8 *buf = current_buf(state); + int buflen = *current_buflen(state); + int last_buflen = *alt_buflen(state); u32 *desc; int sec4_sg_src_index; int src_nents, mapped_nents; @@ -1075,8 +1097,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; + u8 *buf = current_buf(state); + int buflen = *current_buflen(state); u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); struct ahash_edesc *edesc; @@ -1136,11 +1158,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int *buflen = state->current_buf ? &state->buflen_1 : &state->buflen_0; - u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1; - int *next_buflen = state->current_buf ? 
&state->buflen_0 : - &state->buflen_1; + u8 *buf = current_buf(state); + int *buflen = current_buflen(state); + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state); int in_len = *buflen + req->nbytes, to_hash; int sec4_sg_bytes, src_nents, mapped_nents; struct ahash_edesc *edesc; @@ -1200,7 +1221,7 @@ static int ahash_update_no_ctx(struct ahash_request *req) *next_buflen, 0); } - state->current_buf = !state->current_buf; + switch_buf(state); desc = edesc->hw_desc; @@ -1263,10 +1284,9 @@ static int ahash_finup_no_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = state->current_buf ? state->buf_1 : state->buf_0; - int buflen = state->current_buf ? state->buflen_1 : state->buflen_0; - int last_buflen = state->current_buf ? state->buflen_0 : - state->buflen_1; + u8 *buf = current_buf(state); + int buflen = *current_buflen(state); + int last_buflen = *alt_buflen(state); u32 *desc; int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents; int digestsize = crypto_ahash_digestsize(ahash); @@ -1356,9 +1376,8 @@ static int ahash_update_first(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *next_buf = state->current_buf ? state->buf_1 : state->buf_0; - int *next_buflen = state->current_buf ? 
- &state->buflen_1 : &state->buflen_0; + u8 *next_buf = current_buf(state); + int *next_buflen = current_buflen(state); int to_hash; u32 *desc; int src_nents, mapped_nents; From 944c3d4dca34403e802287a1e7e9d02c06dce0d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 10 Feb 2017 14:07:25 +0200 Subject: [PATCH 131/142] crypto: caam - fix state buffer DMA (un)mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we register the DMA API debug notification chain to receive platform bus events: dma_debug_add_bus(&platform_bus_type); we start receiving warnings after a simple test like "modprobe caam_jr && modprobe caamhash && modprobe -r caamhash && modprobe -r caam_jr": platform ffe301000.jr: DMA-API: device driver has pending DMA allocations while released from device [count=1938] One of leaked entries details: [device address=0x0000000173fda090] [size=63 bytes] [mapped with DMA_TO_DEVICE] [mapped as single] It turns out there are several issues with handling buf_dma (mapping of buffer holding the previous chunk smaller than hash block size): -detection of buf_dma mapping failure occurs too late, after a job descriptor using that value has been submitted for execution -dma mapping leak - unmapping is not performed in all places: for e.g. 
in ahash_export or in most ahash_fin* callbacks (due to current back-to-back implementation of buf_dma unmapping/mapping) Fix these by: -calling dma_mapping_error() on buf_dma right after the mapping and providing an error code if needed -unmapping buf_dma during the "job done" (ahash_done_*) callbacks Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 105 ++++++++++++++++----------------- 1 file changed, 51 insertions(+), 54 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index b37d555a80d0..da4f94eab3da 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -194,36 +194,27 @@ static inline dma_addr_t map_seq_out_ptr_result(u32 *desc, struct device *jrdev, return dst_dma; } -/* Map current buffer in state and put it in link table */ -static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev, - struct sec4_sg_entry *sec4_sg, - u8 *buf, int buflen) +/* Map current buffer in state (if length > 0) and put it in link table */ +static inline int buf_map_to_sec4_sg(struct device *jrdev, + struct sec4_sg_entry *sec4_sg, + struct caam_hash_state *state) { - dma_addr_t buf_dma; + int buflen = *current_buflen(state); - buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE); - dma_to_sec4_sg_one(sec4_sg, buf_dma, buflen, 0); + if (!buflen) + return 0; - return buf_dma; -} + state->buf_dma = dma_map_single(jrdev, current_buf(state), buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, state->buf_dma)) { + dev_err(jrdev, "unable to map buf\n"); + state->buf_dma = 0; + return -ENOMEM; + } -/* - * Only put buffer in link table if it contains data, which is possible, - * since a buffer has previously been used, and needs to be unmapped, - */ -static inline dma_addr_t -try_buf_map_to_sec4_sg(struct device *jrdev, struct sec4_sg_entry *sec4_sg, - u8 *buf, dma_addr_t buf_dma, int buflen, - int last_buflen) -{ - if (buf_dma && !dma_mapping_error(jrdev, 
buf_dma)) - dma_unmap_single(jrdev, buf_dma, last_buflen, DMA_TO_DEVICE); - if (buflen) - buf_dma = buf_map_to_sec4_sg(jrdev, sec4_sg, buf, buflen); - else - buf_dma = 0; + dma_to_sec4_sg_one(sec4_sg, state->buf_dma, buflen, 0); - return buf_dma; + return 0; } /* Map state->caam_ctx, and add it to link table */ @@ -491,6 +482,8 @@ static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, int dst_len) { + struct caam_hash_state *state = ahash_request_ctx(req); + if (edesc->src_nents) dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); if (edesc->dst_dma) @@ -499,6 +492,12 @@ static inline void ahash_unmap(struct device *dev, if (edesc->sec4_sg_bytes) dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes, DMA_TO_DEVICE); + + if (state->buf_dma) { + dma_unmap_single(dev, state->buf_dma, *current_buflen(state), + DMA_TO_DEVICE); + state->buf_dma = 0; + } } static inline void ahash_unmap_ctx(struct device *dev, @@ -557,8 +556,8 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); -#ifdef DEBUG struct caam_hash_state *state = ahash_request_ctx(req); +#ifdef DEBUG int digestsize = crypto_ahash_digestsize(ahash); dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); @@ -569,6 +568,7 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, caam_jr_strstatus(jrdev, err); ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); + switch_buf(state); kfree(edesc); #ifdef DEBUG @@ -625,8 +625,8 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); -#ifdef DEBUG struct caam_hash_state *state = ahash_request_ctx(req); +#ifdef DEBUG int digestsize = 
crypto_ahash_digestsize(ahash); dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); @@ -637,6 +637,7 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, caam_jr_strstatus(jrdev, err); ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + switch_buf(state); kfree(edesc); #ifdef DEBUG @@ -777,10 +778,9 @@ static int ahash_update_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, - edesc->sec4_sg + 1, - buf, state->buf_dma, - *buflen, last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, state); + if (ret) + goto unmap_ctx; if (mapped_nents) { sg_to_sec4_sg_last(req->src, mapped_nents, @@ -795,8 +795,6 @@ static int ahash_update_ctx(struct ahash_request *req) cpu_to_caam32(SEC4_SG_LEN_FIN); } - switch_buf(state); - desc = edesc->hw_desc; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -853,9 +851,7 @@ static int ahash_final_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = current_buf(state); int buflen = *current_buflen(state); - int last_buflen = *alt_buflen(state); u32 *desc; int sec4_sg_bytes, sec4_sg_src_index; int digestsize = crypto_ahash_digestsize(ahash); @@ -882,9 +878,10 @@ static int ahash_final_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, - buf, state->buf_dma, buflen, - last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, state); + if (ret) + goto unmap_ctx; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); @@ -931,9 +928,7 @@ static int ahash_finup_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? 
GFP_KERNEL : GFP_ATOMIC; - u8 *buf = current_buf(state); int buflen = *current_buflen(state); - int last_buflen = *alt_buflen(state); u32 *desc; int sec4_sg_src_index; int src_nents, mapped_nents; @@ -978,9 +973,9 @@ static int ahash_finup_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, - buf, state->buf_dma, buflen, - last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, state); + if (ret) + goto unmap_ctx; ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, sec4_sg_src_index, ctx->ctx_len + buflen, @@ -1016,6 +1011,7 @@ static int ahash_digest(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; @@ -1025,6 +1021,8 @@ static int ahash_digest(struct ahash_request *req) struct ahash_edesc *edesc; int ret; + state->buf_dma = 0; + src_nents = sg_nents_for_len(req->src, req->nbytes); if (src_nents < 0) { dev_err(jrdev, "Invalid number of src SG.\n"); @@ -1210,8 +1208,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->dst_dma = 0; - state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, - buf, *buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, state); + if (ret) + goto unmap_ctx; + sg_to_sec4_sg_last(req->src, mapped_nents, edesc->sec4_sg + 1, 0); @@ -1221,8 +1221,6 @@ static int ahash_update_no_ctx(struct ahash_request *req) *next_buflen, 0); } - switch_buf(state); - desc = edesc->hw_desc; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, @@ -1284,9 +1282,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & 
(CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *buf = current_buf(state); int buflen = *current_buflen(state); - int last_buflen = *alt_buflen(state); u32 *desc; int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents; int digestsize = crypto_ahash_digestsize(ahash); @@ -1328,9 +1324,9 @@ static int ahash_finup_no_ctx(struct ahash_request *req) edesc->src_nents = src_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf, - state->buf_dma, buflen, - last_buflen); + ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, state); + if (ret) + goto unmap; ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 1, buflen, req->nbytes); @@ -1376,8 +1372,8 @@ static int ahash_update_first(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *next_buf = current_buf(state); - int *next_buflen = current_buflen(state); + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state); int to_hash; u32 *desc; int src_nents, mapped_nents; @@ -1459,6 +1455,7 @@ static int ahash_update_first(struct ahash_request *req) state->final = ahash_final_no_ctx; scatterwalk_map_and_copy(next_buf, req->src, 0, req->nbytes, 0); + switch_buf(state); } #ifdef DEBUG print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ", From 5338ad7065c0a4cb55e949638b1fdba6b09dc5a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 11 Feb 2017 19:25:21 +0000 Subject: [PATCH 132/142] crypto: ccm - honour alignmask of subordinate MAC cipher The CCM driver was recently updated to defer the MAC part of the algorithm to a dedicated crypto transform, and a template for instantiating such transforms was added at the same time. 
However, this new cbcmac template fails to take the alignmask of the encapsulated cipher into account, which may result in buffer addresses being passed down that are not sufficiently aligned. So update the code to ensure that the digest buffer in the desc ctx appears at a sufficiently aligned offset, and tweak the code so that all calls to crypto_cipher_encrypt_one() operate on this buffer exclusively. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/ccm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 52e307807ff6..24c26ab052ca 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -58,7 +58,6 @@ struct cbcmac_tfm_ctx { struct cbcmac_desc_ctx { unsigned int len; - u8 dg[]; }; static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx( @@ -868,9 +867,10 @@ static int crypto_cbcmac_digest_init(struct shash_desc *pdesc) { struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc); int bs = crypto_shash_digestsize(pdesc->tfm); + u8 *dg = (u8 *)ctx + crypto_shash_descsize(pdesc->tfm) - bs; ctx->len = 0; - memset(ctx->dg, 0, bs); + memset(dg, 0, bs); return 0; } @@ -883,17 +883,18 @@ static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p, struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_digestsize(parent); + u8 *dg = (u8 *)ctx + crypto_shash_descsize(parent) - bs; while (len > 0) { unsigned int l = min(len, bs - ctx->len); - crypto_xor(ctx->dg + ctx->len, p, l); + crypto_xor(dg + ctx->len, p, l); ctx->len +=l; len -= l; p += l; if (ctx->len == bs) { - crypto_cipher_encrypt_one(tfm, ctx->dg, ctx->dg); + crypto_cipher_encrypt_one(tfm, dg, dg); ctx->len = 0; } } @@ -908,12 +909,12 @@ static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out) struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_digestsize(parent); + u8 *dg = (u8 
*)ctx + crypto_shash_descsize(parent) - bs; if (ctx->len) - crypto_cipher_encrypt_one(tfm, out, ctx->dg); - else - memcpy(out, ctx->dg, bs); + crypto_cipher_encrypt_one(tfm, dg, dg); + memcpy(out, dg, bs); return 0; } @@ -969,7 +970,8 @@ static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.base.cra_blocksize = 1; inst->alg.digestsize = alg->cra_blocksize; - inst->alg.descsize = sizeof(struct cbcmac_desc_ctx) + + inst->alg.descsize = ALIGN(sizeof(struct cbcmac_desc_ctx), + alg->cra_alignmask + 1) + alg->cra_blocksize; inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx); From 5ba8e2a05ed6695f38f3961ca8cb5cfc1063a842 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 11 Feb 2017 19:25:22 +0000 Subject: [PATCH 133/142] crypto: ccm - drop unnecessary minimum 32-bit alignment The CCM driver forces 32-bit alignment even if the underlying ciphers don't care about alignment. This is because crypto_xor() used to require this, but since this is no longer the case, drop the hardcoded minimum of 32 bits. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/ccm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 24c26ab052ca..442848807a52 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -525,8 +525,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, ctr->base.cra_priority) / 2; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = mac->base.cra_alignmask | - ctr->base.cra_alignmask | - (__alignof__(u32) - 1); + ctr->base.cra_alignmask; inst->alg.ivsize = 16; inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr); inst->alg.maxauthsize = 16; From a9f5a62a112b6cedcb0d13925a3b6e728caac002 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Feb 2017 12:04:08 +0000 Subject: [PATCH 134/142] hwrng: omap - update Kconfig help description omap-rng also supports Marvell Armada 7k/8k SoCs, but no mention of this is made in the help text, despite the dependency being added. Explicitly mention these SoCs in the help description so people know that it covers more than just TI SoCs. Fixes: 383212425c92 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Signed-off-by: Russell King Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index ceff2fc524b1..0cafe08919c9 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -172,8 +172,8 @@ config HW_RANDOM_OMAP default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number - Generator hardware found on OMAP16xx, OMAP2/3/4/5 and AM33xx/AM43xx - multimedia processors. + Generator hardware found on OMAP16xx, OMAP2/3/4/5, AM33xx/AM43xx + multimedia processors, and Marvell Armada 7k/8k SoCs. To compile this driver as a module, choose M here: the module will be called omap-rng. 
From 36b05efc1b5b62054232d5b8453782ee7ca4efaa Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Tue, 14 Feb 2017 08:21:45 +0200 Subject: [PATCH 135/142] crypto: doc - fix typo Fix a single letter typo in api-skcipher.rst. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- Documentation/crypto/api-skcipher.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/crypto/api-skcipher.rst b/Documentation/crypto/api-skcipher.rst index b20028a361a9..4eec4a93f7e3 100644 --- a/Documentation/crypto/api-skcipher.rst +++ b/Documentation/crypto/api-skcipher.rst @@ -59,4 +59,4 @@ Synchronous Block Cipher API - Deprecated :doc: Synchronous Block Cipher API .. kernel-doc:: include/linux/crypto.h - :functions: crypto_alloc_blkcipher rypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv + :functions: crypto_alloc_blkcipher crypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv From 6ecb7d62bf8ef34325fa55e8cf167610ed7abe83 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Tue, 14 Feb 2017 09:23:17 +0000 Subject: [PATCH 136/142] crypto: cavium - cpt_bind_vq_to_grp could return an error code cpt_bind_vq_to_grp() could return an error code. However, it currently returns a u8. This produces the following static checker warning:
drivers/crypto/cavium/cpt/cptpf_mbox.c:70 cpt_bind_vq_to_grp() warn: signedness bug returning '(-22)' Reported-by: Dan Carpenter Signed-off-by: George Cherian Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptpf_mbox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptpf_mbox.c b/drivers/crypto/cavium/cpt/cptpf_mbox.c index 5818b415e814..20f2c6ee46a5 100644 --- a/drivers/crypto/cavium/cpt/cptpf_mbox.c +++ b/drivers/crypto/cavium/cpt/cptpf_mbox.c @@ -59,7 +59,7 @@ static void cpt_cfg_vq_priority(struct cpt_device *cpt, int vf, u32 pri) cpt_write_csr64(cpt->reg_base, CPTX_PF_QX_CTL(0, vf), pf_qx_ctl.u); } -static u8 cpt_bind_vq_to_grp(struct cpt_device *cpt, u8 q, u8 grp) +static int cpt_bind_vq_to_grp(struct cpt_device *cpt, u8 q, u8 grp) { struct microcode *mcode = cpt->mcode; union cptx_pf_qx_ctl pf_qx_ctl; @@ -90,7 +90,7 @@ static void cpt_handle_mbox_intr(struct cpt_device *cpt, int vf) { struct cpt_vf_info *vfx = &cpt->vfinfo[vf]; struct cpt_mbox mbx = {}; - u8 vftype; + int vftype; struct device *dev = &cpt->pdev->dev; /* * MBOX[0] contains msg From eafa26696a648f974942e12f42460abff80a646d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 18:07:31 +0100 Subject: [PATCH 137/142] crypto: cavium - fix Kconfig dependencies The driver fails to build if MSI support is disabled: In file included from /git/arm-soc/drivers/crypto/cavium/cpt/cptpf_main.c:18:0: drivers/crypto/cavium/cpt/cptpf.h:57:20: error: array type has incomplete element type 'struct msix_entry' struct msix_entry msix_entries[CPT_PF_MSIX_VECTORS]; ^~~~~~~~~~~~ drivers/crypto/cavium/cpt/cptpf_main.c: In function 'cpt_enable_msix': drivers/crypto/cavium/cpt/cptpf_main.c:344:8: error: implicit declaration of function 'pci_enable_msix';did you mean 'cpt_enable_msix'? 
[-Werror=implicit-function-declaration] On the other hand, it doesn't seem to have any build dependency on ARCH_THUNDER, so let's allow compile-testing to catch this kind of problem more easily. The 64-bit dependency is needed for the use of readq/writeq. Signed-off-by: Arnd Bergmann Acked-by: David Daney Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/cavium/cpt/Kconfig b/drivers/crypto/cavium/cpt/Kconfig index 247f1cbbefc1..cbd51b1aa046 100644 --- a/drivers/crypto/cavium/cpt/Kconfig +++ b/drivers/crypto/cavium/cpt/Kconfig @@ -7,7 +7,8 @@ config CRYPTO_DEV_CPT config CAVIUM_CPT tristate "Cavium Cryptographic Accelerator driver" - depends on ARCH_THUNDER + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT select CRYPTO_DEV_CPT help Support for Cavium CPT block found in octeon-tx series of From dcd36c436c9c1c5052be6bc2a9200e4e6dfbd267 Mon Sep 17 00:00:00 2001 From: Rob Rice Date: Tue, 14 Feb 2017 12:45:52 -0500 Subject: [PATCH 138/142] crypto: brcm - Avoid double free in ahash_finup() In Broadcom SPU driver, in case where incremental hash is done in software in ahash_finup(), tmpbuf was freed twice. 
Reported-by: Dan Carpenter Signed-off-by: Rob Rice Signed-off-by: Herbert Xu --- drivers/crypto/bcm/cipher.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index a654a01ff2ba..cc0d5b98006e 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -2331,7 +2331,6 @@ static int ahash_finup(struct ahash_request *req) /* Call synchronous update */ ret = crypto_shash_finup(ctx->shash, tmpbuf, req->nbytes, req->result); - kfree(tmpbuf); } else { /* Otherwise call the internal function which uses SPU hw */ return __ahash_finup(req); From f4f228bff3c98990537ebd4bcab6730ae02d6e48 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Feb 2017 08:18:41 +0100 Subject: [PATCH 139/142] crypto: cavium - remove dead MSI-X related define Signed-off-by: Christoph Hellwig Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cpt_common.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cpt_common.h b/drivers/crypto/cavium/cpt/cpt_common.h index ede612f306d3..225078d03773 100644 --- a/drivers/crypto/cavium/cpt/cpt_common.h +++ b/drivers/crypto/cavium/cpt/cpt_common.h @@ -20,12 +20,10 @@ #define CPT_81XX_PCI_VF_DEVICE_ID 0xa041 /* flags to indicate the features supported */ -#define CPT_FLAG_MSIX_ENABLED BIT(0) #define CPT_FLAG_SRIOV_ENABLED BIT(1) #define CPT_FLAG_VF_DRIVER BIT(2) #define CPT_FLAG_DEVICE_READY BIT(3) -#define cpt_msix_enabled(cpt) ((cpt)->flags & CPT_FLAG_MSIX_ENABLED) #define cpt_sriov_enabled(cpt) ((cpt)->flags & CPT_FLAG_SRIOV_ENABLED) #define cpt_vf_driver(cpt) ((cpt)->flags & CPT_FLAG_VF_DRIVER) #define cpt_device_ready(cpt) ((cpt)->flags & CPT_FLAG_DEVICE_READY) From 613844e811a87ddbc646bd30e724c34472540296 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Feb 2017 08:18:42 +0100 Subject: [PATCH 140/142] crypto: cavium - switch to pci_alloc_irq_vectors pci_enable_msix has been long deprecated, but this driver adds a new instance. 
Convert it to pci_alloc_irq_vectors and greatly simplify the code. Signed-off-by: Christoph Hellwig Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptpf.h | 5 --- drivers/crypto/cavium/cpt/cptpf_main.c | 58 +++++--------------------- 2 files changed, 10 insertions(+), 53 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptpf.h b/drivers/crypto/cavium/cpt/cptpf.h index 8a2a8e538da4..c0556c5f63c9 100644 --- a/drivers/crypto/cavium/cpt/cptpf.h +++ b/drivers/crypto/cavium/cpt/cptpf.h @@ -51,11 +51,6 @@ struct cpt_device { struct cpt_vf_info vfinfo[CPT_MAX_VF_NUM]; /* Per VF info */ void __iomem *reg_base; /* Register start address */ - /* MSI-X */ - u8 num_vec; - bool msix_enabled; - struct msix_entry msix_entries[CPT_PF_MSIX_VECTORS]; - bool irq_allocated[CPT_PF_MSIX_VECTORS]; struct pci_dev *pdev; /* pci device handle */ struct microcode mcode[CPT_MAX_CORE_GROUPS]; diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c index 682d57a11a75..4119c40e7c4b 100644 --- a/drivers/crypto/cavium/cpt/cptpf_main.c +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -332,26 +332,6 @@ static int cpt_ucode_load(struct cpt_device *cpt) return ret; } -static int cpt_enable_msix(struct cpt_device *cpt) -{ - int i, ret; - - cpt->num_vec = CPT_PF_MSIX_VECTORS; - - for (i = 0; i < cpt->num_vec; i++) - cpt->msix_entries[i].entry = i; - - ret = pci_enable_msix(cpt->pdev, cpt->msix_entries, cpt->num_vec); - if (ret) { - dev_err(&cpt->pdev->dev, "Request for #%d msix vectors failed\n", - cpt->num_vec); - return ret; - } - - cpt->msix_enabled = 1; - return 0; -} - static irqreturn_t cpt_mbx0_intr_handler(int irq, void *cpt_irq) { struct cpt_device *cpt = (struct cpt_device *)cpt_irq; @@ -361,26 +341,6 @@ static irqreturn_t cpt_mbx0_intr_handler(int irq, void *cpt_irq) return IRQ_HANDLED; } -static void cpt_disable_msix(struct cpt_device *cpt) -{ - if (cpt->msix_enabled) { - pci_disable_msix(cpt->pdev); - cpt->msix_enabled = 0; - cpt->num_vec = 0; 
- } -} - -static void cpt_free_all_interrupts(struct cpt_device *cpt) -{ - int irq; - - for (irq = 0; irq < cpt->num_vec; irq++) { - if (cpt->irq_allocated[irq]) - free_irq(cpt->msix_entries[irq].vector, cpt); - cpt->irq_allocated[irq] = false; - } -} - static void cpt_reset(struct cpt_device *cpt) { cpt_write_csr64(cpt->reg_base, CPTX_PF_RESET(0), 1); @@ -506,32 +466,34 @@ static int cpt_register_interrupts(struct cpt_device *cpt) struct device *dev = &cpt->pdev->dev; /* Enable MSI-X */ - ret = cpt_enable_msix(cpt); - if (ret) + ret = pci_alloc_irq_vectors(cpt->pdev, CPT_PF_MSIX_VECTORS, + CPT_PF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, "Request for #%d msix vectors failed\n", + CPT_PF_MSIX_VECTORS); return ret; + } /* Register mailbox interrupt handlers */ - ret = request_irq(cpt->msix_entries[CPT_PF_INT_VEC_E_MBOXX(0)].vector, + ret = request_irq(pci_irq_vector(cpt->pdev, CPT_PF_INT_VEC_E_MBOXX(0)), cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); if (ret) goto fail; - cpt->irq_allocated[CPT_PF_INT_VEC_E_MBOXX(0)] = true; - /* Enable mailbox interrupt */ cpt_enable_mbox_interrupts(cpt); return 0; fail: dev_err(dev, "Request irq failed\n"); - cpt_free_all_interrupts(cpt); + pci_disable_msix(cpt->pdev); return ret; } static void cpt_unregister_interrupts(struct cpt_device *cpt) { - cpt_free_all_interrupts(cpt); - cpt_disable_msix(cpt); + free_irq(pci_irq_vector(cpt->pdev, CPT_PF_INT_VEC_E_MBOXX(0)), cpt); + pci_disable_msix(cpt->pdev); } static int cpt_sriov_init(struct cpt_device *cpt, int num_vfs) From 15c0b9edcc41fe8fddcd07d6b58ee15e6554d17e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Feb 2017 08:18:43 +0100 Subject: [PATCH 141/142] crypto: cavium - switch to pci_alloc_irq_vectors pci_enable_msix has been long deprecated, but this driver adds a new instance. Convert it to pci_alloc_irq_vectors and greatly simplify the code, and make sure the probe code properly unwinds.
Signed-off-by: Christoph Hellwig Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf.h | 3 - drivers/crypto/cavium/cpt/cptvf_main.c | 203 ++++++++----------------- 2 files changed, 65 insertions(+), 141 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptvf.h b/drivers/crypto/cavium/cpt/cptvf.h index 1cc04aa611e4..0a835a07d4f2 100644 --- a/drivers/crypto/cavium/cpt/cptvf.h +++ b/drivers/crypto/cavium/cpt/cptvf.h @@ -107,9 +107,6 @@ struct cpt_vf { void __iomem *reg_base; /* Register start address */ void *wqe_info; /* BH worker info */ /* MSI-X */ - bool msix_enabled; - struct msix_entry msix_entries[CPT_VF_MSIX_VECTORS]; - bool irq_allocated[CPT_VF_MSIX_VECTORS]; cpumask_var_t affinity_mask[CPT_VF_MSIX_VECTORS]; /* Command and Pending queues */ u32 qsize; diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c index 527bdc3c2969..aac2966ff8d9 100644 --- a/drivers/crypto/cavium/cpt/cptvf_main.c +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -357,48 +357,10 @@ setup_pqfail: return ret; } -static void cptvf_disable_msix(struct cpt_vf *cptvf) +static void cptvf_free_irq_affinity(struct cpt_vf *cptvf, int vec) { - if (cptvf->msix_enabled) { - pci_disable_msix(cptvf->pdev); - cptvf->msix_enabled = 0; - } -} - -static int cptvf_enable_msix(struct cpt_vf *cptvf) -{ - int i, ret; - - for (i = 0; i < CPT_VF_MSIX_VECTORS; i++) - cptvf->msix_entries[i].entry = i; - - ret = pci_enable_msix(cptvf->pdev, cptvf->msix_entries, - CPT_VF_MSIX_VECTORS); - if (ret) { - dev_err(&cptvf->pdev->dev, "Request for #%d msix vectors failed\n", - CPT_VF_MSIX_VECTORS); - return ret; - } - - cptvf->msix_enabled = 1; - /* Mark MSIX enabled */ - cptvf->flags |= CPT_FLAG_MSIX_ENABLED; - - return 0; -} - -static void cptvf_free_all_interrupts(struct cpt_vf *cptvf) -{ - int irq; - - for (irq = 0; irq < CPT_VF_MSIX_VECTORS; irq++) { - if (cptvf->irq_allocated[irq]) - irq_set_affinity_hint(cptvf->msix_entries[irq].vector, - NULL); - 
free_cpumask_var(cptvf->affinity_mask[irq]); - free_irq(cptvf->msix_entries[irq].vector, cptvf); - cptvf->irq_allocated[irq] = false; - } + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); } static void cptvf_write_vq_ctl(struct cpt_vf *cptvf, bool val) @@ -650,85 +612,23 @@ static irqreturn_t cptvf_done_intr_handler(int irq, void *cptvf_irq) return IRQ_HANDLED; } -static int cptvf_register_misc_intr(struct cpt_vf *cptvf) +static void cptvf_set_irq_affinity(struct cpt_vf *cptvf, int vec) { struct pci_dev *pdev = cptvf->pdev; - int ret; + int cpu; - /* Register misc interrupt handlers */ - ret = request_irq(cptvf->msix_entries[CPT_VF_INT_VEC_E_MISC].vector, - cptvf_misc_intr_handler, 0, "CPT VF misc intr", - cptvf); - if (ret) - goto fail; - - cptvf->irq_allocated[CPT_VF_INT_VEC_E_MISC] = true; - - /* Enable mailbox interrupt */ - cptvf_enable_mbox_interrupts(cptvf); - cptvf_enable_swerr_interrupts(cptvf); - - return 0; - -fail: - dev_err(&pdev->dev, "Request misc irq failed"); - cptvf_free_all_interrupts(cptvf); - return ret; -} - -static int cptvf_register_done_intr(struct cpt_vf *cptvf) -{ - struct pci_dev *pdev = cptvf->pdev; - int ret; - - /* Register DONE interrupt handlers */ - ret = request_irq(cptvf->msix_entries[CPT_VF_INT_VEC_E_DONE].vector, - cptvf_done_intr_handler, 0, "CPT VF done intr", - cptvf); - if (ret) - goto fail; - - cptvf->irq_allocated[CPT_VF_INT_VEC_E_DONE] = true; - - /* Enable mailbox interrupt */ - cptvf_enable_done_interrupts(cptvf); - return 0; - -fail: - dev_err(&pdev->dev, "Request done irq failed\n"); - cptvf_free_all_interrupts(cptvf); - return ret; -} - -static void cptvf_unregister_interrupts(struct cpt_vf *cptvf) -{ - cptvf_free_all_interrupts(cptvf); - cptvf_disable_msix(cptvf); -} - -static void cptvf_set_irq_affinity(struct cpt_vf *cptvf) -{ - struct pci_dev *pdev = cptvf->pdev; - int vec, cpu; - int irqnum; - - for (vec = 0; vec < CPT_VF_MSIX_VECTORS; vec++) { - if 
(!cptvf->irq_allocated[vec]) - continue; - - if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], - GFP_KERNEL)) { - dev_err(&pdev->dev, "Allocation failed for affinity_mask for VF %d", - cptvf->vfid); - return; - } - - cpu = cptvf->vfid % num_online_cpus(); - cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), - cptvf->affinity_mask[vec]); - irqnum = cptvf->msix_entries[vec].vector; - irq_set_affinity_hint(irqnum, cptvf->affinity_mask[vec]); + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); } static void cptvf_write_vq_saddr(struct cpt_vf *cptvf, u64 val) @@ -809,22 +709,32 @@ static int cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } cptvf->node = dev_to_node(&pdev->dev); - /* Enable MSI-X */ - err = cptvf_enable_msix(cptvf); - if (err) { - dev_err(dev, "cptvf_enable_msix() failed"); + err = pci_alloc_irq_vectors(pdev, CPT_VF_MSIX_VECTORS, + CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + CPT_VF_MSIX_VECTORS); goto cptvf_err_release_regions; } - /* Register mailbox interrupts */ - cptvf_register_misc_intr(cptvf); + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Request misc irq failed"); + goto cptvf_free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); /* Check ready with PF */ /* Gets chip ID / device Id from PF if ready */ err = cptvf_check_pf_ready(cptvf); if (err) { dev_err(dev, "PF not responding to READY msg"); - goto cptvf_err_release_regions; + goto 
cptvf_free_misc_irq; } /* CPT VF software resources initialization */ @@ -832,13 +742,13 @@ static int cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = cptvf_sw_init(cptvf, CPT_CMD_QLEN, CPT_NUM_QS_PER_VF); if (err) { dev_err(dev, "cptvf_sw_init() failed"); - goto cptvf_err_release_regions; + goto cptvf_free_misc_irq; } /* Convey VQ LEN to PF */ err = cptvf_send_vq_size_msg(cptvf); if (err) { dev_err(dev, "PF not responding to QLEN msg"); - goto cptvf_err_release_regions; + goto cptvf_free_misc_irq; } /* CPT VF device initialization */ @@ -848,37 +758,50 @@ static int cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = cptvf_send_vf_to_grp_msg(cptvf); if (err) { dev_err(dev, "PF not responding to VF_GRP msg"); - goto cptvf_err_release_regions; + goto cptvf_free_misc_irq; } cptvf->priority = 1; err = cptvf_send_vf_priority_msg(cptvf); if (err) { dev_err(dev, "PF not responding to VF_PRIO msg"); - goto cptvf_err_release_regions; + goto cptvf_free_misc_irq; } - /* Register DONE interrupts */ - err = cptvf_register_done_intr(cptvf); - if (err) - goto cptvf_err_release_regions; + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (err) { + dev_err(dev, "Request done irq failed\n"); + goto cptvf_free_misc_irq; + } + + /* Enable mailbox interrupt */ + cptvf_enable_done_interrupts(cptvf); /* Set irq affinity masks */ - cptvf_set_irq_affinity(cptvf); - /* Convey UP to PF */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + err = cptvf_send_vf_up(cptvf); if (err) { dev_err(dev, "PF not responding to UP msg"); - goto cptvf_up_fail; + goto cptvf_free_irq_affinity; } err = cvm_crypto_init(cptvf); if (err) { dev_err(dev, "Algorithm register failed\n"); - goto cptvf_up_fail; + goto cptvf_free_irq_affinity; } return 0; -cptvf_up_fail: - cptvf_unregister_interrupts(cptvf); 
+cptvf_free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); +cptvf_free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +cptvf_free_vectors: + pci_free_irq_vectors(cptvf->pdev); cptvf_err_release_regions: pci_release_regions(pdev); cptvf_err_disable_device: @@ -899,7 +822,11 @@ static void cptvf_remove(struct pci_dev *pdev) if (cptvf_send_vf_down(cptvf)) { dev_err(&pdev->dev, "PF not responding to DOWN msg"); } else { - cptvf_unregister_interrupts(cptvf); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + pci_free_irq_vectors(cptvf->pdev); cptvf_sw_cleanup(cptvf); pci_set_drvdata(pdev, NULL); pci_release_regions(pdev); From 12cb3a1c4184f891d965d1f39f8cfcc9ef617647 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 23 Feb 2017 08:38:26 +0100 Subject: [PATCH 142/142] crypto: xts - Add ECB dependency Since the commit f1c131b45410a202eb45cc55980a7a9e4e4b4f40 crypto: xts - Convert to skcipher the XTS mode is based on ECB, so the mode must select ECB otherwise it can fail to initialize. Signed-off-by: Milan Broz Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/Kconfig b/crypto/Kconfig index 5a51b877277e..f37e9cca50e1 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -375,6 +375,7 @@ config CRYPTO_XTS select CRYPTO_BLKCIPHER select CRYPTO_MANAGER select CRYPTO_GF128MUL + select CRYPTO_ECB help XTS: IEEE1619/D16 narrow block cipher use with aes-xts-plain, key size 256, 384 or 512 bits. This implementation currently