1
0
Fork 0

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 "API:
   - Try to catch hash output overrun in testmgr
   - Introduce walksize attribute for batched walking
   - Make crypto_xor() and crypto_inc() alignment agnostic

  Algorithms:
   - Add time-invariant AES algorithm
   - Add standalone CBCMAC algorithm

  Drivers:
   - Add NEON accelerated chacha20 on ARM/ARM64
   - Expose AES-CTR as synchronous skcipher on ARM64
   - Add scalar AES implementation on ARM64
   - Improve scalar AES implementation on ARM
   - Improve NEON AES implementation on ARM/ARM64
   - Merge CRC32 and PMULL instruction based drivers on ARM64
   - Add NEON accelerated CBCMAC/CMAC/XCBC AES on ARM64
   - Add IPsec AUTHENC implementation in atmel
   - Add Support for Octeon-tx CPT Engine
   - Add Broadcom SPU driver
   - Add MediaTek driver"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (142 commits)
  crypto: xts - Add ECB dependency
  crypto: cavium - switch to pci_alloc_irq_vectors
  crypto: cavium - switch to pci_alloc_irq_vectors
  crypto: cavium - remove dead MSI-X related define
  crypto: brcm - Avoid double free in ahash_finup()
  crypto: cavium - fix Kconfig dependencies
  crypto: cavium - cpt_bind_vq_to_grp could return an error code
  crypto: doc - fix typo
  hwrng: omap - update Kconfig help description
  crypto: ccm - drop unnecessary minimum 32-bit alignment
  crypto: ccm - honour alignmask of subordinate MAC cipher
  crypto: caam - fix state buffer DMA (un)mapping
  crypto: caam - abstract ahash request double buffering
  crypto: caam - fix error path for ctx_dma mapping failure
  crypto: caam - fix DMA API leaks for multiple setkey() calls
  crypto: caam - don't dma_map key for hash algorithms
  crypto: caam - use dma_map_sg() return code
  crypto: caam - replace sg_count() with sg_nents_for_len()
  crypto: caam - check sg_count() return value
  crypto: caam - fix HW S/G in ablkcipher_giv_edesc_alloc()
  ..
hifive-unleashed-5.1
Linus Torvalds 2017-02-23 09:54:19 -08:00
commit 5bcbe22ca4
187 changed files with 26974 additions and 9589 deletions

View File

@ -14,7 +14,7 @@ Asynchronous Message Digest API
:doc: Asynchronous Message Digest API
.. kernel-doc:: include/crypto/hash.h
:functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import
:functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_statesize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import
Asynchronous Hash Request Handle
--------------------------------

View File

@ -59,4 +59,4 @@ Synchronous Block Cipher API - Deprecated
:doc: Synchronous Block Cipher API
.. kernel-doc:: include/linux/crypto.h
:functions: crypto_alloc_blkcipher rypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
:functions: crypto_alloc_blkcipher crypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv

View File

@ -0,0 +1,22 @@
The Broadcom Secure Processing Unit (SPU) hardware supports symmetric
cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware
blocks.
Required properties:
- compatible: Should be one of the following:
brcm,spum-crypto - for devices with SPU-M hardware
brcm,spu2-crypto - for devices with SPU2 hardware
brcm,spu2-v2-crypto - for devices with enhanced SPU2 hardware features like SHA3
and Rabin Fingerprint support
brcm,spum-nsp-crypto - for the Northstar Plus variant of the SPU-M hardware
- reg: Should contain SPU registers location and length.
- mboxes: The mailbox channel to be used to communicate with the SPU.
Mailbox channels correspond to DMA rings on the device.
Example:
crypto@612d0000 {
compatible = "brcm,spum-crypto";
reg = <0 0x612d0000 0 0x900>;
mboxes = <&pdc0 0>;
};

View File

@ -0,0 +1,27 @@
MediaTek cryptographic accelerators
Required properties:
- compatible: Should be "mediatek,eip97-crypto"
- reg: Address and length of the register set for the device
- interrupts: Should contain the five crypto engine interrupts in numeric
order. These are the global system interrupt and the four descriptor ring interrupts.
- clocks: the clock used by the core
- clock-names: the names of the clocks listed in the clocks property. These are
"ethif", "cryp"
- power-domains: Must contain a reference to the PM domain.
Example:
crypto: crypto@1b240000 {
compatible = "mediatek,eip97-crypto";
reg = <0 0x1b240000 0 0x20000>;
interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
<&ethsys CLK_ETHSYS_CRYPTO>;
clock-names = "ethif","cryp";
power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
};

View File

@ -3031,6 +3031,13 @@ W: http://www.cavium.com
S: Supported
F: drivers/net/ethernet/cavium/liquidio/
CAVIUM OCTEON-TX CRYPTO DRIVER
M: George Cherian <george.cherian@cavium.com>
L: linux-crypto@vger.kernel.org
W: http://www.cavium.com
S: Supported
F: drivers/crypto/cavium/cpt/
CC2520 IEEE-802.15.4 RADIO DRIVER
M: Varka Bhadram <varkabhadram@gmail.com>
L: linux-wpan@vger.kernel.org

View File

@ -62,35 +62,18 @@ config CRYPTO_SHA512_ARM
using optimized ARM assembler and NEON, when available.
config CRYPTO_AES_ARM
tristate "AES cipher algorithms (ARM-asm)"
depends on ARM
tristate "Scalar AES cipher for ARM"
select CRYPTO_ALGAPI
select CRYPTO_AES
help
Use optimized AES assembler routines for ARM platforms.
AES cipher algorithms (FIPS-197). AES uses the Rijndael
algorithm.
Rijndael appears to be consistently a very good performer in
both hardware and software across a wide range of computing
environments regardless of its use in feedback or non-feedback
modes. Its key setup time is excellent, and its key agility is
good. Rijndael's very low memory requirements make it very well
suited for restricted-space environments, in which it also
demonstrates excellent performance. Rijndael's operations are
among the easiest to defend against power and timing attacks.
The AES specifies three key sizes: 128, 192 and 256 bits
See <http://csrc.nist.gov/encryption/aes/> for more information.
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_AES_ARM
select CRYPTO_BLKCIPHER
select CRYPTO_SIMD
select CRYPTO_AES_ARM
help
Use a faster and more secure NEON based implementation of AES in CBC,
CTR and XTS modes
@ -130,4 +113,10 @@ config CRYPTO_CRC32_ARM_CE
depends on KERNEL_MODE_NEON && CRC32
select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON
tristate "NEON accelerated ChaCha20 symmetric cipher"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
endif

View File

@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@ -26,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
endif
endif
aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
@ -40,17 +41,15 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(call cmd,perl)
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S
.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S

File diff suppressed because it is too large Load Diff

View File

@ -169,19 +169,19 @@ ENTRY(ce_aes_ecb_encrypt)
.Lecbencloop3x:
subs r4, r4, #3
bmi .Lecbenc1x
vld1.8 {q0-q1}, [r1, :64]!
vld1.8 {q2}, [r1, :64]!
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
bl aes_encrypt_3x
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lecbencloop3x
.Lecbenc1x:
adds r4, r4, #3
beq .Lecbencout
.Lecbencloop:
vld1.8 {q0}, [r1, :64]!
vld1.8 {q0}, [r1]!
bl aes_encrypt
vst1.8 {q0}, [r0, :64]!
vst1.8 {q0}, [r0]!
subs r4, r4, #1
bne .Lecbencloop
.Lecbencout:
@ -195,19 +195,19 @@ ENTRY(ce_aes_ecb_decrypt)
.Lecbdecloop3x:
subs r4, r4, #3
bmi .Lecbdec1x
vld1.8 {q0-q1}, [r1, :64]!
vld1.8 {q2}, [r1, :64]!
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
bl aes_decrypt_3x
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lecbdecloop3x
.Lecbdec1x:
adds r4, r4, #3
beq .Lecbdecout
.Lecbdecloop:
vld1.8 {q0}, [r1, :64]!
vld1.8 {q0}, [r1]!
bl aes_decrypt
vst1.8 {q0}, [r0, :64]!
vst1.8 {q0}, [r0]!
subs r4, r4, #1
bne .Lecbdecloop
.Lecbdecout:
@ -226,10 +226,10 @@ ENTRY(ce_aes_cbc_encrypt)
vld1.8 {q0}, [r5]
prepare_key r2, r3
.Lcbcencloop:
vld1.8 {q1}, [r1, :64]! @ get next pt block
vld1.8 {q1}, [r1]! @ get next pt block
veor q0, q0, q1 @ ..and xor with iv
bl aes_encrypt
vst1.8 {q0}, [r0, :64]!
vst1.8 {q0}, [r0]!
subs r4, r4, #1
bne .Lcbcencloop
vst1.8 {q0}, [r5]
@ -244,8 +244,8 @@ ENTRY(ce_aes_cbc_decrypt)
.Lcbcdecloop3x:
subs r4, r4, #3
bmi .Lcbcdec1x
vld1.8 {q0-q1}, [r1, :64]!
vld1.8 {q2}, [r1, :64]!
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
vmov q3, q0
vmov q4, q1
vmov q5, q2
@ -254,19 +254,19 @@ ENTRY(ce_aes_cbc_decrypt)
veor q1, q1, q3
veor q2, q2, q4
vmov q6, q5
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lcbcdecloop3x
.Lcbcdec1x:
adds r4, r4, #3
beq .Lcbcdecout
vmov q15, q14 @ preserve last round key
.Lcbcdecloop:
vld1.8 {q0}, [r1, :64]! @ get next ct block
vld1.8 {q0}, [r1]! @ get next ct block
veor q14, q15, q6 @ combine prev ct with last key
vmov q6, q0
bl aes_decrypt
vst1.8 {q0}, [r0, :64]!
vst1.8 {q0}, [r0]!
subs r4, r4, #1
bne .Lcbcdecloop
.Lcbcdecout:
@ -300,15 +300,15 @@ ENTRY(ce_aes_ctr_encrypt)
rev ip, r6
add r6, r6, #1
vmov s11, ip
vld1.8 {q3-q4}, [r1, :64]!
vld1.8 {q5}, [r1, :64]!
vld1.8 {q3-q4}, [r1]!
vld1.8 {q5}, [r1]!
bl aes_encrypt_3x
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
rev ip, r6
vst1.8 {q0-q1}, [r0, :64]!
vst1.8 {q2}, [r0, :64]!
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
vmov s27, ip
b .Lctrloop3x
.Lctr1x:
@ -318,10 +318,10 @@ ENTRY(ce_aes_ctr_encrypt)
vmov q0, q6
bl aes_encrypt
subs r4, r4, #1
bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
vld1.8 {q3}, [r1, :64]!
bmi .Lctrtailblock @ blocks < 0 means tail block
vld1.8 {q3}, [r1]!
veor q3, q0, q3
vst1.8 {q3}, [r0, :64]!
vst1.8 {q3}, [r0]!
adds r6, r6, #1 @ increment BE ctr
rev ip, r6
@ -333,10 +333,8 @@ ENTRY(ce_aes_ctr_encrypt)
vst1.8 {q6}, [r5]
pop {r4-r6, pc}
.Lctrhalfblock:
vld1.8 {d1}, [r1, :64]
veor d0, d0, d1
vst1.8 {d0}, [r0, :64]
.Lctrtailblock:
vst1.8 {q0}, [r0, :64] @ return just the key stream
pop {r4-r6, pc}
.Lctrcarry:
@ -405,8 +403,8 @@ ENTRY(ce_aes_xts_encrypt)
.Lxtsenc3x:
subs r4, r4, #3
bmi .Lxtsenc1x
vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
vld1.8 {q2}, [r1, :64]!
vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks
vld1.8 {q2}, [r1]!
next_tweak q4, q3, q7, q6
veor q0, q0, q3
next_tweak q5, q4, q7, q6
@ -416,8 +414,8 @@ ENTRY(ce_aes_xts_encrypt)
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
vst1.8 {q2}, [r0, :64]!
vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks
vst1.8 {q2}, [r0]!
vmov q3, q5
teq r4, #0
beq .Lxtsencout
@ -426,11 +424,11 @@ ENTRY(ce_aes_xts_encrypt)
adds r4, r4, #3
beq .Lxtsencout
.Lxtsencloop:
vld1.8 {q0}, [r1, :64]!
vld1.8 {q0}, [r1]!
veor q0, q0, q3
bl aes_encrypt
veor q0, q0, q3
vst1.8 {q0}, [r0, :64]!
vst1.8 {q0}, [r0]!
subs r4, r4, #1
beq .Lxtsencout
next_tweak q3, q3, q7, q6
@ -456,8 +454,8 @@ ENTRY(ce_aes_xts_decrypt)
.Lxtsdec3x:
subs r4, r4, #3
bmi .Lxtsdec1x
vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
vld1.8 {q2}, [r1, :64]!
vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks
vld1.8 {q2}, [r1]!
next_tweak q4, q3, q7, q6
veor q0, q0, q3
next_tweak q5, q4, q7, q6
@ -467,8 +465,8 @@ ENTRY(ce_aes_xts_decrypt)
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
vst1.8 {q2}, [r0, :64]!
vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks
vst1.8 {q2}, [r0]!
vmov q3, q5
teq r4, #0
beq .Lxtsdecout
@ -477,12 +475,12 @@ ENTRY(ce_aes_xts_decrypt)
adds r4, r4, #3
beq .Lxtsdecout
.Lxtsdecloop:
vld1.8 {q0}, [r1, :64]!
vld1.8 {q0}, [r1]!
veor q0, q0, q3
add ip, r2, #32 @ 3rd round key
bl aes_decrypt
veor q0, q0, q3
vst1.8 {q0}, [r0, :64]!
vst1.8 {q0}, [r0]!
subs r4, r4, #1
beq .Lxtsdecout
next_tweak q3, q3, q7, q6

View File

@ -278,14 +278,15 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 *tsrc = walk.src.virt.addr;
/*
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
* to tell aes_ctr_encrypt() to only read half a block.
* Tell aes_ctr_encrypt() to process a tail block.
*/
blocks = (nbytes <= 8) ? -1 : 1;
blocks = -1;
ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
num_rounds(ctx), blocks, walk.iv);
memcpy(tdst, tail, nbytes);
if (tdst != tsrc)
memcpy(tdst, tsrc, nbytes);
crypto_xor(tdst, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
@ -345,7 +346,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@ -361,7 +361,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@ -378,7 +377,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@ -396,7 +394,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = 2 * AES_MIN_KEY_SIZE,

View File

@ -0,0 +1,179 @@
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
.text
.align 5
rk .req r0
rounds .req r1
in .req r2
out .req r3
ttab .req ip
t0 .req lr
t1 .req r2
t2 .req r3
/*
 * __select out, in, idx - isolate byte number idx of 'in' in 'out'.
 *
 * Before ARMv7 the byte is only masked and therefore stays at its original
 * bit position (8 * idx); on ARMv7 and later ubfx moves it down to bits 7:0.
 * __load below compensates for that difference.
 */
.macro __select, out, in, idx
.if __LINUX_ARM_ARCH__ < 7
and \out, \in, #0xff << (8 * \idx)
.else
ubfx \out, \in, #(8 * \idx), #8
.endif
.endm
/*
 * __load out, in, idx - load a 32-bit word from the table at 'ttab', indexed
 * by the byte that __select left in 'in'.
 *
 * In the pre-ARMv7 case with idx > 0 the byte is still located at bit
 * 8 * idx, so it is shifted right by 8 * idx - 2 to obtain (byte * 4).
 * Otherwise the byte is already in the low bits and is scaled with lsl #2.
 */
.macro __load, out, in, idx
.if __LINUX_ARM_ARCH__ < 7 && \idx > 0
ldr \out, [ttab, \in, lsr #(8 * \idx) - 2]
.else
ldr \out, [ttab, \in, lsl #2]
.endif
.endm
/*
 * __hround - compute two of the four output words of one table-based round.
 *
 * out0, out1:	registers receiving the two result words
 * in0..in3:	the four input state words
 * t3, t4:	extra scratch registers supplied by the caller
 * enc:		non-zero selects the byte sources used by fround, zero those
 *		used by iround
 *
 * Each output word is the XOR of four table words (one per selected input
 * byte, rotated by 0/24/16/8 bits) and one round key word.  Two round key
 * words are fetched from 'rk' with post-increment.  t0, t1 and t2 are used
 * as scratch; note that t1/t2 are aliases of r2/r3.
 */
.macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
__select \out0, \in0, 0
__select t0, \in1, 1
__load \out0, \out0, 0
__load t0, t0, 1
.if \enc
__select \out1, \in1, 0
__select t1, \in2, 1
.else
__select \out1, \in3, 0
__select t1, \in0, 1
.endif
__load \out1, \out1, 0
__select t2, \in2, 2
__load t1, t1, 1
__load t2, t2, 2
eor \out0, \out0, t0, ror #24
__select t0, \in3, 3
.if \enc
__select \t3, \in3, 2
__select \t4, \in0, 3
.else
__select \t3, \in1, 2
__select \t4, \in2, 3
.endif
__load \t3, \t3, 2
__load t0, t0, 3
__load \t4, \t4, 3
eor \out1, \out1, t1, ror #24
eor \out0, \out0, t2, ror #16
/* fetch the next two round key words and advance rk */
ldm rk!, {t1, t2}
eor \out1, \out1, \t3, ror #16
eor \out0, \out0, t0, ror #8
eor \out1, \out1, \t4, ror #8
eor \out0, \out0, t1
eor \out1, \out1, t2
.endm
/*
 * fround - one full round built from two __hround invocations with enc=1.
 *
 * The first half uses out2/out3 as scratch.  The second half passes in1 and
 * in2 as scratch registers, so those two input registers are overwritten.
 */
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
.endm
/*
 * iround - one full round built from two __hround invocations with enc=0.
 *
 * Compared with fround the in1/in3 arguments are swapped when passed down.
 * The second half passes in1 and in0 as scratch registers, so those two input
 * registers are overwritten.
 */
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
.endm
/*
 * __rev out, in - reverse the byte order of a 32-bit word.
 *
 * ARMv6 and later use the rev instruction.  Older architectures assemble the
 * result from shifted/masked pieces and clobber t0, t1 and t2 in the process.
 */
.macro __rev, out, in
.if __LINUX_ARM_ARCH__ < 6
lsl t0, \in, #24
and t1, \in, #0xff00
and t2, \in, #0xff0000
orr \out, t0, \in, lsr #24
orr \out, \out, t1, lsl #8
orr \out, \out, t2, lsr #8
.else
rev \out, \in
.endif
.endm
/*
 * __adrl out, sym, c - load the address of 'sym' into 'out'.
 *
 * 'c' is an optional condition code suffix that is appended to the generated
 * instruction(s).  Before ARMv7 the address comes from a literal pool entry
 * (ldr =sym); on ARMv7 and later it is built with a movw/movt pair.
 */
.macro __adrl, out, sym, c
.if __LINUX_ARM_ARCH__ < 7
ldr\c \out, =\sym
.else
movw\c \out, #:lower16:\sym
movt\c \out, #:upper16:\sym
.endif
.endm
/*
 * do_crypt round, ttab, ltab - body shared by the encrypt and decrypt entry
 * points.
 *
 * round:	round macro to expand (fround or iround)
 * ttab:	symbol of the lookup table used for all but the final round
 * ltab:	symbol of the lookup table used for the final round
 *
 * Register arguments on entry: rk (r0) round keys, rounds (r1) round count,
 * in (r2) input block, out (r3) output block.
 */
.macro do_crypt, round, ttab, ltab
/*
 * r3 (out) is pushed first so that it ends up at [sp]; it is reloaded from
 * there at the end because t2 aliases r3 and is clobbered by the rounds.
 */
push {r3-r11, lr}
ldr r4, [in]
ldr r5, [in, #4]
ldr r6, [in, #8]
ldr r7, [in, #12]
ldm rk!, {r8-r11}
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
__rev r5, r5
__rev r6, r6
__rev r7, r7
#endif
/* XOR the input block with the first four round key words */
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
__adrl ttab, \ttab
/*
 * Each loop iteration below performs four rounds.  If bit 1 of the round
 * count is set, enter the loop half way so only two rounds run first.
 */
tst rounds, #2
bne 1f
0: \round r8, r9, r10, r11, r4, r5, r6, r7
\round r4, r5, r6, r7, r8, r9, r10, r11
/*
 * The flags set by this subs are consumed twice: by the conditional __adrl
 * (switch to the final-round table on 'ls') and by the bhi that closes the
 * loop.  The round macros in between do not modify the flags.
 */
1: subs rounds, rounds, #4
\round r8, r9, r10, r11, r4, r5, r6, r7
__adrl ttab, \ltab, ls
\round r4, r5, r6, r7, r8, r9, r10, r11
bhi 0b
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
__rev r5, r5
__rev r6, r6
__rev r7, r7
#endif
/* recover the output pointer saved by the push above */
ldr out, [sp]
str r4, [out]
str r5, [out, #4]
str r6, [out, #8]
str r7, [out, #12]
pop {r3-r11, pc}
/* emit the literal pool used by the 'ldr =sym' form of __adrl */
.align 3
.ltorg
.endm
/*
 * __aes_arm_encrypt - expands do_crypt with the fround macro and the
 * crypto_ft_tab / crypto_fl_tab lookup tables.
 */
ENTRY(__aes_arm_encrypt)
do_crypt fround, crypto_ft_tab, crypto_fl_tab
ENDPROC(__aes_arm_encrypt)
/*
 * __aes_arm_decrypt - expands do_crypt with the iround macro and the
 * crypto_it_tab / crypto_il_tab lookup tables.
 */
ENTRY(__aes_arm_decrypt)
do_crypt iround, crypto_it_tab, crypto_il_tab
ENDPROC(__aes_arm_decrypt)

View File

@ -0,0 +1,74 @@
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>
asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_encrypt);
asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_decrypt);
/*
 * Encrypt one block from @in into @out with the scalar ARM routine, using
 * the encryption key schedule stored in the transform context.
 */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *aes = crypto_tfm_ctx(tfm);
	int nrounds;

	/* round count is derived from the key length held in the context */
	nrounds = 6 + aes->key_length / 4;

	__aes_arm_encrypt(aes->key_enc, nrounds, in, out);
}
/*
 * Decrypt one block from @in into @out with the scalar ARM routine, using
 * the decryption key schedule stored in the transform context.
 */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *aes = crypto_tfm_ctx(tfm);
	int nrounds;

	/* round count is derived from the key length held in the context */
	nrounds = 6 + aes->key_length / 4;

	__aes_arm_decrypt(aes->key_dec, nrounds, in, out);
}
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-arm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
.cra_cipher.cia_setkey = crypto_aes_set_key,
.cra_cipher.cia_encrypt = aes_encrypt,
.cra_cipher.cia_decrypt = aes_decrypt,
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
.cra_alignmask = 3,
#endif
};
/* Module entry point: register the scalar AES cipher with the crypto API. */
static int __init aes_init(void)
{
	int ret = crypto_register_alg(&aes_alg);

	return ret;
}
/* Module exit point: remove the scalar AES cipher from the crypto API. */
static void __exit aes_fini(void)
{
	struct crypto_alg *alg = &aes_alg;

	crypto_unregister_alg(alg);
}
module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Scalar AES cipher for ARM");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,406 @@
/*
* Bit sliced AES using NEON instructions
*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/cbc.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <linux/module.h>
#include <linux/string.h>
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks);
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks);
asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 ctr[], u8 final[]);
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds, const u8 in[],
u8 out[]);
/*
 * Key context shared by all bit-sliced modes in this file.
 */
struct aesbs_ctx {
/* round count; the setkey helpers store 6 + key_len / 4 here */
int rounds;
/* key schedule in the layout produced by aesbs_convert_key() */
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32] __aligned(AES_BLOCK_SIZE);
};
/*
 * CBC context: the bit-sliced key (used by cbc_decrypt) plus a copy of the
 * regular expanded encryption key, which cbc_encrypt_one() passes to
 * __aes_arm_encrypt().
 */
struct aesbs_cbc_ctx {
struct aesbs_ctx key;
u32 enc[AES_MAX_KEYLENGTH_U32];
};
/*
 * XTS context: the bit-sliced data key plus the regular expanded tweak key.
 * 'key' must remain the first member: aesbs_xts_setkey() hands the transform
 * to aesbs_setkey(), which treats the context as a struct aesbs_ctx.
 */
struct aesbs_xts_ctx {
struct aesbs_ctx key;
u32 twkey[AES_MAX_KEYLENGTH_U32];
};
/*
 * aesbs_setkey - install a key for the bit-sliced ECB/CTR transforms.
 * @tfm:     transform whose context receives the key
 * @in_key:  raw key bytes
 * @key_len: length of @in_key in bytes
 *
 * Expands the key with the generic helper and converts the encryption
 * schedule into the bit-sliced layout stored in the context.
 *
 * Returns 0 on success or the error from crypto_aes_expand_key().
 */
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			unsigned int key_len)
{
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = crypto_aes_expand_key(&rk, in_key, key_len);
	if (err)
		return err;

	ctx->rounds = 6 + key_len / 4;

	kernel_neon_begin();
	aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
	kernel_neon_end();

	/* do not leave the expanded key schedule behind on the stack */
	memzero_explicit(&rk, sizeof(rk));

	return 0;
}
/*
 * Common ECB walk used by ecb_encrypt() and ecb_decrypt().
 *
 * Walks the request and hands whole blocks to @fn.  For every chunk except
 * the last one the block count is rounded down to a multiple of the walk
 * stride; the remainder is returned to the walk.
 */
static int __ecb_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int nblocks = walk.nbytes / AES_BLOCK_SIZE;
		unsigned int leftover;

		/* more data follows: only process full strides for now */
		if (walk.nbytes < walk.total)
			nblocks = round_down(nblocks,
					     walk.stride / AES_BLOCK_SIZE);

		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
		   ctx->rounds, nblocks);

		leftover = walk.nbytes - nblocks * AES_BLOCK_SIZE;
		err = skcipher_walk_done(&walk, leftover);
	}
	kernel_neon_end();

	return err;
}
/* ECB encryption: run the shared ECB walk with the encrypt routine. */
static int ecb_encrypt(struct skcipher_request *req)
{
	int err = __ecb_crypt(req, aesbs_ecb_encrypt);

	return err;
}
/* ECB decryption: run the shared ECB walk with the decrypt routine. */
static int ecb_decrypt(struct skcipher_request *req)
{
	int err = __ecb_crypt(req, aesbs_ecb_decrypt);

	return err;
}
/*
 * aesbs_cbc_setkey - install a key for the bit-sliced CBC transform.
 * @tfm:     transform whose context receives the key
 * @in_key:  raw key bytes
 * @key_len: length of @in_key in bytes
 *
 * Keeps two forms of the key: a copy of the regular expanded encryption
 * schedule (used by cbc_encrypt_one()) and the bit-sliced schedule (used by
 * cbc_decrypt()).
 *
 * Returns 0 on success or the error from crypto_aes_expand_key().
 */
static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			    unsigned int key_len)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = crypto_aes_expand_key(&rk, in_key, key_len);
	if (err)
		return err;

	ctx->key.rounds = 6 + key_len / 4;

	memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));

	kernel_neon_begin();
	aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
	kernel_neon_end();

	/* do not leave the expanded key schedule behind on the stack */
	memzero_explicit(&rk, sizeof(rk));

	return 0;
}
/*
 * Single-block encryption callback for the CBC encryption walk: encrypts
 * @src into @dst with the scalar routine and the stored encryption schedule.
 */
static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{
	struct aesbs_cbc_ctx *cbc = crypto_skcipher_ctx(tfm);
	int nrounds = cbc->key.rounds;

	__aes_arm_encrypt(cbc->enc, nrounds, src, dst);
}
/* CBC encryption: delegate to the generic CBC walk, one block at a time. */
static int cbc_encrypt(struct skcipher_request *req)
{
	int err = crypto_cbc_encrypt_walk(req, cbc_encrypt_one);

	return err;
}
/*
 * CBC decryption using the bit-sliced routine.
 *
 * Whole blocks of each walk chunk are passed to aesbs_cbc_decrypt() together
 * with the walk IV.  For every chunk except the last one the block count is
 * rounded down to a multiple of the walk stride.
 */
static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int nblocks = walk.nbytes / AES_BLOCK_SIZE;
		unsigned int leftover;

		/* more data follows: only process full strides for now */
		if (walk.nbytes < walk.total)
			nblocks = round_down(nblocks,
					     walk.stride / AES_BLOCK_SIZE);

		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				  ctx->key.rk, ctx->key.rounds, nblocks,
				  walk.iv);

		leftover = walk.nbytes - nblocks * AES_BLOCK_SIZE;
		err = skcipher_walk_done(&walk, leftover);
	}
	kernel_neon_end();

	return err;
}
/*
 * CTR mode (encryption and decryption are the same operation).
 *
 * Whole blocks are handled by aesbs_ctr_encrypt().  When the final chunk of
 * the request ends in a partial block, a scratch buffer is passed as the
 * 'final' argument and its contents are XORed into the trailing bytes here.
 */
static int ctr_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u8 keystream[AES_BLOCK_SIZE];
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int nblocks = walk.nbytes / AES_BLOCK_SIZE;
		unsigned int tail_len = walk.total % AES_BLOCK_SIZE;
		u8 *final = NULL;
		u8 *tail_dst;
		u8 *tail_src;

		if (walk.nbytes < walk.total)
			/* not the last chunk: full strides only, no tail */
			nblocks = round_down(nblocks,
					     walk.stride / AES_BLOCK_SIZE);
		else if (tail_len)
			final = keystream;

		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				  ctx->rk, ctx->rounds, nblocks, walk.iv,
				  final);

		if (!final) {
			err = skcipher_walk_done(&walk,
					walk.nbytes - nblocks * AES_BLOCK_SIZE);
			continue;
		}

		/* apply the scratch buffer to the trailing partial block */
		tail_dst = walk.dst.virt.addr + nblocks * AES_BLOCK_SIZE;
		tail_src = walk.src.virt.addr + nblocks * AES_BLOCK_SIZE;

		if (tail_dst != tail_src)
			memcpy(tail_dst, tail_src, tail_len);
		crypto_xor(tail_dst, final, tail_len);

		err = skcipher_walk_done(&walk, 0);
		break;
	}
	kernel_neon_end();

	return err;
}
/*
 * aesbs_xts_setkey - install an XTS key pair.
 * @tfm:     transform whose context receives the keys
 * @in_key:  data key immediately followed by the tweak key
 * @key_len: combined length of both keys in bytes
 *
 * The second half of @in_key is expanded and stored as the tweak key; the
 * first half is installed as the bit-sliced data key via aesbs_setkey().
 *
 * Returns 0 on success or the error from xts_verify_key(),
 * crypto_aes_expand_key() or aesbs_setkey().
 */
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			    unsigned int key_len)
{
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = xts_verify_key(tfm, in_key, key_len);
	if (err)
		return err;

	key_len /= 2;
	err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
	if (err)
		return err;

	memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));

	/* do not leave the expanded tweak key behind on the stack */
	memzero_explicit(&rk, sizeof(rk));

	return aesbs_setkey(tfm, in_key, key_len);
}
/*
 * Common XTS walk used by xts_encrypt() and xts_decrypt().
 * @req: skcipher request to process
 * @fn:  bit-sliced XTS routine applied to each chunk
 *
 * The IV is first encrypted in place with the scalar cipher and the tweak
 * key, then whole blocks of each walk chunk are passed to @fn.  For every
 * chunk except the last one the block count is rounded down to a multiple of
 * the walk stride.
 *
 * Returns 0 on success or the error reported by the skcipher walk.
 */
static int __xts_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);
	if (err)
		/* walk.iv must not be dereferenced after a failed walk setup */
		return err;

	__aes_arm_encrypt(ctx->twkey, ctx->key.rounds, walk.iv, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		/* more data follows: only process full strides for now */
		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
		   ctx->key.rounds, blocks, walk.iv);
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
/* XTS encryption: run the shared XTS walk with the encrypt routine. */
static int xts_encrypt(struct skcipher_request *req)
{
	int err = __xts_crypt(req, aesbs_xts_encrypt);

	return err;
}
/* XTS decryption: run the shared XTS walk with the decrypt routine. */
static int xts_decrypt(struct skcipher_request *req)
{
	int err = __xts_crypt(req, aesbs_xts_decrypt);

	return err;
}
/*
 * Internal skcipher implementations provided by this module.  All entries
 * carry CRYPTO_ALG_INTERNAL; aes_init() wraps each of them in a SIMD helper
 * whose names are these names with the leading "__" stripped.  The array
 * order is mirrored by aes_simd_algs[].
 */
static struct skcipher_alg aes_algs[] = { {
	/* ECB */
	.base = {
		.cra_name		= "__ecb(aes)",
		.cra_driver_name	= "__ecb-aes-neonbs",
		.cra_priority		= 250,
		.cra_blocksize		= AES_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct aesbs_ctx),
		.cra_module		= THIS_MODULE,
		.cra_flags		= CRYPTO_ALG_INTERNAL,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,
	.max_keysize	= AES_MAX_KEY_SIZE,
	.walksize	= 8 * AES_BLOCK_SIZE,
	.setkey		= aesbs_setkey,
	.encrypt	= ecb_encrypt,
	.decrypt	= ecb_decrypt,
}, {
	/* CBC */
	.base = {
		.cra_name		= "__cbc(aes)",
		.cra_driver_name	= "__cbc-aes-neonbs",
		.cra_priority		= 250,
		.cra_blocksize		= AES_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct aesbs_cbc_ctx),
		.cra_module		= THIS_MODULE,
		.cra_flags		= CRYPTO_ALG_INTERNAL,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,
	.max_keysize	= AES_MAX_KEY_SIZE,
	.walksize	= 8 * AES_BLOCK_SIZE,
	.ivsize		= AES_BLOCK_SIZE,
	.setkey		= aesbs_cbc_setkey,
	.encrypt	= cbc_encrypt,
	.decrypt	= cbc_decrypt,
}, {
	/* CTR: the same routine serves both directions */
	.base = {
		.cra_name		= "__ctr(aes)",
		.cra_driver_name	= "__ctr-aes-neonbs",
		.cra_priority		= 250,
		.cra_blocksize		= 1,
		.cra_ctxsize		= sizeof(struct aesbs_ctx),
		.cra_module		= THIS_MODULE,
		.cra_flags		= CRYPTO_ALG_INTERNAL,
	},
	.min_keysize	= AES_MIN_KEY_SIZE,
	.max_keysize	= AES_MAX_KEY_SIZE,
	.chunksize	= AES_BLOCK_SIZE,
	.walksize	= 8 * AES_BLOCK_SIZE,
	.ivsize		= AES_BLOCK_SIZE,
	.setkey		= aesbs_setkey,
	.encrypt	= ctr_encrypt,
	.decrypt	= ctr_encrypt,
}, {
	/* XTS: key sizes are doubled (data key + tweak key) */
	.base = {
		.cra_name		= "__xts(aes)",
		.cra_driver_name	= "__xts-aes-neonbs",
		.cra_priority		= 250,
		.cra_blocksize		= AES_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct aesbs_xts_ctx),
		.cra_module		= THIS_MODULE,
		.cra_flags		= CRYPTO_ALG_INTERNAL,
	},
	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
	.walksize	= 8 * AES_BLOCK_SIZE,
	.ivsize		= AES_BLOCK_SIZE,
	.setkey		= aesbs_xts_setkey,
	.encrypt	= xts_encrypt,
	.decrypt	= xts_decrypt,
} };
/* SIMD wrappers created by aes_init(), one slot per aes_algs[] entry */
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
/*
 * Tear down everything aes_init() set up: free the SIMD wrappers that were
 * created, then unregister the internal algorithms.  Also used by aes_init()
 * as its error path, so unused slots must be tolerated.
 */
static void aes_exit(void)
{
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(aes_simd_algs); idx++) {
		struct simd_skcipher_alg *simd = aes_simd_algs[idx];

		if (!simd)
			continue;
		simd_skcipher_free(simd);
	}

	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
/*
 * Module entry point.
 *
 * Bails out with -ENODEV when the CPU does not advertise NEON.  Otherwise
 * registers the internal algorithms and creates a SIMD wrapper for each one
 * flagged CRYPTO_ALG_INTERNAL; the wrapper's algorithm and driver names are
 * the internal names without their two-character "__" prefix.  On failure
 * everything registered so far is undone via aes_exit().
 */
static int __init aes_init(void)
{
	int err;
	int i;

	if (!(elf_hwcap & HWCAP_NEON))
		return -ENODEV;

	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
		struct skcipher_alg *alg = &aes_algs[i];
		struct simd_skcipher_alg *simd;
		const char *basename;
		const char *algname;
		const char *drvname;

		if (!(alg->base.cra_flags & CRYPTO_ALG_INTERNAL))
			continue;

		basename = alg->base.cra_driver_name;
		algname = alg->base.cra_name + 2;
		drvname = basename + 2;

		simd = simd_skcipher_create_compat(algname, drvname, basename);
		if (IS_ERR(simd)) {
			err = PTR_ERR(simd);
			goto unregister_simds;
		}

		aes_simd_algs[i] = simd;
	}

	return 0;

unregister_simds:
	aes_exit();
	return err;
}
module_init(aes_init);
module_exit(aes_exit);

View File

@ -1,98 +0,0 @@
/*
* Glue Code for the asm optimized version of the AES Cipher Algorithm
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <crypto/aes.h>
#include "aes_glue.h"
EXPORT_SYMBOL(AES_encrypt);
EXPORT_SYMBOL(AES_decrypt);
EXPORT_SYMBOL(private_AES_set_encrypt_key);
EXPORT_SYMBOL(private_AES_set_decrypt_key);
/* Encrypt one block from @src into @dst with the context's encryption key. */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *aes = crypto_tfm_ctx(tfm);
	struct AES_KEY *key = &aes->enc_key;

	AES_encrypt(src, dst, key);
}
/* Decrypt one block from @src into @dst with the context's decryption key. */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *aes = crypto_tfm_ctx(tfm);
	struct AES_KEY *key = &aes->dec_key;

	AES_decrypt(src, dst, key);
}
/*
 * Install a key: translate the byte length into the bit count expected by
 * the assembler key setup routines, then build both key schedules.
 *
 * Returns 0 on success.  On an unsupported key length, or when either key
 * setup routine reports -1, CRYPTO_TFM_RES_BAD_KEY_LEN is set on the
 * transform and -EINVAL is returned.
 */
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
	int bits;

	if (key_len == AES_KEYSIZE_128)
		bits = 128;
	else if (key_len == AES_KEYSIZE_192)
		bits = 192;
	else if (key_len == AES_KEYSIZE_256)
		bits = 256;
	else
		goto bad_key;

	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc_key) == -1)
		goto bad_key;

	/* private_AES_set_decrypt_key expects an encryption key as input */
	ctx->dec_key = ctx->enc_key;
	if (private_AES_set_decrypt_key(in_key, bits, &ctx->dec_key) == -1)
		goto bad_key;

	return 0;

bad_key:
	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
	return -EINVAL;
}
/*
 * Algorithm descriptor for the single-block "aes" cipher (driver name
 * "aes-asm", priority 200). The three cipher callbacks are the static
 * helpers defined in this file; the per-transform context is a
 * struct AES_CTX.
 */
static struct crypto_alg aes_alg = {
	.cra_name			= "aes",
	.cra_driver_name		= "aes-asm",
	.cra_priority			= 200,
	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize			= AES_BLOCK_SIZE,
	.cra_ctxsize			= sizeof(struct AES_CTX),
	.cra_module			= THIS_MODULE,
	.cra_list			= LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u.cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
	.cra_u.cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
	.cra_u.cipher.cia_setkey	= aes_set_key,
	.cra_u.cipher.cia_encrypt	= aes_encrypt,
	.cra_u.cipher.cia_decrypt	= aes_decrypt,
};
/*
 * Module entry point: registers aes_alg with the crypto API and returns the
 * result of crypto_register_alg() unchanged.
 */
static int __init aes_init(void)
{
return crypto_register_alg(&aes_alg);
}
/* Module exit point: removes aes_alg from the crypto API again. */
static void __exit aes_fini(void)
{
crypto_unregister_alg(&aes_alg);
}
module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("aes-asm");
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");

View File

@ -1,19 +0,0 @@
/* Upper bound on the number of AES rounds used to size the key schedule. */
#define AES_MAXNR 14
/*
 * Expanded AES key: room for AES_MAXNR + 1 round keys of four words each,
 * followed by a round count.
 * NOTE(review): the layout is presumably shared with the assembly routines
 * declared below - confirm before reordering members.
 */
struct AES_KEY {
unsigned int rd_key[4 * (AES_MAXNR + 1)];
int rounds;
};
/* Per-transform context: one expanded key for each direction. */
struct AES_CTX {
struct AES_KEY enc_key;
struct AES_KEY dec_key;
};
/*
 * Core routines, declared asmlinkage (NOTE(review): presumably implemented
 * in the accompanying assembly file - not visible here).
 * The block functions take (input, output, expanded key).
 * The key setup functions take the raw key, its size in bits and the
 * AES_KEY to fill; callers in this driver treat a non-zero / -1 return as
 * failure.
 */
asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey,
const int bits, struct AES_KEY *key);
asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey,
const int bits, struct AES_KEY *key);

File diff suppressed because it is too large Load Diff

View File

@ -1,367 +0,0 @@
/*
* linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/cbc.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
#include <crypto/xts.h>
#include "aes_glue.h"
/* Size in bytes of the bs[] buffer in struct BS_KEY. */
#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE)
/*
 * Key material handed to the bsaes_* routines.
 * @rk:        ordinary expanded AES key
 * @converted: reset to 0 by the setkey helpers in this file whenever @rk
 *             changes (NOTE(review): presumably set by the NEON code once
 *             bs[] has been derived from @rk - confirm in the asm)
 * @bs:        8-byte aligned buffer for the bit-sliced form of the key
 */
struct BS_KEY {
struct AES_KEY rk;
int converted;
u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE];
} __aligned(8);
/*
 * Bit-sliced AES routines, declared asmlinkage (implementation not visible
 * in this file). The bulk routines take a struct BS_KEY and an IV/tweak
 * buffer; the CBC and XTS ones are given a byte count, the CTR one a block
 * count.
 */
asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in);
asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in);
/* the only caller in this file uses this on the CBC decrypt path */
asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes,
struct BS_KEY *key, u8 iv[]);
asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks,
struct BS_KEY *key, u8 const iv[]);
asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes,
struct BS_KEY *key, u8 tweak[]);
asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes,
struct BS_KEY *key, u8 tweak[]);
/*
 * CBC context: encryption goes through the scalar AES_encrypt() with a plain
 * expanded key, decryption may use the bit-sliced NEON code and therefore
 * needs a struct BS_KEY.
 */
struct aesbs_cbc_ctx {
struct AES_KEY enc;
struct BS_KEY dec;
};
/* CTR context: only the encryption direction is needed. */
struct aesbs_ctr_ctx {
struct BS_KEY enc;
};
/*
 * XTS context: bit-sliced keys for both data directions plus a plain
 * expanded key (@twkey) used to encrypt the IV into the initial tweak.
 */
struct aesbs_xts_ctx {
struct BS_KEY enc;
struct BS_KEY dec;
struct AES_KEY twkey;
};
/*
 * setkey handler for "__cbc(aes)".
 *
 * Expands @in_key (@key_len bytes) into the scalar encryption schedule and
 * into the decryption schedule embedded in the bit-sliced key, then marks
 * the bit-sliced key as not yet converted.
 *
 * Returns 0 on success; if the encryption key expansion fails the
 * CRYPTO_TFM_RES_BAD_KEY_LEN flag is set on @tfm and -EINVAL is returned.
 */
static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
			     unsigned int key_len)
{
	struct aesbs_cbc_ctx *cbc = crypto_skcipher_ctx(tfm);
	int nbits = key_len * 8;
	int rc;

	rc = private_AES_set_encrypt_key(in_key, nbits, &cbc->enc);
	if (rc != 0) {
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	/* the decrypt key setup starts from a copy of the encryption key */
	cbc->dec.rk = cbc->enc;
	private_AES_set_decrypt_key(in_key, nbits, &cbc->dec.rk);

	/* any previously derived bit-sliced key is stale now */
	cbc->dec.converted = 0;

	return 0;
}
/*
 * setkey handler for "__ctr(aes)".
 *
 * Expands @in_key (@key_len bytes) into the encryption schedule of the
 * bit-sliced key and marks that key as not yet converted.
 *
 * Returns 0 on success; if the key expansion fails the
 * CRYPTO_TFM_RES_BAD_KEY_LEN flag is set on @tfm and -EINVAL is returned.
 */
static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
			     unsigned int key_len)
{
	struct aesbs_ctr_ctx *ctr = crypto_skcipher_ctx(tfm);
	int nbits = key_len * 8;
	int rc;

	rc = private_AES_set_encrypt_key(in_key, nbits, &ctr->enc.rk);
	if (rc != 0) {
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	/* any previously derived bit-sliced key is stale now */
	ctr->enc.converted = 0;

	return 0;
}
/*
 * setkey handler for "__xts(aes)".
 *
 * @in_key holds two keys of key_len / 2 bytes each: the first half is the
 * data key (expanded for both directions), the second half is the tweak key.
 *
 * Returns the error from xts_verify_key() if that check fails, -EINVAL
 * (with CRYPTO_TFM_RES_BAD_KEY_LEN set on @tfm) if the data key cannot be
 * expanded, and 0 otherwise.
 */
static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
			     unsigned int key_len)
{
	struct aesbs_xts_ctx *xts = crypto_skcipher_ctx(tfm);
	/* each half is key_len / 2 bytes, i.e. key_len * 4 bits */
	int half_bits = key_len * 4;
	const u8 *tweak_key = in_key + key_len / 2;
	int rc;

	rc = xts_verify_key(tfm, in_key, key_len);
	if (rc)
		return rc;

	rc = private_AES_set_encrypt_key(in_key, half_bits, &xts->enc.rk);
	if (rc != 0) {
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	/* the decrypt key setup starts from a copy of the encryption key */
	xts->dec.rk = xts->enc.rk;
	private_AES_set_decrypt_key(in_key, half_bits, &xts->dec.rk);

	private_AES_set_encrypt_key(tweak_key, half_bits, &xts->twkey);

	/* any previously derived bit-sliced keys are stale now */
	xts->dec.converted = 0;
	xts->enc.converted = 0;

	return 0;
}
/*
 * Per-block callback passed to crypto_cbc_encrypt_walk(): encrypts one block
 * from @src to @dst with the scalar AES_encrypt() and the CBC context's
 * encryption key.
 */
static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm,
const u8 *src, u8 *dst)
{
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
AES_encrypt(src, dst, &ctx->enc);
}
/*
 * CBC encryption entry point: delegates the whole request to
 * crypto_cbc_encrypt_walk() with aesbs_encrypt_one() as the block cipher, so
 * no NEON code is involved on this path.
 */
static int aesbs_cbc_encrypt(struct skcipher_request *req)
{
return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one);
}
/*
 * Per-block callback passed to crypto_cbc_decrypt_blocks(): decrypts one
 * block from @src to @dst with the scalar AES_decrypt() and the plain
 * (non bit-sliced) part of the CBC decryption key.
 */
static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm,
const u8 *src, u8 *dst)
{
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
AES_decrypt(src, dst, &ctx->dec.rk);
}
/*
 * CBC decryption entry point.
 *
 * Walks the request chunk by chunk. A chunk of at least eight blocks is
 * handed to the NEON routine bsaes_cbc_encrypt() together with the
 * decryption key; smaller chunks go through crypto_cbc_decrypt_blocks()
 * with the scalar single-block helper. Whatever is left unprocessed in a
 * chunk is reported back to skcipher_walk_done().
 *
 * Returns the status of the last walk operation.
 */
static int aesbs_cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_cbc_ctx *cbc = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int left;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((left = walk.nbytes) != 0) {
		u32 nblocks = left / AES_BLOCK_SIZE;

		if (nblocks >= 8) {
			kernel_neon_begin();
			bsaes_cbc_encrypt(walk.src.virt.addr,
					  walk.dst.virt.addr, left,
					  &cbc->dec, walk.iv);
			kernel_neon_end();
			/* only a trailing partial block remains */
			left %= AES_BLOCK_SIZE;
		} else {
			left = crypto_cbc_decrypt_blocks(&walk, tfm,
							 aesbs_decrypt_one);
		}

		err = skcipher_walk_done(&walk, left);
	}

	return err;
}
/*
 * Add @addend to the counter held in @ctr as four big-endian 32-bit words,
 * ctr[3] being the least significant one.
 *
 * The addend is applied to the lowest word; as long as a word wraps around,
 * a carry of one is propagated into the next more significant word.
 */
static void inc_be128_ctr(__be32 ctr[], u32 addend)
{
	u32 carry_in = addend;
	int word = 3;

	while (word >= 0) {
		u32 sum = be32_to_cpu(ctr[word]) + carry_in;

		ctr[word] = cpu_to_be32(sum);

		/* no wrap-around in this word, so nothing left to carry */
		if (sum >= carry_in)
			return;

		carry_in = 1;
		word--;
	}
}
/*
 * CTR mode entry point (used for both encryption and decryption).
 *
 * Full blocks of every walk chunk are processed by the NEON routine
 * bsaes_ctr32_encrypt_blocks(). The number of blocks passed per call is
 * capped so that the low 32-bit counter word does not wrap inside the NEON
 * code; the 128-bit counter is then advanced here with inc_be128_ctr().
 * A final partial block is handled by encrypting the counter with the
 * scalar AES_encrypt() and XOR-ing the key stream into the data.
 *
 * Returns the status of the last walk operation.
 */
static int aesbs_ctr_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	for (;;) {
		u32 nblocks = walk.nbytes / AES_BLOCK_SIZE;
		u32 leftover;
		u32 headroom;
		__be32 *ctr;

		if (nblocks == 0)
			break;

		leftover = walk.nbytes % AES_BLOCK_SIZE;
		ctr = (__be32 *)walk.iv;
		headroom = UINT_MAX - be32_to_cpu(ctr[3]);

		/* avoid 32 bit counter overflow in the NEON code */
		if (unlikely(headroom < nblocks)) {
			nblocks = headroom + 1;
			leftover = walk.nbytes - nblocks * AES_BLOCK_SIZE;
		}

		kernel_neon_begin();
		bsaes_ctr32_encrypt_blocks(walk.src.virt.addr,
					   walk.dst.virt.addr, nblocks,
					   &ctx->enc, walk.iv);
		kernel_neon_end();

		inc_be128_ctr(ctr, nblocks);

		err = skcipher_walk_done(&walk, leftover);
	}

	if (walk.nbytes != 0) {
		/* fewer than AES_BLOCK_SIZE bytes are left at this point */
		u8 *tail_dst = walk.dst.virt.addr;
		u8 *tail_src = walk.src.virt.addr;
		u8 keystream[AES_BLOCK_SIZE];

		AES_encrypt(walk.iv, keystream, &ctx->enc.rk);
		if (tail_dst != tail_src)
			memcpy(tail_dst, tail_src, walk.nbytes);
		crypto_xor(tail_dst, keystream, walk.nbytes);

		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}
/*
 * XTS encryption entry point.
 *
 * The IV is first encrypted in place with the tweak key to form the initial
 * tweak; every walk chunk is then passed to the NEON routine
 * bsaes_xts_encrypt(), which also receives the tweak buffer. Bytes beyond
 * the last full block of a chunk are handed back to the walker.
 *
 * Returns 0 on success or the error reported by the skcipher walk.
 */
static int aesbs_xts_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);
	/*
	 * This algorithm declares an alignmask, so walk.iv may be a temporary
	 * aligned copy owned by the walker. If starting the walk failed that
	 * buffer must not be touched any more, so bail out before using it.
	 */
	if (err)
		return err;

	/* generate the initial tweak */
	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);

	while (walk.nbytes) {
		kernel_neon_begin();
		bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
				  walk.nbytes, &ctx->enc, walk.iv);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}
/*
 * XTS decryption entry point.
 *
 * The IV is first encrypted in place with the tweak key to form the initial
 * tweak (the tweak is always produced with AES_encrypt(), also when
 * decrypting); every walk chunk is then passed to the NEON routine
 * bsaes_xts_decrypt(), which also receives the tweak buffer. Bytes beyond
 * the last full block of a chunk are handed back to the walker.
 *
 * Returns 0 on success or the error reported by the skcipher walk.
 */
static int aesbs_xts_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);
	/*
	 * This algorithm declares an alignmask, so walk.iv may be a temporary
	 * aligned copy owned by the walker. If starting the walk failed that
	 * buffer must not be touched any more, so bail out before using it.
	 */
	if (err)
		return err;

	/* generate the initial tweak */
	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);

	while (walk.nbytes) {
		kernel_neon_begin();
		bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
				  walk.nbytes, &ctx->dec, walk.iv);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}
/*
 * The three internal skciphers provided by this driver: CBC, CTR and XTS.
 * All of them carry CRYPTO_ALG_INTERNAL and names starting with "__"; the
 * module init code strips that prefix when it creates the SIMD wrappers.
 * CTR uses the same handler for both directions.
 */
static struct skcipher_alg aesbs_algs[] = { {
	.base.cra_name		= "__cbc(aes)",
	.base.cra_driver_name	= "__cbc-aes-neonbs",
	.base.cra_priority	= 300,
	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
	.base.cra_blocksize	= AES_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctx),
	.base.cra_alignmask	= 7,
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= AES_MIN_KEY_SIZE,
	.max_keysize		= AES_MAX_KEY_SIZE,
	.ivsize			= AES_BLOCK_SIZE,
	.setkey			= aesbs_cbc_set_key,
	.encrypt		= aesbs_cbc_encrypt,
	.decrypt		= aesbs_cbc_decrypt,
}, {
	.base.cra_name		= "__ctr(aes)",
	.base.cra_driver_name	= "__ctr-aes-neonbs",
	.base.cra_priority	= 300,
	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct aesbs_ctr_ctx),
	.base.cra_alignmask	= 7,
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= AES_MIN_KEY_SIZE,
	.max_keysize		= AES_MAX_KEY_SIZE,
	.ivsize			= AES_BLOCK_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.setkey			= aesbs_ctr_set_key,
	.encrypt		= aesbs_ctr_encrypt,
	.decrypt		= aesbs_ctr_encrypt,
}, {
	.base.cra_name		= "__xts(aes)",
	.base.cra_driver_name	= "__xts-aes-neonbs",
	.base.cra_priority	= 300,
	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
	.base.cra_blocksize	= AES_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct aesbs_xts_ctx),
	.base.cra_alignmask	= 7,
	.base.cra_module	= THIS_MODULE,

	/* an XTS key is a data key followed by a tweak key */
	.min_keysize		= 2 * AES_MIN_KEY_SIZE,
	.max_keysize		= 2 * AES_MAX_KEY_SIZE,
	.ivsize			= AES_BLOCK_SIZE,
	.setkey			= aesbs_xts_set_key,
	.encrypt		= aesbs_xts_encrypt,
	.decrypt		= aesbs_xts_decrypt,
} };
/*
 * SIMD wrappers created by aesbs_mod_init(), one slot per entry of
 * aesbs_algs. Slots are filled front to back, so the first NULL slot marks
 * the end of the valid entries. Only used inside this file, hence static.
 */
static struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)];
/*
 * Tear down everything aesbs_mod_init() set up: free the SIMD wrappers that
 * were created (stopping at the first empty slot) and unregister the
 * internal skciphers. Also used as the error path of aesbs_mod_init().
 */
static void aesbs_mod_exit(void)
{
	int slot;

	for (slot = 0; slot < ARRAY_SIZE(aesbs_simd_algs); slot++) {
		if (!aesbs_simd_algs[slot])
			break;
		simd_skcipher_free(aesbs_simd_algs[slot]);
	}

	crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
}
/*
 * Module entry point.
 *
 * Bails out with -ENODEV when the CPU has no NEON unit. Otherwise the
 * internal skciphers are registered and a SIMD wrapper is created for each
 * of them; the wrapper's algorithm and driver names are the internal names
 * with the leading "__" removed.
 *
 * Returns 0 on success. If creating a wrapper fails, everything registered
 * so far is undone via aesbs_mod_exit() and the error is returned.
 */
static int __init aesbs_mod_init(void)
{
	struct simd_skcipher_alg *wrapper;
	int err;
	int i;

	if (!cpu_has_neon())
		return -ENODEV;

	err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) {
		const char *basename = aesbs_algs[i].base.cra_driver_name;
		/* skip the "__" prefix for the public names */
		const char *algname = aesbs_algs[i].base.cra_name + 2;
		const char *drvname = basename + 2;

		wrapper = simd_skcipher_create_compat(algname, drvname,
						      basename);
		if (IS_ERR(wrapper)) {
			err = PTR_ERR(wrapper);
			aesbs_mod_exit();
			return err;
		}

		aesbs_simd_algs[i] = wrapper;
	}

	return 0;
}
module_init(aesbs_mod_init);
module_exit(aesbs_mod_exit);
MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL");

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,523 @@
/*
* ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
*
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Based on:
* ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/linkage.h>
.text
.fpu neon
.align 5
ENTRY(chacha20_block_xor_neon)
// r0: Input state matrix, s
// r1: 1 data block output, o
// r2: 1 data block input, i
//
// This function encrypts one ChaCha20 block by loading the state matrix
// in four NEON registers. It performs matrix operations on four words in
// parallel, but requires shuffling to rearrange the words after each
// round.
//
// x0..3 = s0..3
add ip, r0, #0x20
vld1.32 {q0-q1}, [r0]
vld1.32 {q2-q3}, [ip]
// keep a copy of the input state in q8-q11 for the final addition
vmov q8, q0
vmov q9, q1
vmov q10, q2
vmov q11, q3
// r3 counts the double rounds: the loop below runs 10 times
mov r3, #10
.Ldoubleround:
// Each rotate is built from a shift left (vshl) into the destination
// followed by a shift-right-and-insert (vsri) of the same source, with
// the two shift amounts adding up to 32. q4 is the scratch register.
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
vadd.i32 q0, q0, q1
veor q4, q3, q0
vshl.u32 q3, q4, #16
vsri.u32 q3, q4, #16
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
vadd.i32 q2, q2, q3
veor q4, q1, q2
vshl.u32 q1, q4, #12
vsri.u32 q1, q4, #20
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
vadd.i32 q0, q0, q1
veor q4, q3, q0
vshl.u32 q3, q4, #8
vsri.u32 q3, q4, #24
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
vadd.i32 q2, q2, q3
veor q4, q1, q2
vshl.u32 q1, q4, #7
vsri.u32 q1, q4, #25
// rotate rows 1-3 by one, two and three words for the second half
// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
vext.8 q1, q1, q1, #4
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
vext.8 q2, q2, q2, #8
// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
vext.8 q3, q3, q3, #12
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
vadd.i32 q0, q0, q1
veor q4, q3, q0
vshl.u32 q3, q4, #16
vsri.u32 q3, q4, #16
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
vadd.i32 q2, q2, q3
veor q4, q1, q2
vshl.u32 q1, q4, #12
vsri.u32 q1, q4, #20
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
vadd.i32 q0, q0, q1
veor q4, q3, q0
vshl.u32 q3, q4, #8
vsri.u32 q3, q4, #24
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
vadd.i32 q2, q2, q3
veor q4, q1, q2
vshl.u32 q1, q4, #7
vsri.u32 q1, q4, #25
// undo the row rotation applied in the middle of the double round
// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
vext.8 q1, q1, q1, #12
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
vext.8 q2, q2, q2, #8
// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
vext.8 q3, q3, q3, #4
subs r3, r3, #1
bne .Ldoubleround
// load the 64 byte input block into q4-q7
add ip, r2, #0x20
vld1.8 {q4-q5}, [r2]
vld1.8 {q6-q7}, [ip]
// o0 = i0 ^ (x0 + s0)
vadd.i32 q0, q0, q8
veor q0, q0, q4
// o1 = i1 ^ (x1 + s1)
vadd.i32 q1, q1, q9
veor q1, q1, q5
// o2 = i2 ^ (x2 + s2)
vadd.i32 q2, q2, q10
veor q2, q2, q6
// o3 = i3 ^ (x3 + s3)
vadd.i32 q3, q3, q11
veor q3, q3, q7
// store the 64 byte output block
add ip, r1, #0x20
vst1.8 {q0-q1}, [r1]
vst1.8 {q2-q3}, [ip]
bx lr
ENDPROC(chacha20_block_xor_neon)
.align 5
ENTRY(chacha20_4block_xor_neon)
push {r4-r6, lr}
mov ip, sp // preserve the stack pointer
sub r3, sp, #0x20 // allocate a 32 byte buffer
bic r3, r3, #0x1f // aligned to 32 bytes
mov sp, r3
// r0: Input state matrix, s
// r1: 4 data blocks output, o
// r2: 4 data blocks input, i
//
// This function encrypts four consecutive ChaCha20 blocks by loading
// the state matrix in NEON registers four times. The algorithm performs
// each operation on the corresponding word of each state matrix, hence
// requires no word shuffling. For final XORing step we transpose the
// matrix by interleaving 32- and then 64-bit words, which allows us to
// do XOR in NEON registers.
//
// All sixteen q registers hold state words, so the 32 byte stack buffer
// is used to park x8/x9 whenever q8/q9 are needed as temporaries.
//
// x0..15[0-3] = s0..3[0..3]
add r3, r0, #0x20
vld1.32 {q0-q1}, [r0]
vld1.32 {q2-q3}, [r3]
adr r3, CTRINC
// broadcast each state word into all four lanes of its own register,
// working from x15 downwards so the source registers q0-q3 are
// overwritten last
vdup.32 q15, d7[1]
vdup.32 q14, d7[0]
vld1.32 {q11}, [r3, :128]
vdup.32 q13, d6[1]
vdup.32 q12, d6[0]
vadd.i32 q12, q12, q11 // x12 += counter values 0-3
vdup.32 q11, d5[1]
vdup.32 q10, d5[0]
vdup.32 q9, d4[1]
vdup.32 q8, d4[0]
vdup.32 q7, d3[1]
vdup.32 q6, d3[0]
vdup.32 q5, d2[1]
vdup.32 q4, d2[0]
vdup.32 q3, d1[1]
vdup.32 q2, d1[0]
vdup.32 q1, d0[1]
vdup.32 q0, d0[0]
// r3 counts the double rounds: the loop below runs 10 times
mov r3, #10
.Ldoubleround4:
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
vadd.i32 q0, q0, q4
vadd.i32 q1, q1, q5
vadd.i32 q2, q2, q6
vadd.i32 q3, q3, q7
veor q12, q12, q0
veor q13, q13, q1
veor q14, q14, q2
veor q15, q15, q3
// rotating by 16 is done by swapping the 16-bit halves of each word
vrev32.16 q12, q12
vrev32.16 q13, q13
vrev32.16 q14, q14
vrev32.16 q15, q15
// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
vadd.i32 q8, q8, q12
vadd.i32 q9, q9, q13
vadd.i32 q10, q10, q14
vadd.i32 q11, q11, q15
// park x8/x9 on the stack; q8/q9 serve as temporaries below
vst1.32 {q8-q9}, [sp, :256]
veor q8, q4, q8
veor q9, q5, q9
vshl.u32 q4, q8, #12
vshl.u32 q5, q9, #12
vsri.u32 q4, q8, #20
vsri.u32 q5, q9, #20
veor q8, q6, q10
veor q9, q7, q11
vshl.u32 q6, q8, #12
vshl.u32 q7, q9, #12
vsri.u32 q6, q8, #20
vsri.u32 q7, q9, #20
// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
vadd.i32 q0, q0, q4
vadd.i32 q1, q1, q5
vadd.i32 q2, q2, q6
vadd.i32 q3, q3, q7
veor q8, q12, q0
veor q9, q13, q1
vshl.u32 q12, q8, #8
vshl.u32 q13, q9, #8
vsri.u32 q12, q8, #24
vsri.u32 q13, q9, #24
veor q8, q14, q2
veor q9, q15, q3
vshl.u32 q14, q8, #8
vshl.u32 q15, q9, #8
vsri.u32 q14, q8, #24
vsri.u32 q15, q9, #24
// bring x8/x9 back from the stack
vld1.32 {q8-q9}, [sp, :256]
// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
vadd.i32 q8, q8, q12
vadd.i32 q9, q9, q13
vadd.i32 q10, q10, q14
vadd.i32 q11, q11, q15
vst1.32 {q8-q9}, [sp, :256]
veor q8, q4, q8
veor q9, q5, q9
vshl.u32 q4, q8, #7
vshl.u32 q5, q9, #7
vsri.u32 q4, q8, #25
vsri.u32 q5, q9, #25
veor q8, q6, q10
veor q9, q7, q11
vshl.u32 q6, q8, #7
vshl.u32 q7, q9, #7
vsri.u32 q6, q8, #25
vsri.u32 q7, q9, #25
vld1.32 {q8-q9}, [sp, :256]
// second half of the double round: same steps on the diagonals
// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
vadd.i32 q0, q0, q5
vadd.i32 q1, q1, q6
vadd.i32 q2, q2, q7
vadd.i32 q3, q3, q4
veor q15, q15, q0
veor q12, q12, q1
veor q13, q13, q2
veor q14, q14, q3
vrev32.16 q15, q15
vrev32.16 q12, q12
vrev32.16 q13, q13
vrev32.16 q14, q14
// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
vadd.i32 q10, q10, q15
vadd.i32 q11, q11, q12
vadd.i32 q8, q8, q13
vadd.i32 q9, q9, q14
vst1.32 {q8-q9}, [sp, :256]
veor q8, q7, q8
veor q9, q4, q9
vshl.u32 q7, q8, #12
vshl.u32 q4, q9, #12
vsri.u32 q7, q8, #20
vsri.u32 q4, q9, #20
veor q8, q5, q10
veor q9, q6, q11
vshl.u32 q5, q8, #12
vshl.u32 q6, q9, #12
vsri.u32 q5, q8, #20
vsri.u32 q6, q9, #20
// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
vadd.i32 q0, q0, q5
vadd.i32 q1, q1, q6
vadd.i32 q2, q2, q7
vadd.i32 q3, q3, q4
veor q8, q15, q0
veor q9, q12, q1
vshl.u32 q15, q8, #8
vshl.u32 q12, q9, #8
vsri.u32 q15, q8, #24
vsri.u32 q12, q9, #24
veor q8, q13, q2
veor q9, q14, q3
vshl.u32 q13, q8, #8
vshl.u32 q14, q9, #8
vsri.u32 q13, q8, #24
vsri.u32 q14, q9, #24
vld1.32 {q8-q9}, [sp, :256]
// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
vadd.i32 q10, q10, q15
vadd.i32 q11, q11, q12
vadd.i32 q8, q8, q13
vadd.i32 q9, q9, q14
vst1.32 {q8-q9}, [sp, :256]
veor q8, q7, q8
veor q9, q4, q9
vshl.u32 q7, q8, #7
vshl.u32 q4, q9, #7
vsri.u32 q7, q8, #25
vsri.u32 q4, q9, #25
veor q8, q5, q10
veor q9, q6, q11
vshl.u32 q5, q8, #7
vshl.u32 q6, q9, #7
vsri.u32 q5, q8, #25
vsri.u32 q6, q9, #25
subs r3, r3, #1
beq 0f
// another double round follows: reload x8/x9 first. On the final
// pass they stay on the stack until after the first output store.
vld1.32 {q8-q9}, [sp, :256]
b .Ldoubleround4
// x0[0-3] += s0[0]
// x1[0-3] += s0[1]
// x2[0-3] += s0[2]
// x3[0-3] += s0[3]
0: ldmia r0!, {r3-r6}
vdup.32 q8, r3
vdup.32 q9, r4
vadd.i32 q0, q0, q8
vadd.i32 q1, q1, q9
vdup.32 q8, r5
vdup.32 q9, r6
vadd.i32 q2, q2, q8
vadd.i32 q3, q3, q9
// x4[0-3] += s1[0]
// x5[0-3] += s1[1]
// x6[0-3] += s1[2]
// x7[0-3] += s1[3]
ldmia r0!, {r3-r6}
vdup.32 q8, r3
vdup.32 q9, r4
vadd.i32 q4, q4, q8
vadd.i32 q5, q5, q9
vdup.32 q8, r5
vdup.32 q9, r6
vadd.i32 q6, q6, q8
vadd.i32 q7, q7, q9
// interleave 32-bit words in state n, n+1
vzip.32 q0, q1
vzip.32 q2, q3
vzip.32 q4, q5
vzip.32 q6, q7
// interleave 64-bit words in state n, n+2
vswp d1, d4
vswp d3, d6
vswp d9, d12
vswp d11, d14
// xor with corresponding input, write to output
vld1.8 {q8-q9}, [r2]!
veor q8, q8, q0
veor q9, q9, q4
vst1.8 {q8-q9}, [r1]!
// q8/q9 were used as load temporaries: fetch x8/x9 from the stack
vld1.32 {q8-q9}, [sp, :256]
// x8[0-3] += s2[0]
// x9[0-3] += s2[1]
// x10[0-3] += s2[2]
// x11[0-3] += s2[3]
ldmia r0!, {r3-r6}
vdup.32 q0, r3
vdup.32 q4, r4
vadd.i32 q8, q8, q0
vadd.i32 q9, q9, q4
vdup.32 q0, r5
vdup.32 q4, r6
vadd.i32 q10, q10, q0
vadd.i32 q11, q11, q4
// x12[0-3] += s3[0]
// x13[0-3] += s3[1]
// x14[0-3] += s3[2]
// x15[0-3] += s3[3]
ldmia r0!, {r3-r6}
vdup.32 q0, r3
vdup.32 q4, r4
adr r3, CTRINC
vadd.i32 q12, q12, q0
vld1.32 {q0}, [r3, :128]
vadd.i32 q13, q13, q4
vadd.i32 q12, q12, q0 // x12 += counter values 0-3
vdup.32 q0, r5
vdup.32 q4, r6
vadd.i32 q14, q14, q0
vadd.i32 q15, q15, q4
// interleave 32-bit words in state n, n+1
vzip.32 q8, q9
vzip.32 q10, q11
vzip.32 q12, q13
vzip.32 q14, q15
// interleave 64-bit words in state n, n+2
vswp d17, d20
vswp d19, d22
vswp d25, d28
vswp d27, d30
// q0/q1 become the load temporaries from here on, so move the data
// still held in q1 to q4 (its previous contents were already written out)
vmov q4, q1
// xor the remaining 32 byte pieces with the input, write to output
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q8
veor q1, q1, q12
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q2
veor q1, q1, q6
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q10
veor q1, q1, q14
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q4
veor q1, q1, q5
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q9
veor q1, q1, q13
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q3
veor q1, q1, q7
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]
veor q0, q0, q11
veor q1, q1, q15
vst1.8 {q0-q1}, [r1]
// release the aligned stack buffer
mov sp, ip
pop {r4-r6, pc}
ENDPROC(chacha20_4block_xor_neon)
.align 4
// per-block offsets added to the block counter word (x12) of the four states
CTRINC: .word 0, 1, 2, 3

View File

@ -0,0 +1,127 @@
/*
* ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
*
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Based on:
* ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
/*
 * NEON core routines, declared asmlinkage. Judging by their use in
 * chacha20_doneon(), the first processes one CHACHA20_BLOCK_SIZE block and
 * the second four consecutive blocks; neither updates the block counter in
 * @state, the caller does that.
 */
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
/*
 * Run ChaCha20 over @bytes bytes from @src to @dst using the NEON routines.
 *
 * @state: the 16-word ChaCha20 state; word 12 (the block counter) is
 *         advanced for every full block processed
 *
 * Data is consumed four blocks at a time while possible, then one block at
 * a time. A trailing partial block is copied into a block-sized bounce
 * buffer, processed in place there, and only the valid bytes are copied
 * out; the counter is not advanced for it.
 */
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	const unsigned int one = CHACHA20_BLOCK_SIZE;
	const unsigned int four = CHACHA20_BLOCK_SIZE * 4;
	u8 bounce[CHACHA20_BLOCK_SIZE];

	for (; bytes >= four; bytes -= four, src += four, dst += four) {
		chacha20_4block_xor_neon(state, dst, src);
		state[12] += 4;
	}

	for (; bytes >= one; bytes -= one, src += one, dst += one) {
		chacha20_block_xor_neon(state, dst, src);
		state[12]++;
	}

	if (!bytes)
		return;

	memcpy(bounce, src, bytes);
	chacha20_block_xor_neon(state, bounce, bounce);
	memcpy(dst, bounce, bytes);
}
/*
 * skcipher encrypt/decrypt handler.
 *
 * Requests of at most one block, and requests issued where SIMD may not be
 * used, are passed to crypto_chacha20_crypt(). Everything else is walked
 * with the NEON unit enabled for the duration of the walk. Every chunk
 * except the last one is rounded down to a multiple of the walk stride;
 * the remainder is handed back to the walker.
 *
 * Returns the status of the last walk operation (or of the fallback).
 */
static int chacha20_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *key = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	if (!(req->cryptlen > CHACHA20_BLOCK_SIZE && may_use_simd()))
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, key, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int todo;

		/* only the final chunk may end in a partial stride */
		if (walk.nbytes < walk.total)
			todo = round_down(walk.nbytes, walk.stride);
		else
			todo = walk.nbytes;

		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
				todo);
		err = skcipher_walk_done(&walk, walk.nbytes - todo);
	}
	kernel_neon_end();

	return err;
}
static struct skcipher_alg alg = {
.base.cra_name = "chacha20",
.base.cra_driver_name = "chacha20-neon",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA20_KEY_SIZE,
.max_keysize = CHACHA20_KEY_SIZE,
.ivsize = CHACHA20_IV_SIZE,
.chunksize = CHACHA20_BLOCK_SIZE,
.walksize = 4 * CHACHA20_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
.encrypt = chacha20_neon,
.decrypt = chacha20_neon,
};
/*
 * Module entry point: registers the skcipher when the NEON hwcap bit is
 * set, otherwise fails with -ENODEV.
 */
static int __init chacha20_simd_mod_init(void)
{
	if (elf_hwcap & HWCAP_NEON)
		return crypto_register_skcipher(&alg);

	return -ENODEV;
}
/* Module exit point: unregisters the skcipher registered at init time. */
static void __exit chacha20_simd_mod_fini(void)
{
crypto_unregister_skcipher(&alg);
}
module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");

View File

@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
CONFIG_CRYPTO_CRC32_ARM64=y

View File

@ -37,10 +37,14 @@ config CRYPTO_CRCT10DIF_ARM64_CE
select CRYPTO_HASH
config CRYPTO_CRC32_ARM64_CE
tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
depends on KERNEL_MODE_NEON && CRC32
tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
depends on CRC32
select CRYPTO_HASH
config CRYPTO_AES_ARM64
tristate "AES core cipher using scalar instructions"
select CRYPTO_AES
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
@ -67,9 +71,17 @@ config CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES
select CRYPTO_SIMD
config CRYPTO_CRC32_ARM64
tristate "CRC32 and CRC32C using optional ARMv8 instructions"
depends on ARM64
select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON
tristate "NEON accelerated ChaCha20 symmetric cipher"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
config CRYPTO_AES_ARM64_BS
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_SIMD
endif

View File

@ -41,15 +41,20 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
AFLAGS_aes-ce.o := -DINTERLEAVE=4
AFLAGS_aes-neon.o := -DINTERLEAVE=4
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
CFLAGS_crc32-arm64.o := -mcpu=generic+crc
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_rule,cc_o_c)

View File

@ -258,7 +258,6 @@ static struct aead_alg ccm_aes_alg = {
.cra_priority = 300,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.ivsize = AES_BLOCK_SIZE,

View File

@ -0,0 +1,110 @@
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
/*
 * Register aliases. Note that 'in' and 'lt' both name x2: the input pointer
 * is only read at the start of do_crypt, after which x2 is reused for the
 * second lookup table address.
 */
rk .req x0
out .req x1
in .req x2
rounds .req x3
tt .req x4
lt .req x2
/*
 * __pair - two table lookups.
 * Extracts the byte at bit offset \shift from \in0 into \reg0 and from
 * \in1e (when \enc is non-zero) or \in1d (when \enc is zero) into \reg1,
 * then replaces each byte with the 32-bit word at tt + 4 * byte.
 */
.macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift
ubfx \reg0, \in0, #\shift, #8
.if \enc
ubfx \reg1, \in1e, #\shift, #8
.else
ubfx \reg1, \in1d, #\shift, #8
.endif
ldr \reg0, [tt, \reg0, uxtw #2]
ldr \reg1, [tt, \reg1, uxtw #2]
.endm
/*
 * __hround - half a round: produces two of the four output words.
 * Loads two round key words from rk (post-incremented by 8) into
 * \out0/\out1, performs four __pair lookups for the byte positions 0, 8, 16
 * and 24, and XORs the results into the outputs rotated right by 0, 24, 16
 * and 8 bits respectively. w13-w18 plus \t0/\t1 are used as temporaries.
 */
.macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
ldp \out0, \out1, [rk], #8
__pair \enc, w13, w14, \in0, \in1, \in3, 0
__pair \enc, w15, w16, \in1, \in2, \in0, 8
__pair \enc, w17, w18, \in2, \in3, \in1, 16
__pair \enc, \t0, \t1, \in3, \in0, \in2, 24
eor \out0, \out0, w13
eor \out1, \out1, w14
eor \out0, \out0, w15, ror #24
eor \out1, \out1, w16, ror #24
eor \out0, \out0, w17, ror #16
eor \out1, \out1, w18, ror #16
eor \out0, \out0, \t0, ror #8
eor \out1, \out1, \t1, ror #8
.endm
/*
 * fround - one full forward round built from two __hround invocations with
 * \enc = 1. The first half uses \out2/\out3 as temporaries; the second half
 * uses \in1/\in2, so those inputs are destroyed.
 */
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
.endm
/*
 * iround - one full inverse round built from two __hround invocations with
 * \enc = 0; compared with fround the \in1 and \in3 operands are swapped.
 * The first half uses \out2/\out3 as temporaries; the second half uses
 * \in1/\in0, so those inputs are destroyed.
 */
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
.endm
/*
 * do_crypt - body shared by the encrypt and decrypt entry points.
 * \round is the round macro to use (fround or iround), \ttab the lookup
 * table for the regular rounds and \ltab the table selected for the last
 * round.
 */
.macro do_crypt, round, ttab, ltab
/* load the 16 byte input block and the first four round key words */
ldp w5, w6, [in]
ldp w7, w8, [in, #8]
ldp w9, w10, [rk], #16
ldp w11, w12, [rk, #-8]
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
CPU_BE( rev w8, w8 )
/* initial round key addition */
eor w5, w5, w9
eor w6, w6, w10
eor w7, w7, w11
eor w8, w8, w12
adr_l tt, \ttab
/* 'lt' aliases x2, so the input pointer is gone after this point */
adr_l lt, \ltab
/*
 * The loop body from label 0 performs four rounds, from label 1 only two.
 * If bit 1 of the round count is set, enter at label 1.
 */
tbnz rounds, #1, 1f
0: \round w9, w10, w11, w12, w5, w6, w7, w8
\round w5, w6, w7, w8, w9, w10, w11, w12
1: subs rounds, rounds, #4
\round w9, w10, w11, w12, w5, w6, w7, w8
/* once the count is used up, switch to \ltab for the final round */
csel tt, tt, lt, hi
\round w5, w6, w7, w8, w9, w10, w11, w12
b.hi 0b
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
CPU_BE( rev w8, w8 )
/* store the 16 byte result */
stp w5, w6, [out]
stp w7, w8, [out, #8]
ret
.endm
.align 5
/*
 * Encrypt one block: x0 = round keys, x1 = output, x2 = input,
 * x3 = number of rounds (see the register aliases at the top of the file).
 */
ENTRY(__aes_arm64_encrypt)
do_crypt fround, crypto_ft_tab, crypto_fl_tab
ENDPROC(__aes_arm64_encrypt)
.align 5
/*
 * Decrypt one block: x0 = round keys, x1 = output, x2 = input,
 * x3 = number of rounds (see the register aliases at the top of the file).
 */
ENTRY(__aes_arm64_decrypt)
do_crypt iround, crypto_it_tab, crypto_il_tab
ENDPROC(__aes_arm64_decrypt)

View File

@ -0,0 +1,69 @@
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>
asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
EXPORT_SYMBOL(__aes_arm64_encrypt);
asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
EXPORT_SYMBOL(__aes_arm64_decrypt);
/*
 * Single-block encrypt callback: derives the round count from the key
 * length stored in the context (6 + key_length / 4) and calls the scalar
 * assembly routine with the encryption key schedule.
 */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *aes = crypto_tfm_ctx(tfm);
	int nrounds;

	nrounds = 6 + aes->key_length / 4;

	__aes_arm64_encrypt(aes->key_enc, out, in, nrounds);
}
/*
 * Single-block decrypt callback: derives the round count from the key
 * length stored in the context (6 + key_length / 4) and calls the scalar
 * assembly routine with the decryption key schedule.
 */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *aes = crypto_tfm_ctx(tfm);
	int nrounds;

	nrounds = 6 + aes->key_length / 4;

	__aes_arm64_decrypt(aes->key_dec, out, in, nrounds);
}
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-arm64",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
.cra_cipher.cia_setkey = crypto_aes_set_key,
.cra_cipher.cia_encrypt = aes_encrypt,
.cra_cipher.cia_decrypt = aes_decrypt
};
/*
 * Module entry point: registers aes_alg with the crypto API and returns the
 * result of crypto_register_alg() unchanged.
 */
static int __init aes_init(void)
{
return crypto_register_alg(&aes_alg);
}
/* Module exit point: removes aes_alg from the crypto API again. */
static void __exit aes_fini(void)
{
crypto_unregister_alg(&aes_alg);
}
module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Scalar AES cipher for arm64");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");

View File

@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -11,6 +11,7 @@
#include <asm/neon.h>
#include <asm/hwcap.h>
#include <crypto/aes.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
@ -31,6 +32,7 @@
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
#define aes_mac_update ce_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
@ -44,11 +46,15 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_ALIAS_CRYPTO("cmac(aes)");
MODULE_ALIAS_CRYPTO("xcbc(aes)");
MODULE_ALIAS_CRYPTO("cbcmac(aes)");
#endif
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@ -75,11 +81,25 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 const rk2[], u8 iv[],
int first);
/*
 * Assembly helper shared by the CBC-MAC/CMAC/XCBC code below.
 *
 * XORs @blocks 16-byte blocks from @in into the running digest @dg,
 * encrypting the digest with round keys @rk between blocks.  A non-zero
 * @enc_before encrypts @dg before the first block is absorbed; a non-zero
 * @enc_after encrypts it again after the last block.
 */
asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before,
int enc_after);
/*
 * Transform context for the XTS algorithms: two independent expanded AES
 * keys (the XTS assembly routines take both as rk1 and rk2).
 */
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
};
/*
 * Per-transform context for the MAC algorithms.
 *
 * @key:    expanded AES key used for all block encryptions
 * @consts: trailing storage for derived constants; cmac_setkey() and
 *          xcbc_setkey() write two AES blocks here, and those algorithms
 *          reserve 2 * AES_BLOCK_SIZE extra bytes in cra_ctxsize for it.
 *          cbcmac(aes) reserves none and does not use it.
 */
struct mac_tfm_ctx {
struct crypto_aes_ctx key;
u8 __aligned(8) consts[];
};
/*
 * Per-request MAC state.
 *
 * @len: number of message bytes XORed into @dg since it was last encrypted
 *       (0..AES_BLOCK_SIZE); a value of AES_BLOCK_SIZE means a full block is
 *       pending and its encryption has been deferred by mac_update()
 * @dg:  running digest block
 */
struct mac_desc_ctx {
unsigned int len;
u8 dg[AES_BLOCK_SIZE];
};
static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@ -215,14 +235,15 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 *tsrc = walk.src.virt.addr;
/*
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
* to tell aes_ctr_encrypt() to only read half a block.
* Tell aes_ctr_encrypt() to process a tail block.
*/
blocks = (nbytes <= 8) ? -1 : 1;
blocks = -1;
aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
blocks, walk.iv, first);
memcpy(tdst, tail, nbytes);
if (tdst != tsrc)
memcpy(tdst, tsrc, nbytes);
crypto_xor(tdst, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
@ -282,7 +303,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@ -298,7 +318,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@ -315,7 +334,22 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = skcipher_aes_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
.base = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-" MODE,
.cra_priority = PRIO - 1,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@ -333,7 +367,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = 2 * AES_MIN_KEY_SIZE,
@ -344,15 +377,228 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = xts_decrypt,
} };
/*
 * Expand @in_key into the MAC transform context.
 *
 * Returns 0 on success.  If aes_expandkey() rejects the key, the
 * CRYPTO_TFM_RES_BAD_KEY_LEN flag is set on @tfm and the error code from
 * aes_expandkey() is returned unchanged.
 */
static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
			 unsigned int key_len)
{
	struct mac_tfm_ctx *mac = crypto_shash_ctx(tfm);
	int ret = aes_expandkey(&mac->key, in_key, key_len);

	if (!ret)
		return 0;

	crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
	return ret;
}
/*
 * Treat @x as a 128-bit big-endian value, shift it left by one bit and, if
 * the bit shifted out of the top was set, XOR 0x87 into the low byte.  The
 * result is stored in @y.
 *
 * Both halves of @x are read before @y is written, so @y may alias @x.
 */
static void cmac_gf128_mul_by_x(be128 *y, const be128 *x)
{
	const u64 hi = be64_to_cpu(x->a);
	const u64 lo = be64_to_cpu(x->b);
	u64 reduction = 0;

	if (hi >> 63)
		reduction = 0x87;

	y->a = cpu_to_be64((hi << 1) | (lo >> 63));
	y->b = cpu_to_be64((lo << 1) ^ reduction);
}
/*
 * Set the AES key for cmac(aes) and derive the two constants kept in
 * ctx->consts.
 *
 * After the key has been expanded, an all-zero block is encrypted into
 * consts[0].  consts[0] is then doubled in place with cmac_gf128_mul_by_x(),
 * and consts[1] is set to the doubling of the new consts[0].
 *
 * Returns 0 on success or the error from cbcmac_setkey().
 */
static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	struct mac_tfm_ctx *mac = crypto_shash_ctx(tfm);
	be128 *subkeys = (be128 *)mac->consts;
	u8 *round_keys = (u8 *)mac->key.key_enc;
	int nrounds = 6 + key_len / 4;
	int ret;

	ret = cbcmac_setkey(tfm, in_key, key_len);
	if (ret)
		return ret;

	/* consts[0] = E_K(0^128) */
	kernel_neon_begin();
	aes_ecb_encrypt(mac->consts, (u8[AES_BLOCK_SIZE]){}, round_keys,
			nrounds, 1, 1);
	kernel_neon_end();

	cmac_gf128_mul_by_x(&subkeys[0], &subkeys[0]);
	cmac_gf128_mul_by_x(&subkeys[1], &subkeys[0]);

	return 0;
}
static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
unsigned int key_len)
{
static u8 const ks[3][AES_BLOCK_SIZE] = {
{ [0 ... AES_BLOCK_SIZE - 1] = 0x1 },
{ [0 ... AES_BLOCK_SIZE - 1] = 0x2 },
{ [0 ... AES_BLOCK_SIZE - 1] = 0x3 },
};
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
u8 *rk = (u8 *)ctx->key.key_enc;
int rounds = 6 + key_len / 4;
u8 key[AES_BLOCK_SIZE];
int err;
err = cbcmac_setkey(tfm, in_key, key_len);
if (err)
return err;
kernel_neon_begin();
aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
kernel_neon_end();
return cbcmac_setkey(tfm, key, sizeof(key));
}
/*
 * Start a new MAC computation: no bytes absorbed yet and an all-zero
 * running digest.  Always returns 0.
 */
static int mac_init(struct shash_desc *desc)
{
	struct mac_desc_ctx *state = shash_desc_ctx(desc);

	state->len = 0;
	memset(state->dg, 0, AES_BLOCK_SIZE);

	return 0;
}
/*
 * Absorb @len bytes from @p into the running digest.
 *
 * When the state sits on a block boundary and more than one block's worth
 * of data would be present, whole blocks are handed to aes_mac_update().
 * Encryption of the digest after the final whole block is deferred (and
 * ctx->len left at AES_BLOCK_SIZE) when no further bytes follow, so that the
 * final() handlers can treat the last block specially.  Remaining bytes are
 * XORed into the digest at offset ctx->len.
 *
 * Always returns 0.
 */
static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
	int rounds = 6 + tctx->key.key_length / 4;

	while (len > 0) {
		unsigned int chunk;

		if ((ctx->len % AES_BLOCK_SIZE) == 0 &&
		    (ctx->len + len) > AES_BLOCK_SIZE) {
			int nblocks = len / AES_BLOCK_SIZE;
			unsigned int tail = len % AES_BLOCK_SIZE;

			/*
			 * Encrypt first if a full block is still pending;
			 * encrypt last only if partial data follows.
			 */
			kernel_neon_begin();
			aes_mac_update(p, tctx->key.key_enc, rounds, nblocks,
				       ctx->dg, (ctx->len != 0), (tail != 0));
			kernel_neon_end();

			p += nblocks * AES_BLOCK_SIZE;
			len = tail;

			if (!tail) {
				/* last block absorbed but not yet encrypted */
				ctx->len = AES_BLOCK_SIZE;
				break;
			}
			ctx->len = 0;
		}

		chunk = min(len, AES_BLOCK_SIZE - ctx->len);

		/*
		 * NOTE(review): this guard looks always true here, since
		 * chunk is capped at AES_BLOCK_SIZE - ctx->len -- confirm.
		 */
		if (chunk <= AES_BLOCK_SIZE) {
			crypto_xor(ctx->dg + ctx->len, p, chunk);
			ctx->len += chunk;
			len -= chunk;
			p += chunk;
		}
	}

	return 0;
}
/*
 * Finish a cbcmac(aes) computation: encrypt the running digest once more
 * (aes_mac_update() with zero blocks and enc_before set) and copy the
 * resulting AES_BLOCK_SIZE bytes to @out.  Always returns 0.
 */
static int cbcmac_final(struct shash_desc *desc, u8 *out)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *state = shash_desc_ctx(desc);
	u8 *digest = state->dg;

	kernel_neon_begin();
	aes_mac_update(NULL, tctx->key.key_enc,
		       6 + tctx->key.key_length / 4, 0, digest, 1, 0);
	kernel_neon_end();

	memcpy(out, digest, AES_BLOCK_SIZE);

	return 0;
}
/*
 * Finish a cmac(aes)/xcbc(aes) computation.
 *
 * If the last block is complete (ctx->len == AES_BLOCK_SIZE) the first
 * constant block in tctx->consts is used.  Otherwise a 0x80 padding byte is
 * XORed in at offset ctx->len and the second constant block is used.  The
 * chosen constant is XORed into the digest, the digest is encrypted, and the
 * result is copied to @out.  Always returns 0.
 */
static int cmac_final(struct shash_desc *desc, u8 *out)
{
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *state = shash_desc_ctx(desc);
	int rounds = 6 + tctx->key.key_length / 4;
	const int partial = (state->len != AES_BLOCK_SIZE);
	u8 *subkey = tctx->consts;

	if (partial) {
		state->dg[state->len] ^= 0x80;
		subkey = tctx->consts + AES_BLOCK_SIZE;
	}

	/* absorb the constant as a single block, then encrypt */
	kernel_neon_begin();
	aes_mac_update(subkey, tctx->key.key_enc, rounds, 1, state->dg, 0, 1);
	kernel_neon_end();

	memcpy(out, state->dg, AES_BLOCK_SIZE);

	return 0;
}
/*
 * Synchronous hash descriptors for the three AES-based MACs.  All of them
 * share mac_init()/mac_update() and the same per-request state; they differ
 * in key setup, finalisation and in how much trailing context they reserve.
 */
static struct shash_alg mac_algs[] = { {
.base.cra_name = "cmac(aes)",
.base.cra_driver_name = "cmac-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = AES_BLOCK_SIZE,
/* extra room for the two constant blocks written by cmac_setkey() */
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
.final = cmac_final,
.setkey = cmac_setkey,
.descsize = sizeof(struct mac_desc_ctx),
}, {
.base.cra_name = "xcbc(aes)",
.base.cra_driver_name = "xcbc-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = AES_BLOCK_SIZE,
/* extra room for the two constant blocks written by xcbc_setkey() */
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
2 * AES_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
/* finalisation is shared with cmac(aes); only the constants differ */
.final = cmac_final,
.setkey = xcbc_setkey,
.descsize = sizeof(struct mac_desc_ctx),
}, {
.base.cra_name = "cbcmac(aes)",
.base.cra_driver_name = "cbcmac-aes-" MODE,
.base.cra_priority = PRIO,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = 1,
/* no derived constants, so no trailing storage is reserved */
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
.base.cra_module = THIS_MODULE,
.digestsize = AES_BLOCK_SIZE,
.init = mac_init,
.update = mac_update,
.final = cbcmac_final,
.setkey = cbcmac_setkey,
.descsize = sizeof(struct mac_desc_ctx),
} };
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
/*
 * Release every SIMD wrapper recorded in aes_simd_algs[] and unregister the
 * MAC and skcipher algorithms.  Also used by aes_init() as its error
 * unwinding path.
 */
static void aes_exit(void)
{
	int i;

	/*
	 * aes_init() skips algorithms that lack CRYPTO_ALG_INTERNAL, so the
	 * array can contain NULL slots in the middle.  Walk the whole array
	 * and free each populated slot exactly once; a loop that stops at the
	 * first NULL would miss later entries, and running both loops would
	 * free the leading entries twice.
	 */
	for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
		if (aes_simd_algs[i])
			simd_skcipher_free(aes_simd_algs[i]);

	crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs));
	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
@ -369,7 +615,14 @@ static int __init aes_init(void)
if (err)
return err;
err = crypto_register_shashes(mac_algs, ARRAY_SIZE(mac_algs));
if (err)
goto unregister_ciphers;
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
continue;
algname = aes_algs[i].base.cra_name + 2;
drvname = aes_algs[i].base.cra_driver_name + 2;
basename = aes_algs[i].base.cra_driver_name;
@ -385,6 +638,8 @@ static int __init aes_init(void)
unregister_simds:
aes_exit();
unregister_ciphers:
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
return err;
}
@ -392,5 +647,7 @@ unregister_simds:
module_cpu_feature_match(AES, aes_init);
#else
module_init(aes_init);
EXPORT_SYMBOL(neon_aes_ecb_encrypt);
EXPORT_SYMBOL(neon_aes_cbc_encrypt);
#endif
module_exit(aes_exit);

View File

@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -337,7 +337,7 @@ AES_ENTRY(aes_ctr_encrypt)
.Lctrcarrydone:
subs w4, w4, #1
bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
bmi .Lctrtailblock /* blocks <0 means tail block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
@ -348,10 +348,8 @@ AES_ENTRY(aes_ctr_encrypt)
FRAME_POP
ret
.Lctrhalfblock:
ld1 {v3.8b}, [x1]
eor v3.8b, v0.8b, v3.8b
st1 {v3.8b}, [x0]
.Lctrtailblock:
st1 {v0.16b}, [x0]
FRAME_POP
ret
@ -527,3 +525,30 @@ AES_ENTRY(aes_xts_decrypt)
FRAME_POP
ret
AES_ENDPROC(aes_xts_decrypt)
/*
* aes_mac_update(u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 dg[], int enc_before, int enc_after)
*
* Register use as seen below: x0 = in, x1 = rk, w2 = rounds, w3 = blocks,
* x4 = dg, w5 = enc_before, x6 = enc_after.
*
* The digest is loaded from dg into v0. If enc_before is non-zero it is
* encrypted first. Each input block is then XORed into v0; the digest is
* encrypted after every block except the last, where encryption happens
* only if enc_after is non-zero. The digest is written back to dg.
*/
AES_ENTRY(aes_mac_update)
ld1 {v0.16b}, [x4] /* get dg */
enc_prepare w2, x1, x7
cbnz w5, .Lmacenc
.Lmacloop:
cbz w3, .Lmacout
ld1 {v1.16b}, [x0], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
subs w3, w3, #1
/* last block: x5 = enc_after; otherwise x5 = all ones (always encrypt) */
csinv x5, x6, xzr, eq
cbz w5, .Lmacout
.Lmacenc:
encrypt_block v0, w2, x1, x7, w8
b .Lmacloop
.Lmacout:
st1 {v0.16b}, [x4] /* return dg */
ret
AES_ENDPROC(aes_mac_update)

View File

@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
* Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -17,17 +17,25 @@
/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7
add \out, \in, \in
shl \out, \in, #1
and \temp, \temp, \const
eor \out, \out, \temp
.endm
/* multiply by polynomial 'x^2' in GF(2^8) */
.macro mul_by_x2, out, in, temp, const
ushr \temp, \in, #6
shl \out, \in, #2
pmul \temp, \temp, \const
eor \out, \out, \temp
.endm
/* preload the entire Sbox */
.macro prepare, sbox, shiftrows, temp
adr \temp, \sbox
movi v12.16b, #0x40
movi v12.16b, #0x1b
ldr q13, \shiftrows
movi v14.16b, #0x1b
ldr q14, .Lror32by8
ld1 {v16.16b-v19.16b}, [\temp], #64
ld1 {v20.16b-v23.16b}, [\temp], #64
ld1 {v24.16b-v27.16b}, [\temp], #64
@ -50,37 +58,31 @@
/* apply SubBytes transformation using the preloaded Sbox */
.macro sub_bytes, in
sub v9.16b, \in\().16b, v12.16b
sub v9.16b, \in\().16b, v15.16b
tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
sub v10.16b, v9.16b, v12.16b
sub v10.16b, v9.16b, v15.16b
tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
sub v11.16b, v10.16b, v12.16b
sub v11.16b, v10.16b, v15.16b
tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
.endm
/* apply MixColumns transformation */
.macro mix_columns, in
mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
rev32 v8.8h, \in\().8h
eor \in\().16b, v10.16b, \in\().16b
shl v9.4s, v8.4s, #24
shl v11.4s, \in\().4s, #24
sri v9.4s, v8.4s, #8
sri v11.4s, \in\().4s, #8
eor v9.16b, v9.16b, v8.16b
eor v10.16b, v10.16b, v9.16b
eor \in\().16b, v10.16b, v11.16b
.endm
.macro mix_columns, in, enc
.if \enc == 0
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
.macro inv_mix_columns, in
mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
eor \in\().16b, \in\().16b, v11.16b
rev32 v11.8h, v11.8h
eor \in\().16b, \in\().16b, v11.16b
mix_columns \in
mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
eor \in\().16b, \in\().16b, v8.16b
rev32 v8.8h, v8.8h
eor \in\().16b, \in\().16b, v8.16b
.endif
mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
rev32 v8.8h, \in\().8h
eor v8.16b, v8.16b, v9.16b
eor \in\().16b, \in\().16b, v8.16b
tbl \in\().16b, {\in\().16b}, v14.16b
eor \in\().16b, \in\().16b, v8.16b
.endm
.macro do_block, enc, in, rounds, rk, rkp, i
@ -88,16 +90,13 @@
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
movi v15.16b, #0x40
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
ld1 {v15.4s}, [\rkp], #16
beq 2222f
.if \enc == 1
mix_columns \in
.else
inv_mix_columns \in
.endif
mix_columns \in, \enc
b 1111b
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
.endm
@ -116,139 +115,114 @@
*/
.macro sub_bytes_2x, in0, in1
sub v8.16b, \in0\().16b, v12.16b
sub v9.16b, \in1\().16b, v12.16b
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub v10.16b, v8.16b, v12.16b
sub v11.16b, v9.16b, v12.16b
sub v10.16b, v8.16b, v15.16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
sub v11.16b, v9.16b, v15.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub v8.16b, v10.16b, v12.16b
sub v9.16b, v11.16b, v12.16b
sub v8.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
sub v9.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
.endm
.macro sub_bytes_4x, in0, in1, in2, in3
sub v8.16b, \in0\().16b, v12.16b
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
sub v9.16b, \in1\().16b, v12.16b
sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub v10.16b, \in2\().16b, v12.16b
sub v10.16b, \in2\().16b, v15.16b
tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
sub v11.16b, \in3\().16b, v12.16b
sub v11.16b, \in3\().16b, v15.16b
tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub v8.16b, v8.16b, v12.16b
sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
sub v9.16b, v9.16b, v12.16b
sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
sub v10.16b, v10.16b, v12.16b
sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
sub v11.16b, v11.16b, v12.16b
sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
sub v8.16b, v8.16b, v12.16b
sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
sub v9.16b, v9.16b, v12.16b
sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
sub v10.16b, v10.16b, v12.16b
sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
sub v11.16b, v11.16b, v12.16b
sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
.endm
.macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
sshr \tmp0\().16b, \in0\().16b, #7
add \out0\().16b, \in0\().16b, \in0\().16b
sshr \tmp1\().16b, \in1\().16b, #7
sshr \tmp0\().16b, \in0\().16b, #7
shl \out0\().16b, \in0\().16b, #1
sshr \tmp1\().16b, \in1\().16b, #7
and \tmp0\().16b, \tmp0\().16b, \const\().16b
add \out1\().16b, \in1\().16b, \in1\().16b
shl \out1\().16b, \in1\().16b, #1
and \tmp1\().16b, \tmp1\().16b, \const\().16b
eor \out0\().16b, \out0\().16b, \tmp0\().16b
eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
.macro mix_columns_2x, in0, in1
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
.macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
ushr \tmp0\().16b, \in0\().16b, #6
shl \out0\().16b, \in0\().16b, #2
ushr \tmp1\().16b, \in1\().16b, #6
pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
shl \out1\().16b, \in1\().16b, #2
pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
eor \out0\().16b, \out0\().16b, \tmp0\().16b
eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
.macro mix_columns_2x, in0, in1, enc
.if \enc == 0
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
eor \in0\().16b, \in0\().16b, v8.16b
rev32 v8.8h, v8.8h
eor \in1\().16b, \in1\().16b, v9.16b
rev32 v9.8h, v9.8h
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
.endif
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
rev32 v10.8h, \in0\().8h
rev32 v11.8h, \in1\().8h
eor \in0\().16b, v8.16b, \in0\().16b
eor \in1\().16b, v9.16b, \in1\().16b
shl v12.4s, v10.4s, #24
shl v13.4s, v11.4s, #24
eor v8.16b, v8.16b, v10.16b
sri v12.4s, v10.4s, #8
shl v10.4s, \in0\().4s, #24
eor v9.16b, v9.16b, v11.16b
sri v13.4s, v11.4s, #8
shl v11.4s, \in1\().4s, #24
sri v10.4s, \in0\().4s, #8
eor \in0\().16b, v8.16b, v12.16b
sri v11.4s, \in1\().4s, #8
eor \in1\().16b, v9.16b, v13.16b
eor \in0\().16b, v10.16b, \in0\().16b
eor \in1\().16b, v11.16b, \in1\().16b
eor v10.16b, v10.16b, v8.16b
eor v11.16b, v11.16b, v9.16b
eor \in0\().16b, \in0\().16b, v10.16b
eor \in1\().16b, \in1\().16b, v11.16b
tbl \in0\().16b, {\in0\().16b}, v14.16b
tbl \in1\().16b, {\in1\().16b}, v14.16b
eor \in0\().16b, \in0\().16b, v10.16b
eor \in1\().16b, \in1\().16b, v11.16b
.endm
.macro inv_mix_cols_2x, in0, in1
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
rev32 v8.8h, v8.8h
rev32 v9.8h, v9.8h
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
mix_columns_2x \in0, \in1
.endm
.macro inv_mix_cols_4x, in0, in1, in2, in3
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
eor \in2\().16b, \in2\().16b, v10.16b
eor \in3\().16b, \in3\().16b, v11.16b
rev32 v8.8h, v8.8h
rev32 v9.8h, v9.8h
rev32 v10.8h, v10.8h
rev32 v11.8h, v11.8h
eor \in0\().16b, \in0\().16b, v8.16b
eor \in1\().16b, \in1\().16b, v9.16b
eor \in2\().16b, \in2\().16b, v10.16b
eor \in3\().16b, \in3\().16b, v11.16b
mix_columns_2x \in0, \in1
mix_columns_2x \in2, \in3
.endm
.macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
.macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
sub_bytes_2x \in0, \in1
movi v15.16b, #0x40
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
ld1 {v15.4s}, [\rkp], #16
sub_bytes_2x \in0, \in1
subs \i, \i, #1
ld1 {v15.4s}, [\rkp], #16
beq 2222f
.if \enc == 1
mix_columns_2x \in0, \in1
ldr q13, .LForward_ShiftRows
.else
inv_mix_cols_2x \in0, \in1
ldr q13, .LReverse_ShiftRows
.endif
movi v12.16b, #0x40
mix_columns_2x \in0, \in1, \enc
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
@ -262,23 +236,17 @@
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
sub_bytes_4x \in0, \in1, \in2, \in3
movi v15.16b, #0x40
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
ld1 {v15.4s}, [\rkp], #16
sub_bytes_4x \in0, \in1, \in2, \in3
subs \i, \i, #1
ld1 {v15.4s}, [\rkp], #16
beq 2222f
.if \enc == 1
mix_columns_2x \in0, \in1
mix_columns_2x \in2, \in3
ldr q13, .LForward_ShiftRows
.else
inv_mix_cols_4x \in0, \in1, \in2, \in3
ldr q13, .LReverse_ShiftRows
.endif
movi v12.16b, #0x40
mix_columns_2x \in0, \in1, \enc
mix_columns_2x \in2, \in3, \enc
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
@ -305,19 +273,7 @@
#include "aes-modes.S"
.text
.align 4
.LForward_ShiftRows:
CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 )
CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb )
CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 )
CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 )
.LReverse_ShiftRows:
CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb )
CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 )
CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 )
CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
.align 6
.LForward_Sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
@ -385,3 +341,12 @@ CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.LForward_ShiftRows:
.octa 0x0b06010c07020d08030e09040f0a0500
.LReverse_ShiftRows:
.octa 0x0306090c0f0205080b0e0104070a0d00
.Lror32by8:
.octa 0x0c0f0e0d080b0a090407060500030201

View File

@ -0,0 +1,972 @@
/*
* Bit sliced AES using NEON instructions
*
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/*
* The algorithm implemented here is described in detail by the paper
* 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
* Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
*
* This implementation is based primarily on the OpenSSL implementation
* for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
rounds .req x11
bskey .req x12
/*
* In-place linear (XOR-only) transformation of the eight operands b0..b7.
* Invoked by the sbox macro before inv_gf256; presumably the input change
* of basis of the bit-sliced S-box -- confirm against the referenced paper.
*/
.macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
eor \b2, \b2, \b1
eor \b5, \b5, \b6
eor \b3, \b3, \b0
eor \b6, \b6, \b2
eor \b5, \b5, \b0
eor \b6, \b6, \b3
eor \b3, \b3, \b7
eor \b7, \b7, \b5
eor \b3, \b3, \b4
eor \b4, \b4, \b5
eor \b2, \b2, \b7
eor \b3, \b3, \b1
eor \b1, \b1, \b5
.endm
/*
* In-place linear (XOR-only) transformation of the eight operands b0..b7.
* Invoked by the sbox macro after inv_gf256; presumably the output change
* of basis of the bit-sliced S-box -- confirm against the referenced paper.
*/
.macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
eor \b0, \b0, \b6
eor \b1, \b1, \b4
eor \b4, \b4, \b6
eor \b2, \b2, \b0
eor \b6, \b6, \b1
eor \b1, \b1, \b5
eor \b5, \b5, \b3
eor \b3, \b3, \b7
eor \b7, \b7, \b5
eor \b2, \b2, \b5
eor \b4, \b4, \b7
.endm
/*
* In-place linear (XOR-only) transformation used by inv_sbox before
* inv_gf256. Note that the formal parameters are declared in the order
* b6, b1, b2, b4, b7, b0, b3, b5, so the caller's positional arguments
* are bound to those names in that order.
*/
.macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
eor \b1, \b1, \b7
eor \b4, \b4, \b7
eor \b7, \b7, \b5
eor \b1, \b1, \b3
eor \b2, \b2, \b5
eor \b3, \b3, \b7
eor \b6, \b6, \b1
eor \b2, \b2, \b0
eor \b5, \b5, \b3
eor \b4, \b4, \b6
eor \b0, \b0, \b6
eor \b1, \b1, \b4
.endm
/*
* In-place linear (XOR-only) transformation used by inv_sbox after
* inv_gf256. The formal parameters are declared in the order
* b6, b5, b0, b3, b7, b1, b4, b2, so the caller's positional arguments
* are bound to those names in that order.
*/
.macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
eor \b1, \b1, \b5
eor \b2, \b2, \b7
eor \b3, \b3, \b1
eor \b4, \b4, \b5
eor \b7, \b7, \b5
eor \b3, \b3, \b4
eor \b5, \b5, \b0
eor \b3, \b3, \b7
eor \b6, \b6, \b2
eor \b2, \b2, \b1
eor \b6, \b6, \b3
eor \b3, \b3, \b0
eor \b5, \b5, \b6
.endm
/*
* Combine the pair (x0, x1) with the pair (y0, y1) using AND/XOR only and
* leave the result in (x0, x1):
* x1' = (x1 & y0) ^ ((y0 ^ y1) & x0)
* x0' = ((x0 ^ x1) & y1) ^ (x1 & y0)
* y0/y1 are not modified; t0 and t1 are clobbered. Going by the name this
* is presumably a bit-sliced multiplication in GF(2^2) -- confirm.
*/
.macro mul_gf4, x0, x1, y0, y1, t0, t1
eor \t0, \y0, \y1
and \t0, \t0, \x0
eor \x0, \x0, \x1
and \t1, \x1, \y0
and \x0, \x0, \y1
eor \x1, \t1, \t0
eor \x0, \x0, \t1
.endm
/*
* Two interleaved AND/XOR combinations: (x0, x1) with (y0, y1) and
* (x2, x3) with (y2, y3). Results are left in x0..x3; the y operands are
* not modified; t0 and t1 are clobbered. Note the two halves finish
* differently (compare the last four instructions), so this is not simply
* two copies of mul_gf4.
*/
.macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
eor \t0, \y0, \y1
eor \t1, \y2, \y3
and \t0, \t0, \x0
and \t1, \t1, \x2
eor \x0, \x0, \x1
eor \x2, \x2, \x3
and \x1, \x1, \y0
and \x3, \x3, \y2
and \x0, \x0, \y1
and \x2, \x2, \y3
eor \x1, \x1, \x0
eor \x2, \x2, \x3
eor \x0, \x0, \t0
eor \x3, \x3, \t1
.endm
/*
* Combine each of the two 4-operand groups x0..x3 and x4..x7 with the
* shared operands y0..y3, built from mul_gf4 and mul_gf4_n_gf4. Results
* are left in x0..x7; t0..t3 are clobbered.
*
* y0 and y1 are XORed with y2/y3 before the first group's second step and
* XORed again with the same values during the second group, which restores
* their original contents before the final mul_gf4.
*/
.macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
y0, y1, y2, y3, t0, t1, t2, t3
eor \t0, \x0, \x2
eor \t1, \x1, \x3
mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3
eor \y0, \y0, \y2
eor \y1, \y1, \y3
mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
eor \x0, \x0, \t0
eor \x2, \x2, \t0
eor \x1, \x1, \t1
eor \x3, \x3, \t1
eor \t0, \x4, \x6
eor \t1, \x5, \x7
mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
eor \y0, \y0, \y2
eor \y1, \y1, \y3
mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3
eor \x4, \x4, \t0
eor \x6, \x6, \t0
eor \x5, \x5, \t1
eor \x7, \x7, \t1
.endm
/*
* Non-linear core shared by sbox and inv_sbox. Operates in place on the
* eight operands x0..x7 using only bitwise logic (eor/and/orr/bsl/not), so
* no data-dependent table lookups are performed. t0..t3 and s0..s3 are
* scratch and are clobbered.
*
* The first part derives four intermediate values from x0..x7; the final
* mul_gf16_2 combines them with x0..x7 to produce the result. Going by the
* name this is presumably the bit-sliced inversion in GF(2^8) -- confirm
* against the referenced paper.
*/
.macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
t0, t1, t2, t3, s0, s1, s2, s3
eor \t3, \x4, \x6
eor \t0, \x5, \x7
eor \t1, \x1, \x3
eor \s1, \x7, \x6
eor \s0, \x0, \x2
eor \s3, \t3, \t0
orr \t2, \t0, \t1
and \s2, \t3, \s0
orr \t3, \t3, \s0
eor \s0, \s0, \t1
and \t0, \t0, \t1
eor \t1, \x3, \x2
and \s3, \s3, \s0
and \s1, \s1, \t1
eor \t1, \x4, \x5
eor \s0, \x1, \x0
eor \t3, \t3, \s1
eor \t2, \t2, \s1
and \s1, \t1, \s0
orr \t1, \t1, \s0
eor \t3, \t3, \s3
eor \t0, \t0, \s1
eor \t2, \t2, \s2
eor \t1, \t1, \s3
eor \t0, \t0, \s2
and \s0, \x7, \x3
eor \t1, \t1, \s2
and \s1, \x6, \x2
and \s2, \x5, \x1
orr \s3, \x4, \x0
eor \t3, \t3, \s0
eor \t1, \t1, \s2
eor \s0, \t0, \s3
eor \t2, \t2, \s1
and \s2, \t3, \t1
eor \s1, \t2, \s2
eor \s3, \s0, \s2
/* bsl: the destination acts as the bit-select mask between the sources */
bsl \s1, \t1, \s0
not \t0, \s0
bsl \s0, \s1, \s3
bsl \t0, \s1, \s3
bsl \s3, \t3, \t2
eor \t3, \t3, \t2
and \s2, \s0, \s3
eor \t1, \t1, \t0
eor \s2, \s2, \t3
mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
.endm
/*
* Forward S-box on eight vector registers b0..b7 (given as register names
* without an arrangement suffix; .16b is appended here). Runs in_bs_ch,
* inv_gf256 and out_bs_ch in sequence, using t0..t3 and s0..s3 as scratch.
*
* The registers are handed to inv_gf256 and out_bs_ch in permuted order,
* so the results are left in a different register order than the inputs
* arrived in.
*/
.macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
t0, t1, t2, t3, s0, s1, s2, s3
in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
.endm
/*
* Inverse S-box on eight vector registers b0..b7 (register names without
* an arrangement suffix; .16b is appended here). Runs inv_in_bs_ch,
* inv_gf256 and inv_out_bs_ch in sequence, using t0..t3 and s0..s3 as
* scratch. As with sbox, the registers are passed on in permuted order.
*/
.macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
t0, t1, t2, t3, s0, s1, s2, s3
inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
.endm
/*
* Load the next 128-byte bit-sliced round key into q16..q23 and advance
* bskey by 128 bytes (post-indexed on the first load; the remaining loads
* use negative offsets from the already advanced pointer).
*/
.macro enc_next_rk
ldp q16, q17, [bskey], #128
ldp q18, q19, [bskey, #-96]
ldp q20, q21, [bskey, #-64]
ldp q22, q23, [bskey, #-32]
.endm
/*
* Step bskey back by 128 bytes (pre-indexed on the first load) and load
* the 128-byte bit-sliced round key found there into q16..q23.
*/
.macro dec_next_rk
ldp q16, q17, [bskey, #-128]!
ldp q18, q19, [bskey, #32]
ldp q20, q21, [bskey, #64]
ldp q22, q23, [bskey, #96]
.endm
/*
* XOR the eight state registers x0..x7 with v16..v23 respectively, i.e.
* with the round key most recently loaded by enc_next_rk/dec_next_rk.
*/
.macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
eor \x0\().16b, \x0\().16b, v16.16b
eor \x1\().16b, \x1\().16b, v17.16b
eor \x2\().16b, \x2\().16b, v18.16b
eor \x3\().16b, \x3\().16b, v19.16b
eor \x4\().16b, \x4\().16b, v20.16b
eor \x5\().16b, \x5\().16b, v21.16b
eor \x6\().16b, \x6\().16b, v22.16b
eor \x7\().16b, \x7\().16b, v23.16b
.endm
/*
* Apply the same byte permutation to each of the eight state registers
* x0..x7 in place, using tbl with the index vector in \mask.
*/
.macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
tbl \x0\().16b, {\x0\().16b}, \mask\().16b
tbl \x1\().16b, {\x1\().16b}, \mask\().16b
tbl \x2\().16b, {\x2\().16b}, \mask\().16b
tbl \x3\().16b, {\x3\().16b}, \mask\().16b
tbl \x4\().16b, {\x4\().16b}, \mask\().16b
tbl \x5\().16b, {\x5\().16b}, \mask\().16b
tbl \x6\().16b, {\x6\().16b}, \mask\().16b
tbl \x7\().16b, {\x7\().16b}, \mask\().16b
.endm
/*
* MixColumns step on the bit-sliced state x0..x7, computed with byte
* rotations of each register (ext by 12 and by 8 bytes) and XORs only.
* t0..t7 are scratch and are clobbered.
*
* The optional trailing argument \inv selects where the results land:
* when it is blank, the first branch of the .ifb below is assembled; when
* it is supplied (inv_mix_cols passes 1), the second branch is assembled,
* which assigns the results to a different set of registers.
*/
.macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
t0, t1, t2, t3, t4, t5, t6, t7, inv
ext \t0\().16b, \x0\().16b, \x0\().16b, #12
ext \t1\().16b, \x1\().16b, \x1\().16b, #12
eor \x0\().16b, \x0\().16b, \t0\().16b
ext \t2\().16b, \x2\().16b, \x2\().16b, #12
eor \x1\().16b, \x1\().16b, \t1\().16b
ext \t3\().16b, \x3\().16b, \x3\().16b, #12
eor \x2\().16b, \x2\().16b, \t2\().16b
ext \t4\().16b, \x4\().16b, \x4\().16b, #12
eor \x3\().16b, \x3\().16b, \t3\().16b
ext \t5\().16b, \x5\().16b, \x5\().16b, #12
eor \x4\().16b, \x4\().16b, \t4\().16b
ext \t6\().16b, \x6\().16b, \x6\().16b, #12
eor \x5\().16b, \x5\().16b, \t5\().16b
ext \t7\().16b, \x7\().16b, \x7\().16b, #12
eor \x6\().16b, \x6\().16b, \t6\().16b
eor \t1\().16b, \t1\().16b, \x0\().16b
eor \x7\().16b, \x7\().16b, \t7\().16b
ext \x0\().16b, \x0\().16b, \x0\().16b, #8
eor \t2\().16b, \t2\().16b, \x1\().16b
eor \t0\().16b, \t0\().16b, \x7\().16b
eor \t1\().16b, \t1\().16b, \x7\().16b
ext \x1\().16b, \x1\().16b, \x1\().16b, #8
eor \t5\().16b, \t5\().16b, \x4\().16b
eor \x0\().16b, \x0\().16b, \t0\().16b
eor \t6\().16b, \t6\().16b, \x5\().16b
eor \x1\().16b, \x1\().16b, \t1\().16b
ext \t0\().16b, \x4\().16b, \x4\().16b, #8
eor \t4\().16b, \t4\().16b, \x3\().16b
ext \t1\().16b, \x5\().16b, \x5\().16b, #8
eor \t7\().16b, \t7\().16b, \x6\().16b
ext \x4\().16b, \x3\().16b, \x3\().16b, #8
eor \t3\().16b, \t3\().16b, \x2\().16b
ext \x5\().16b, \x7\().16b, \x7\().16b, #8
eor \t4\().16b, \t4\().16b, \x7\().16b
ext \x3\().16b, \x6\().16b, \x6\().16b, #8
eor \t3\().16b, \t3\().16b, \x7\().16b
ext \x6\().16b, \x2\().16b, \x2\().16b, #8
eor \x7\().16b, \t1\().16b, \t5\().16b
/* \inv blank: forward output arrangement */
.ifb \inv
eor \x2\().16b, \t0\().16b, \t4\().16b
eor \x4\().16b, \x4\().16b, \t3\().16b
eor \x5\().16b, \x5\().16b, \t7\().16b
eor \x3\().16b, \x3\().16b, \t6\().16b
eor \x6\().16b, \x6\().16b, \t2\().16b
.else
/* \inv supplied: output arrangement used by inv_mix_cols */
eor \t3\().16b, \t3\().16b, \x4\().16b
eor \x5\().16b, \x5\().16b, \t7\().16b
eor \x2\().16b, \x3\().16b, \t6\().16b
eor \x3\().16b, \t0\().16b, \t4\().16b
eor \x4\().16b, \x6\().16b, \t2\().16b
mov \x6\().16b, \t3\().16b
.endif
.endm
/*
* Inverse MixColumns on the bit-sliced state x0..x7. A preprocessing step
* (8-byte rotations and XORs) is applied to the state first, after which
* mix_cols is invoked with its \inv argument set so that it uses the
* alternate output arrangement. t0..t7 are scratch and are clobbered.
*/
.macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
t0, t1, t2, t3, t4, t5, t6, t7
ext \t0\().16b, \x0\().16b, \x0\().16b, #8
ext \t6\().16b, \x6\().16b, \x6\().16b, #8
ext \t7\().16b, \x7\().16b, \x7\().16b, #8
eor \t0\().16b, \t0\().16b, \x0\().16b
ext \t1\().16b, \x1\().16b, \x1\().16b, #8
eor \t6\().16b, \t6\().16b, \x6\().16b
ext \t2\().16b, \x2\().16b, \x2\().16b, #8
eor \t7\().16b, \t7\().16b, \x7\().16b
ext \t3\().16b, \x3\().16b, \x3\().16b, #8
eor \t1\().16b, \t1\().16b, \x1\().16b
ext \t4\().16b, \x4\().16b, \x4\().16b, #8
eor \t2\().16b, \t2\().16b, \x2\().16b
ext \t5\().16b, \x5\().16b, \x5\().16b, #8
eor \t3\().16b, \t3\().16b, \x3\().16b
eor \t4\().16b, \t4\().16b, \x4\().16b
eor \t5\().16b, \t5\().16b, \x5\().16b
eor \x0\().16b, \x0\().16b, \t6\().16b
eor \x1\().16b, \x1\().16b, \t6\().16b
eor \x2\().16b, \x2\().16b, \t0\().16b
eor \x4\().16b, \x4\().16b, \t2\().16b
eor \x3\().16b, \x3\().16b, \t1\().16b
eor \x1\().16b, \x1\().16b, \t7\().16b
eor \x2\().16b, \x2\().16b, \t7\().16b
eor \x4\().16b, \x4\().16b, \t6\().16b
eor \x5\().16b, \x5\().16b, \t3\().16b
eor \x3\().16b, \x3\().16b, \t6\().16b
eor \x6\().16b, \x6\().16b, \t4\().16b
eor \x4\().16b, \x4\().16b, \t7\().16b
eor \x5\().16b, \x5\().16b, \t7\().16b
eor \x7\().16b, \x7\().16b, \t5\().16b
/* finish with the forward transform in its "inv" output arrangement */
mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
.endm
/*
* Two interleaved "swapmove" operations, on the pairs (a0, b0) and
* (a1, b1). For each pair, working on 64-bit lanes:
* t = ((b >> n) ^ a) & mask
* a ^= t
* b ^= t << n
* which exchanges the bits of a selected by mask with the bits of b
* selected by (mask << n). t0 and t1 are clobbered.
*/
.macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
ushr \t0\().2d, \b0\().2d, #\n
ushr \t1\().2d, \b1\().2d, #\n
eor \t0\().16b, \t0\().16b, \a0\().16b
eor \t1\().16b, \t1\().16b, \a1\().16b
and \t0\().16b, \t0\().16b, \mask\().16b
and \t1\().16b, \t1\().16b, \mask\().16b
eor \a0\().16b, \a0\().16b, \t0\().16b
shl \t0\().2d, \t0\().2d, #\n
eor \a1\().16b, \a1\().16b, \t1\().16b
shl \t1\().2d, \t1\().2d, #\n
eor \b0\().16b, \b0\().16b, \t0\().16b
eor \b1\().16b, \b1\().16b, \t1\().16b
.endm
/*
 * bitslice - regroup the bits of eight vector registers with three rounds
 * of swapmove_2x: shift 1 with mask 0x55, shift 2 with mask 0x33 and
 * shift 4 with mask 0x0f.
 *
 * Note that the register parameters are declared in the order x7 ... x0.
 * x0-x7 are updated in place; t0-t3 are scratch (t0/t1 hold the masks,
 * t2/t3 are the swapmove temporaries).
 */
.macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
movi \t0\().16b, #0x55
movi \t1\().16b, #0x33
swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
// 0x55 is no longer needed: reuse t0 for the mask of the last round
movi \t0\().16b, #0x0f
swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
.endm
/*
 * Byte permutation vectors, used as the index operand of tbl (directly or
 * through the shift_rows macro):
 *
 *   M0     - applied to each middle round key by aesbs_convert_key
 *   M0SR   - applied to the input blocks by aesbs_encrypt8
 *   SR     - passed to shift_rows in the encryption rounds
 *   SRM0   - replaces SR once the encryption round counter reaches zero
 *   M0ISR  - applied to the input blocks by aesbs_decrypt8
 *   ISR    - passed to shift_rows in the decryption rounds
 *   ISRM0  - replaces ISR once the decryption round counter reaches zero
 *
 * The names suggest compositions of the M0 byte order with the (inverse)
 * ShiftRows permutation - NOTE(review): inferred from the names only.
 */
.align 6
M0: .octa 0x0004080c0105090d02060a0e03070b0f
M0SR: .octa 0x0004080c05090d010a0e02060f03070b
SR: .octa 0x0f0e0d0c0a09080b0504070600030201
SRM0: .octa 0x01060b0c0207080d0304090e00050a0f
M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03
ISR: .octa 0x0f0e0d0c080b0a090504070602010003
ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
/*
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 *
 * Convert an expanded AES key schedule into the layout consumed by
 * aesbs_encrypt8 and aesbs_decrypt8.
 *
 * x0 = out, x1 = rk, x2 = rounds
 *
 * Output layout:
 *   16 bytes               round 0 key, copied unmodified
 *   (rounds - 1) * 128     one entry per middle round: eight 16-byte
 *                          vectors, vector N holding 0xff in every byte
 *                          whose bit N is set in the M0-permuted round key,
 *                          with vectors 0, 1, 5 and 6 inverted
 *   16 bytes               last round key XORed with 0x63 in every byte
 *
 * The inverted vectors correspond to the bits set in 0x63 (0b01100011).
 */
ENTRY(aesbs_convert_key)
ld1 {v7.4s}, [x1], #16 // load round 0 key
ld1 {v17.4s}, [x1], #16 // load round 1 key
movi v8.16b, #0x01 // bit masks
movi v9.16b, #0x02
movi v10.16b, #0x04
movi v11.16b, #0x08
movi v12.16b, #0x10
movi v13.16b, #0x20
movi v14.16b, #0x40
movi v15.16b, #0x80
ldr q16, M0
sub x2, x2, #1 // number of middle rounds to convert
str q7, [x0], #16 // save round 0 key
.Lkey_loop:
tbl v7.16b ,{v17.16b}, v16.16b
ld1 {v17.4s}, [x1], #16 // load next round key
// cmtst sets a byte to 0xff if the tested bit is set, to 0x00 otherwise
cmtst v0.16b, v7.16b, v8.16b
cmtst v1.16b, v7.16b, v9.16b
cmtst v2.16b, v7.16b, v10.16b
cmtst v3.16b, v7.16b, v11.16b
cmtst v4.16b, v7.16b, v12.16b
cmtst v5.16b, v7.16b, v13.16b
cmtst v6.16b, v7.16b, v14.16b
cmtst v7.16b, v7.16b, v15.16b
// invert the vectors of bits 0, 1, 5 and 6 (the bits set in 0x63)
not v0.16b, v0.16b
not v1.16b, v1.16b
not v5.16b, v5.16b
not v6.16b, v6.16b
subs x2, x2, #1
// the first stp advances x0 by 128, the others address the same entry
// through negative offsets
stp q0, q1, [x0], #128
stp q2, q3, [x0, #-96]
stp q4, q5, [x0, #-64]
stp q6, q7, [x0, #-32]
b.ne .Lkey_loop
// v17 now holds the last round key, which is stored as a plain vector
movi v7.16b, #0x63 // compose .L63
eor v17.16b, v17.16b, v7.16b
str q17, [x0]
ret
ENDPROC(aesbs_convert_key)
/*
 * aesbs_encrypt8 - encrypt the eight blocks held in v0-v7.
 *
 * In:  v0-v7  = input blocks
 *      bskey  = key schedule in the layout written by aesbs_convert_key
 *      rounds = number of AES rounds
 * Out: the results are left in v0, v1, v4, v6, v3, v7, v2, v5 (this is the
 *      register order in which the callers in this file store them).
 *
 * Overwrites v8-v15 and v24, advances bskey, counts rounds down and changes
 * the condition flags. This is a file-local routine (no ENTRY()).
 */
.align 4
aesbs_encrypt8:
ldr q9, [bskey], #16 // round 0 key
ldr q8, M0SR
ldr q24, SR
// add the round 0 key to every block and permute the bytes with M0SR;
// v10-v15 are only temporaries here (v10/v11 are used twice)
eor v10.16b, v0.16b, v9.16b // xor with round0 key
eor v11.16b, v1.16b, v9.16b
tbl v0.16b, {v10.16b}, v8.16b
eor v12.16b, v2.16b, v9.16b
tbl v1.16b, {v11.16b}, v8.16b
eor v13.16b, v3.16b, v9.16b
tbl v2.16b, {v12.16b}, v8.16b
eor v14.16b, v4.16b, v9.16b
tbl v3.16b, {v13.16b}, v8.16b
eor v15.16b, v5.16b, v9.16b
tbl v4.16b, {v14.16b}, v8.16b
eor v10.16b, v6.16b, v9.16b
tbl v5.16b, {v15.16b}, v8.16b
eor v11.16b, v7.16b, v9.16b
tbl v6.16b, {v10.16b}, v8.16b
tbl v7.16b, {v11.16b}, v8.16b
bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
sub rounds, rounds, #1
// the first round enters at the sbox step, skipping shift_rows
b .Lenc_sbox
.Lenc_loop:
shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
v13, v14, v15
subs rounds, rounds, #1
// carry clear means the subtraction borrowed: no rounds are left
b.cc .Lenc_done
enc_next_rk
// from here on the state lives in the register order v0, v1, v4, v6, v3,
// v7, v2, v5
mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
v13, v14, v15
add_round_key v0, v1, v2, v3, v4, v5, v6, v7
// tests the flags of the subs above; the macros in between are assumed
// not to touch them
b.ne .Lenc_loop
// the counter reached zero: use SRM0 for the remaining shift_rows
ldr q24, SRM0
b .Lenc_loop
.Lenc_done:
ldr q12, [bskey] // last round key
bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
eor v0.16b, v0.16b, v12.16b
eor v1.16b, v1.16b, v12.16b
eor v4.16b, v4.16b, v12.16b
eor v6.16b, v6.16b, v12.16b
eor v3.16b, v3.16b, v12.16b
eor v7.16b, v7.16b, v12.16b
eor v2.16b, v2.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
ENDPROC(aesbs_encrypt8)
/*
 * aesbs_decrypt8 - decrypt the eight blocks held in v0-v7.
 *
 * In:  v0-v7  = input blocks
 *      bskey  = start of the key schedule written by aesbs_convert_key
 *      rounds = number of AES rounds
 * Out: the results are left in v0, v1, v6, v4, v2, v7, v3, v5 (this is the
 *      register order in which the callers in this file store them).
 *
 * The key schedule is consumed back to front. Overwrites x9, v8-v15 and
 * v24, modifies bskey and rounds and changes the condition flags. This is
 * a file-local routine (no ENTRY()).
 */
.align 4
aesbs_decrypt8:
// bskey += rounds * 128 - 112, which is the offset of the final 16-byte
// key in the layout written by aesbs_convert_key (16 + (rounds - 1) * 128)
lsl x9, rounds, #7
add bskey, bskey, x9
ldr q9, [bskey, #-112]! // round 0 key
ldr q8, M0ISR
ldr q24, ISR
// add the first key to every block and permute the bytes with M0ISR;
// v10-v15 are only temporaries here (v10/v11 are used twice)
eor v10.16b, v0.16b, v9.16b // xor with round0 key
eor v11.16b, v1.16b, v9.16b
tbl v0.16b, {v10.16b}, v8.16b
eor v12.16b, v2.16b, v9.16b
tbl v1.16b, {v11.16b}, v8.16b
eor v13.16b, v3.16b, v9.16b
tbl v2.16b, {v12.16b}, v8.16b
eor v14.16b, v4.16b, v9.16b
tbl v3.16b, {v13.16b}, v8.16b
eor v15.16b, v5.16b, v9.16b
tbl v4.16b, {v14.16b}, v8.16b
eor v10.16b, v6.16b, v9.16b
tbl v5.16b, {v15.16b}, v8.16b
eor v11.16b, v7.16b, v9.16b
tbl v6.16b, {v10.16b}, v8.16b
tbl v7.16b, {v11.16b}, v8.16b
bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
sub rounds, rounds, #1
// the first round enters at the inv_sbox step, skipping shift_rows
b .Ldec_sbox
.Ldec_loop:
shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
v13, v14, v15
subs rounds, rounds, #1
// carry clear means the subtraction borrowed: no rounds are left
b.cc .Ldec_done
dec_next_rk
// from here on the state lives in the register order v0, v1, v6, v4, v2,
// v7, v3, v5
add_round_key v0, v1, v6, v4, v2, v7, v3, v5
inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
v13, v14, v15
// tests the flags of the subs above; the macros in between are assumed
// not to touch them
b.ne .Ldec_loop
// the counter reached zero: use ISRM0 for the remaining shift_rows
ldr q24, ISRM0
b .Ldec_loop
.Ldec_done:
// presumably bskey has been walked back by dec_next_rk so that this
// addresses the 16-byte key at the start of the schedule - TODO confirm
ldr q12, [bskey, #-16] // last round key
bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11
eor v0.16b, v0.16b, v12.16b
eor v1.16b, v1.16b, v12.16b
eor v6.16b, v6.16b, v12.16b
eor v4.16b, v4.16b, v12.16b
eor v2.16b, v2.16b, v12.16b
eor v7.16b, v7.16b, v12.16b
eor v3.16b, v3.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
ENDPROC(aesbs_decrypt8)
/*
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks)
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks)
 *
 * __ecb_crypt is the shared body of both functions.
 *
 *   do8    - routine processing eight blocks in v0-v7 (aesbs_encrypt8 or
 *            aesbs_decrypt8)
 *   o0-o7  - registers in which do8 leaves blocks 0-7
 *
 * x0 = out, x1 = in, x2 = rk, x3 = rounds, x4 = blocks
 *
 * Every pass handles up to eight blocks. x5 is zero for a full pass;
 * when fewer than eight blocks remain it is 1 << remaining, so that
 * "tbnz x5, #k" stops the loads and stores after k blocks. do8 always
 * works on all eight registers; the surplus results are simply not stored.
 * NOTE(review): there is no test for bit 0, so blocks is assumed to be
 * at least 1.
 */
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
stp x29, x30, [sp, #-16]!
mov x29, sp
99: mov x5, #1
lsl x5, x5, x4
subs w4, w4, #8
// x4 = blocks left after this pass (0 if the subtraction went negative)
csel x4, x4, xzr, pl
// keep the stop bit only if fewer than eight blocks were left
csel x5, x5, xzr, mi
ld1 {v0.16b}, [x1], #16
tbnz x5, #1, 0f
ld1 {v1.16b}, [x1], #16
tbnz x5, #2, 0f
ld1 {v2.16b}, [x1], #16
tbnz x5, #3, 0f
ld1 {v3.16b}, [x1], #16
tbnz x5, #4, 0f
ld1 {v4.16b}, [x1], #16
tbnz x5, #5, 0f
ld1 {v5.16b}, [x1], #16
tbnz x5, #6, 0f
ld1 {v6.16b}, [x1], #16
tbnz x5, #7, 0f
ld1 {v7.16b}, [x1], #16
// do8 modifies bskey and rounds, so reload them for every pass
0: mov bskey, x2
mov rounds, x3
bl \do8
st1 {\o0\().16b}, [x0], #16
tbnz x5, #1, 1f
st1 {\o1\().16b}, [x0], #16
tbnz x5, #2, 1f
st1 {\o2\().16b}, [x0], #16
tbnz x5, #3, 1f
st1 {\o3\().16b}, [x0], #16
tbnz x5, #4, 1f
st1 {\o4\().16b}, [x0], #16
tbnz x5, #5, 1f
st1 {\o5\().16b}, [x0], #16
tbnz x5, #6, 1f
st1 {\o6\().16b}, [x0], #16
tbnz x5, #7, 1f
st1 {\o7\().16b}, [x0], #16
cbnz x4, 99b
1: ldp x29, x30, [sp], #16
ret
.endm
// ECB encryption: __ecb_crypt around aesbs_encrypt8; the register list is
// the order in which aesbs_encrypt8 leaves blocks 0-7
.align 4
ENTRY(aesbs_ecb_encrypt)
__ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_ecb_encrypt)
// ECB decryption: __ecb_crypt around aesbs_decrypt8; the register list is
// the order in which aesbs_decrypt8 leaves blocks 0-7
.align 4
ENTRY(aesbs_ecb_decrypt)
__ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_ecb_decrypt)
/*
 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks, u8 iv[])
 *
 * x0 = out, x1 = in, x2 = rk, x3 = rounds, x4 = blocks, x5 = iv
 *
 * Every pass decrypts up to eight blocks with aesbs_decrypt8 and XORs
 * each result with the preceding ciphertext block (the IV for the first
 * block). x6 is zero for a full pass; when fewer than eight blocks remain
 * it is 1 << remaining, so that "tbnz x6, #k" stops after k blocks.
 * On return iv[] holds the last ciphertext block that was processed.
 * NOTE(review): there is no test for bit 0, so blocks is assumed to be
 * at least 1.
 */
.align 4
ENTRY(aesbs_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
99: mov x6, #1
lsl x6, x6, x4
subs w4, w4, #8
// x4 = blocks left after this pass (0 if the subtraction went negative)
csel x4, x4, xzr, pl
// keep the stop bit only if fewer than eight blocks were left
csel x6, x6, xzr, mi
// load the ciphertext and keep copies of blocks 0-6 in v25-v31, since
// they are needed for the chaining XOR after decryption
ld1 {v0.16b}, [x1], #16
mov v25.16b, v0.16b
tbnz x6, #1, 0f
ld1 {v1.16b}, [x1], #16
mov v26.16b, v1.16b
tbnz x6, #2, 0f
ld1 {v2.16b}, [x1], #16
mov v27.16b, v2.16b
tbnz x6, #3, 0f
ld1 {v3.16b}, [x1], #16
mov v28.16b, v3.16b
tbnz x6, #4, 0f
ld1 {v4.16b}, [x1], #16
mov v29.16b, v4.16b
tbnz x6, #5, 0f
ld1 {v5.16b}, [x1], #16
mov v30.16b, v5.16b
tbnz x6, #6, 0f
ld1 {v6.16b}, [x1], #16
mov v31.16b, v6.16b
tbnz x6, #7, 0f
// no copy and no post-increment for block 7: it is read again from
// memory after decryption
ld1 {v7.16b}, [x1]
// aesbs_decrypt8 modifies bskey and rounds, so reload them for every pass
0: mov bskey, x2
mov rounds, x3
bl aesbs_decrypt8
ld1 {v24.16b}, [x5] // load IV
// aesbs_decrypt8 leaves blocks 0-7 in v0, v1, v6, v4, v2, v7, v3, v5;
// XOR each with the previous ciphertext block
eor v1.16b, v1.16b, v25.16b
eor v6.16b, v6.16b, v26.16b
eor v4.16b, v4.16b, v27.16b
eor v2.16b, v2.16b, v28.16b
eor v7.16b, v7.16b, v29.16b
eor v0.16b, v0.16b, v24.16b
eor v3.16b, v3.16b, v30.16b
eor v5.16b, v5.16b, v31.16b
// store the plaintext; v24 tracks the ciphertext of the last block
// stored so far, which becomes the next IV
st1 {v0.16b}, [x0], #16
mov v24.16b, v25.16b
tbnz x6, #1, 1f
st1 {v1.16b}, [x0], #16
mov v24.16b, v26.16b
tbnz x6, #2, 1f
st1 {v6.16b}, [x0], #16
mov v24.16b, v27.16b
tbnz x6, #3, 1f
st1 {v4.16b}, [x0], #16
mov v24.16b, v28.16b
tbnz x6, #4, 1f
st1 {v2.16b}, [x0], #16
mov v24.16b, v29.16b
tbnz x6, #5, 1f
st1 {v7.16b}, [x0], #16
mov v24.16b, v30.16b
tbnz x6, #6, 1f
st1 {v3.16b}, [x0], #16
mov v24.16b, v31.16b
tbnz x6, #7, 1f
// reload the ciphertext of block 7 before its plaintext is stored
ld1 {v24.16b}, [x1], #16
st1 {v5.16b}, [x0], #16
1: st1 {v24.16b}, [x5] // store IV
cbnz x4, 99b
ldp x29, x30, [sp], #16
ret
ENDPROC(aesbs_cbc_decrypt)
/*
 * next_tweak - derive the next XTS tweak.
 *
 * out   = destination register (may be the same register as in)
 * in    = current tweak
 * const = vector holding .Lxts_mul_x
 * tmp   = scratch register
 *
 * Both 64-bit lanes of in are doubled (add in, in), and the bit shifted
 * out of each lane is fed into the other lane: sshr #63 turns the top bit
 * of a lane into an all-ones or all-zeroes mask, the and selects the
 * constant of that lane, and ext #8 swaps the two lanes before the XOR.
 */
.macro next_tweak, out, in, const, tmp
sshr \tmp\().2d, \in\().2d, #63
and \tmp\().16b, \tmp\().16b, \const\().16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
/*
 * Constant for next_tweak, loaded into v30 by __xts_crypt with "ldr q".
 * The two quads are the values XORed in when a bit is shifted out of a
 * 64-bit lane of the tweak: 1 and 0x87. CPU_LE/CPU_BE select the quad
 * order matching the byte order of the build.
 */
.align 4
.Lxts_mul_x:
CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 )
/*
 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks, u8 iv[])
 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks, u8 iv[])
 *
 * __xts_crypt8 is the helper shared by both: it loads up to eight blocks,
 * XORs each with its tweak and then branches to the cipher routine whose
 * address the caller placed in x7. That routine returns directly to the
 * caller of __xts_crypt8, since x30 is left untouched here.
 *
 * In:  x1 = in, x2 = rk, x3 = rounds, x4 = blocks left,
 *      v25 = tweak of the first block, v30 = .Lxts_mul_x, x7 = routine
 * Out: x6 = 0 for a full pass, else 1 << (blocks handled in this pass)
 *      x4 = blocks left after this pass
 *      v25-v28 = tweaks of blocks 0-3
 *      [sp, #16] ... [sp, #64] = tweaks of blocks 4-7 (slots in the frame
 *      allocated by the __xts_crypt macro)
 *      v29 = tweak following the last block loaded, when four or more
 *      blocks were loaded
 * Overwrites v31.
 */
__xts_crypt8:
mov x6, #1
lsl x6, x6, x4
subs w4, w4, #8
// x4 = blocks left after this pass (0 if the subtraction went negative)
csel x4, x4, xzr, pl
// keep the stop bit only if fewer than eight blocks were left
csel x6, x6, xzr, mi
ld1 {v0.16b}, [x1], #16
next_tweak v26, v25, v30, v31
eor v0.16b, v0.16b, v25.16b
tbnz x6, #1, 0f
ld1 {v1.16b}, [x1], #16
next_tweak v27, v26, v30, v31
eor v1.16b, v1.16b, v26.16b
tbnz x6, #2, 0f
ld1 {v2.16b}, [x1], #16
next_tweak v28, v27, v30, v31
eor v2.16b, v2.16b, v27.16b
tbnz x6, #3, 0f
ld1 {v3.16b}, [x1], #16
next_tweak v29, v28, v30, v31
eor v3.16b, v3.16b, v28.16b
tbnz x6, #4, 0f
// from block 4 on, the tweak is kept in v29 only: save it on the stack
// before advancing v29 in place
ld1 {v4.16b}, [x1], #16
str q29, [sp, #16]
eor v4.16b, v4.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #5, 0f
ld1 {v5.16b}, [x1], #16
str q29, [sp, #32]
eor v5.16b, v5.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #6, 0f
ld1 {v6.16b}, [x1], #16
str q29, [sp, #48]
eor v6.16b, v6.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #7, 0f
ld1 {v7.16b}, [x1], #16
str q29, [sp, #64]
eor v7.16b, v7.16b, v29.16b
next_tweak v29, v29, v30, v31
0: mov bskey, x2
mov rounds, x3
// tail call into aesbs_encrypt8 or aesbs_decrypt8
br x7
ENDPROC(__xts_crypt8)
/*
 * __xts_crypt - shared body of aesbs_xts_encrypt and aesbs_xts_decrypt.
 *
 *   do8    - aesbs_encrypt8 or aesbs_decrypt8
 *   o0-o7  - registers in which do8 leaves blocks 0-7
 *
 * x0 = out, x1 = in, x2 = rk, x3 = rounds, x4 = blocks, x5 = iv
 *
 * The 80-byte frame holds x29/x30 plus four 16-byte slots in which
 * __xts_crypt8 saves the tweaks of blocks 4-7. iv[] supplies the first
 * tweak and receives the tweak following the last block processed.
 */
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
stp x29, x30, [sp, #-80]!
mov x29, sp
ldr q30, .Lxts_mul_x
ld1 {v25.16b}, [x5]
99: adr x7, \do8
bl __xts_crypt8
// tweaks of blocks 4-7; the slots are read unconditionally, but the
// values are only used if the corresponding blocks were processed
ldp q16, q17, [sp, #16]
ldp q18, q19, [sp, #48]
eor \o0\().16b, \o0\().16b, v25.16b
eor \o1\().16b, \o1\().16b, v26.16b
eor \o2\().16b, \o2\().16b, v27.16b
eor \o3\().16b, \o3\().16b, v28.16b
// store the results; v25 is updated along the way so that it holds the
// tweak following the last block stored
st1 {\o0\().16b}, [x0], #16
mov v25.16b, v26.16b
tbnz x6, #1, 1f
st1 {\o1\().16b}, [x0], #16
mov v25.16b, v27.16b
tbnz x6, #2, 1f
st1 {\o2\().16b}, [x0], #16
mov v25.16b, v28.16b
tbnz x6, #3, 1f
st1 {\o3\().16b}, [x0], #16
// with four or more blocks, v29 already is the tweak following the last
// block loaded by __xts_crypt8
mov v25.16b, v29.16b
tbnz x6, #4, 1f
eor \o4\().16b, \o4\().16b, v16.16b
eor \o5\().16b, \o5\().16b, v17.16b
eor \o6\().16b, \o6\().16b, v18.16b
eor \o7\().16b, \o7\().16b, v19.16b
st1 {\o4\().16b}, [x0], #16
tbnz x6, #5, 1f
st1 {\o5\().16b}, [x0], #16
tbnz x6, #6, 1f
st1 {\o6\().16b}, [x0], #16
tbnz x6, #7, 1f
st1 {\o7\().16b}, [x0], #16
cbnz x4, 99b
// hand the next tweak back to the caller
1: st1 {v25.16b}, [x5]
ldp x29, x30, [sp], #80
ret
.endm
// XTS encryption: __xts_crypt around aesbs_encrypt8; the register list is
// the order in which aesbs_encrypt8 leaves blocks 0-7
ENTRY(aesbs_xts_encrypt)
__xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_xts_encrypt)
// XTS decryption: __xts_crypt around aesbs_decrypt8; the register list is
// the order in which aesbs_decrypt8 leaves blocks 0-7
ENTRY(aesbs_xts_decrypt)
__xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_xts_decrypt)
/*
 * next_ctr - materialise the current counter value in vector \v and
 * advance the counter.
 *
 * The counter is kept as a 128-bit integer in x7 (upper half) and x8
 * (lower half). It is copied into \v first and then incremented with
 * carry from x8 into x7, so \v receives the value from before the
 * increment. rev64 reverses the bytes within each 64-bit lane of \v.
 * Changes the condition flags.
 */
.macro next_ctr, v
mov \v\().d[1], x8
adds x8, x8, #1
mov \v\().d[0], x7
adc x7, x7, xzr
rev64 \v\().16b, \v\().16b
.endm
/*
 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 * int rounds, int blocks, u8 iv[], u8 final[])
 *
 * x0 = out, x1 = in, x2 = rk, x3 = rounds, x4 = blocks, x5 = iv,
 * x6 = final
 *
 * Encrypts 'blocks' full blocks in CTR mode. If final is not NULL, one
 * extra keystream block is generated and written to final[] instead of
 * being XORed with input, so that the caller can handle a partial tail.
 * On return iv[] holds the counter value following the last one used.
 *
 * Register use:
 *   x7:x8  counter as a 128-bit integer (upper:lower half)
 *   x9     0 for a full pass, else 1 << (blocks to generate in this pass)
 *   x10    1 if final is not NULL, else 0
 *   v0     counter block of the first block of each pass
 */
ENTRY(aesbs_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
cmp x6, #0
cset x10, ne
add x4, x4, x10 // do one extra block if final
ldp x7, x8, [x5]
ld1 {v0.16b}, [x5]
CPU_LE( rev x7, x7 )
CPU_LE( rev x8, x8 )
// v0 already holds the first counter block, so x7:x8 starts one ahead
adds x8, x8, #1
adc x7, x7, xzr
99: mov x9, #1
lsl x9, x9, x4
subs w4, w4, #8
// x4 = blocks left after this pass (0 if the subtraction went negative)
csel x4, x4, xzr, pl
// keep the stop bit unless more than eight blocks were left
csel x9, x9, xzr, le
tbnz x9, #1, 0f
next_ctr v1
tbnz x9, #2, 0f
next_ctr v2
tbnz x9, #3, 0f
next_ctr v3
tbnz x9, #4, 0f
next_ctr v4
tbnz x9, #5, 0f
next_ctr v5
tbnz x9, #6, 0f
next_ctr v6
tbnz x9, #7, 0f
next_ctr v7
// aesbs_encrypt8 modifies bskey and rounds, so reload them for every pass
0: mov bskey, x2
mov rounds, x3
bl aesbs_encrypt8
lsr x9, x9, x10 // disregard the extra block
// aesbs_encrypt8 leaves blocks 0-7 in v0, v1, v4, v6, v3, v7, v2, v5.
// Bit k of x9 now means: block k is not a full input block. If final
// was given, block k is the keystream block to hand back, which is done
// by the handlers 0-7 at the end of this function.
tbnz x9, #0, 0f
ld1 {v8.16b}, [x1], #16
eor v0.16b, v0.16b, v8.16b
st1 {v0.16b}, [x0], #16
tbnz x9, #1, 1f
ld1 {v9.16b}, [x1], #16
eor v1.16b, v1.16b, v9.16b
st1 {v1.16b}, [x0], #16
tbnz x9, #2, 2f
ld1 {v10.16b}, [x1], #16
eor v4.16b, v4.16b, v10.16b
st1 {v4.16b}, [x0], #16
tbnz x9, #3, 3f
ld1 {v11.16b}, [x1], #16
eor v6.16b, v6.16b, v11.16b
st1 {v6.16b}, [x0], #16
tbnz x9, #4, 4f
ld1 {v12.16b}, [x1], #16
eor v3.16b, v3.16b, v12.16b
st1 {v3.16b}, [x0], #16
tbnz x9, #5, 5f
ld1 {v13.16b}, [x1], #16
eor v7.16b, v7.16b, v13.16b
st1 {v7.16b}, [x0], #16
tbnz x9, #6, 6f
ld1 {v14.16b}, [x1], #16
eor v2.16b, v2.16b, v14.16b
st1 {v2.16b}, [x0], #16
tbnz x9, #7, 7f
ld1 {v15.16b}, [x1], #16
eor v5.16b, v5.16b, v15.16b
st1 {v5.16b}, [x0], #16
// v0 = counter block for the next pass, or the value returned in iv[]
8: next_ctr v0
cbnz x4, 99b
.Lctr_done:
st1 {v0.16b}, [x5]
ldp x29, x30, [sp], #16
ret
/*
 * If we are handling the tail of the input (x6 != NULL), return the
 * final keystream block back to the caller.
 *
 * Case 0 is reached when the tail is the only block of a pass, i.e. when
 * the number of full blocks is zero or a multiple of eight. It must store
 * the keystream block like the other cases do; branching straight to the
 * exit would leave final[] unwritten and would store the keystream block
 * in iv[] instead of the next counter value.
 */
0: cbz x6, 8b
st1 {v0.16b}, [x6]
b 8b
1: cbz x6, 8b
st1 {v1.16b}, [x6]
b 8b
2: cbz x6, 8b
st1 {v4.16b}, [x6]
b 8b
3: cbz x6, 8b
st1 {v6.16b}, [x6]
b 8b
4: cbz x6, 8b
st1 {v3.16b}, [x6]
b 8b
5: cbz x6, 8b
st1 {v7.16b}, [x6]
b 8b
6: cbz x6, 8b
st1 {v2.16b}, [x6]
b 8b
7: cbz x6, 8b
st1 {v5.16b}, [x6]
b 8b
ENDPROC(aesbs_ctr_encrypt)

View File

@ -0,0 +1,439 @@
/*
* Bit sliced AES using NEON instructions
*
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <linux/module.h>
#include <linux/string.h>
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
/*
 * Routines implemented in the bit-sliced NEON assembly. 'rk' is the key
 * schedule in the layout produced by aesbs_convert_key(), 'blocks' is a
 * count of AES blocks, and 'iv' is read and updated in place.
 */
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks);
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks);
asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
/* 'final', if not NULL, receives one extra block of keystream */
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[], u8 final[]);
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
/* borrowed from aes-neon-blk.ko */
/* these take the ordinary expanded key (u32 rk[]), not the bit-sliced one */
asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, int first);
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[],
int first);
/*
 * Bit-sliced key schedule as written by aesbs_convert_key(): 16 bytes for
 * round 0, 128 bytes for each middle round and 16 bytes for the last round.
 * The array is sized for the largest key (14 rounds): 13 * 128 + 32 bytes.
 */
struct aesbs_ctx {
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32];
int rounds;
} __aligned(AES_BLOCK_SIZE);
/*
 * CBC context: the bit-sliced schedule is used for decryption, 'enc' holds
 * the ordinary encryption round keys passed to neon_aes_cbc_encrypt().
 */
struct aesbs_cbc_ctx {
struct aesbs_ctx key;
u32 enc[AES_MAX_KEYLENGTH_U32];
};
/*
 * XTS context: 'key' is the bit-sliced schedule of the data key, 'twkey'
 * holds the ordinary encryption round keys of the tweak key. 'key' must
 * stay the first member: aesbs_xts_setkey() passes the tfm to
 * aesbs_setkey(), which treats the context as a struct aesbs_ctx.
 */
struct aesbs_xts_ctx {
struct aesbs_ctx key;
u32 twkey[AES_MAX_KEYLENGTH_U32];
};
/*
 * Expand an AES key and convert the encryption round keys into the
 * bit-sliced layout used by the NEON routines.
 *
 * Returns 0 on success, or the error reported by crypto_aes_expand_key()
 * (e.g. for an unsupported key length).
 */
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			unsigned int key_len)
{
	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = crypto_aes_expand_key(&rk, in_key, key_len);
	if (err)
		return err;

	/* 10, 12 or 14 rounds for 16, 24 or 32 byte keys */
	ctx->rounds = 6 + key_len / 4;

	kernel_neon_begin();
	aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
	kernel_neon_end();

	/* the expanded key is secret: do not leave a copy on the stack */
	memzero_explicit(&rk, sizeof(rk));

	return 0;
}
/*
 * Common ECB walker: feeds each chunk of the request to 'fn', which is
 * either aesbs_ecb_encrypt or aesbs_ecb_decrypt.
 *
 * Returns the status of the last skcipher walk operation.
 */
static int __ecb_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks))
{
	struct aesbs_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	for (;;) {
		unsigned int nblocks;

		if (walk.nbytes < AES_BLOCK_SIZE)
			break;

		nblocks = walk.nbytes / AES_BLOCK_SIZE;

		/* more data follows: only consume whole multiples of the stride */
		if (walk.nbytes < walk.total)
			nblocks = round_down(nblocks,
					     walk.stride / AES_BLOCK_SIZE);

		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
		   ctx->rounds, nblocks);

		err = skcipher_walk_done(&walk,
					 walk.nbytes -
					 nblocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
/* skcipher .encrypt hook for ECB: run the common walker with the encryptor */
static int ecb_encrypt(struct skcipher_request *req)
{
	int ret = __ecb_crypt(req, aesbs_ecb_encrypt);

	return ret;
}
/* skcipher .decrypt hook for ECB: run the common walker with the decryptor */
static int ecb_decrypt(struct skcipher_request *req)
{
	int ret = __ecb_crypt(req, aesbs_ecb_decrypt);

	return ret;
}
/*
 * Set the key for CBC: keeps the ordinary encryption round keys (used by
 * the non-bit-sliced CBC encryption fallback) and the bit-sliced schedule
 * (used for decryption).
 *
 * Returns 0 on success, or the error reported by crypto_aes_expand_key().
 */
static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			    unsigned int key_len)
{
	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = crypto_aes_expand_key(&rk, in_key, key_len);
	if (err)
		return err;

	/* 10, 12 or 14 rounds for 16, 24 or 32 byte keys */
	ctx->key.rounds = 6 + key_len / 4;

	memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));

	kernel_neon_begin();
	aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
	kernel_neon_end();

	/* the expanded key is secret: do not leave a copy on the stack */
	memzero_explicit(&rk, sizeof(rk));

	return 0;
}
/*
 * CBC encryption is handled by the plain NEON routine from aes-neon-blk,
 * using the ordinary round keys kept in ctx->enc.
 *
 * Returns the status of the last skcipher walk operation.
 */
static int cbc_encrypt(struct skcipher_request *req)
{
	struct aesbs_cbc_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	struct skcipher_walk walk;
	int first;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	/* 'first' is 1 for the first chunk only */
	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
		unsigned int nblocks = walk.nbytes / AES_BLOCK_SIZE;

		/* fall back to the non-bitsliced NEON implementation */
		neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				     ctx->enc, ctx->key.rounds, nblocks,
				     walk.iv, first);

		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
/*
 * CBC decryption with the bit-sliced NEON routine; walk.iv carries the
 * chaining value from one chunk to the next.
 *
 * Returns the status of the last skcipher walk operation.
 */
static int cbc_decrypt(struct skcipher_request *req)
{
	struct aesbs_cbc_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	for (;;) {
		unsigned int nblocks;

		if (walk.nbytes < AES_BLOCK_SIZE)
			break;

		nblocks = walk.nbytes / AES_BLOCK_SIZE;

		/* more data follows: only consume whole multiples of the stride */
		if (walk.nbytes < walk.total)
			nblocks = round_down(nblocks,
					     walk.stride / AES_BLOCK_SIZE);

		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				  ctx->key.rk, ctx->key.rounds, nblocks,
				  walk.iv);

		err = skcipher_walk_done(&walk,
					 walk.nbytes -
					 nblocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
/*
 * CTR mode (encryption and decryption are the same operation).
 *
 * Full blocks are processed by the assembly routine. If the request does
 * not end on a block boundary, the last call additionally asks for one
 * block of keystream, which is XORed into the partial tail here.
 *
 * Returns the status of the last skcipher walk operation.
 */
static int ctr_encrypt(struct skcipher_request *req)
{
	struct aesbs_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	struct skcipher_walk walk;
	u8 keystream[AES_BLOCK_SIZE];
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int nblocks = walk.nbytes / AES_BLOCK_SIZE;
		unsigned int tail = walk.total % AES_BLOCK_SIZE;
		u8 *final = NULL;
		u8 *tail_dst;
		u8 *tail_src;

		if (walk.nbytes < walk.total) {
			/* more data follows: whole multiples of the stride only */
			nblocks = round_down(nblocks,
					     walk.stride / AES_BLOCK_SIZE);
		} else if (tail) {
			/* last chunk with a partial block at the end */
			final = keystream;
		}

		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				  ctx->rk, ctx->rounds, nblocks, walk.iv,
				  final);

		if (!final) {
			err = skcipher_walk_done(&walk,
						 walk.nbytes -
						 nblocks * AES_BLOCK_SIZE);
			continue;
		}

		/* XOR the returned keystream block into the partial tail */
		tail_dst = walk.dst.virt.addr + nblocks * AES_BLOCK_SIZE;
		tail_src = walk.src.virt.addr + nblocks * AES_BLOCK_SIZE;

		if (tail_dst != tail_src)
			memcpy(tail_dst, tail_src, tail);
		crypto_xor(tail_dst, final, tail);

		err = skcipher_walk_done(&walk, 0);
		break;
	}
	kernel_neon_end();

	return err;
}
/*
 * Set the XTS key. The first half of in_key is the data key, which is
 * converted to the bit-sliced layout by aesbs_setkey(); the second half
 * is the tweak key, whose ordinary encryption round keys are kept in
 * ctx->twkey.
 *
 * Returns 0 on success, or the error reported by xts_verify_key(),
 * crypto_aes_expand_key() or aesbs_setkey().
 */
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			    unsigned int key_len)
{
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_aes_ctx rk;
	int err;

	err = xts_verify_key(tfm, in_key, key_len);
	if (err)
		return err;

	key_len /= 2;
	err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
	if (err)
		return err;

	memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));

	/* the expanded tweak key is secret: do not leave a copy on the stack */
	memzero_explicit(&rk, sizeof(rk));

	return aesbs_setkey(tfm, in_key, key_len);
}
/*
 * Common XTS walker: 'fn' is either aesbs_xts_encrypt or aesbs_xts_decrypt.
 *
 * The IV is first encrypted in place with the tweak key to obtain the
 * initial tweak; the assembly routines then update walk.iv so that it
 * carries the running tweak from one chunk to the next.
 *
 * Returns the error from skcipher_walk_virt() if the walk could not be
 * started, otherwise the status of the last skcipher walk operation.
 */
static int __xts_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, true);
	/*
	 * Bail out before touching walk.iv: if the walk could not be set up,
	 * the IV must not be encrypted in place, and there is no point in
	 * claiming the NEON unit.
	 */
	if (err)
		return err;

	kernel_neon_begin();

	/* turn the IV into the initial tweak */
	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
			     ctx->key.rounds, 1, 1);

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		/* more data follows: only consume whole multiples of the stride */
		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
		   ctx->key.rounds, blocks, walk.iv);
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	kernel_neon_end();

	return err;
}
/* skcipher .encrypt hook for XTS: run the common walker with the encryptor */
static int xts_encrypt(struct skcipher_request *req)
{
	int ret = __xts_crypt(req, aesbs_xts_encrypt);

	return ret;
}
/* skcipher .decrypt hook for XTS: run the common walker with the decryptor */
static int xts_decrypt(struct skcipher_request *req)
{
	int ret = __xts_crypt(req, aesbs_xts_decrypt);

	return ret;
}
/*
 * Algorithms registered by this module.
 *
 * The entries whose names start with "__" are flagged CRYPTO_ALG_INTERNAL;
 * aes_init() creates a SIMD wrapper for each of them under the same names
 * without the "__" prefix. The plain "ctr(aes)" entry is not internal and
 * is registered as is, at a priority one below the others.
 *
 * walksize is eight blocks everywhere, matching the eight blocks the
 * assembly routines process per pass.
 */
static struct skcipher_alg aes_algs[] = { {
.base.cra_name = "__ecb(aes)",
.base.cra_driver_name = "__ecb-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
.base.cra_module = THIS_MODULE,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.setkey = aesbs_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "__cbc(aes)",
.base.cra_driver_name = "__cbc-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.base.cra_module = THIS_MODULE,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_cbc_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
/* CTR is a stream mode: block size 1, same routine for both directions */
.base.cra_name = "__ctr(aes)",
.base.cra_driver_name = "__ctr-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
.base.cra_module = THIS_MODULE,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.chunksize = AES_BLOCK_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
/* not CRYPTO_ALG_INTERNAL: exposed directly, without a SIMD wrapper */
.base.cra_name = "ctr(aes)",
.base.cra_driver_name = "ctr-aes-neonbs",
.base.cra_priority = 250 - 1,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.chunksize = AES_BLOCK_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
/* XTS takes two AES keys, hence the doubled key sizes */
.base.cra_name = "__xts(aes)",
.base.cra_driver_name = "__xts-aes-neonbs",
.base.cra_priority = 250,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
.base.cra_module = THIS_MODULE,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_xts_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
} };
/* SIMD wrappers created by aes_init(), indexed like aes_algs[] */
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];

/*
 * Release every SIMD wrapper that was created, then unregister the
 * algorithms. Also used by aes_init() to unwind a partial initialisation,
 * which is why empty slots are skipped.
 */
static void aes_exit(void)
{
	struct simd_skcipher_alg **slot = aes_simd_algs;
	struct simd_skcipher_alg **end = aes_simd_algs +
					 ARRAY_SIZE(aes_simd_algs);

	for (; slot < end; slot++) {
		if (!*slot)
			continue;
		simd_skcipher_free(*slot);
	}

	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
/*
 * Module init: register the algorithms and create a SIMD wrapper for each
 * internal one.
 *
 * Returns 0 on success, -ENODEV if the CPU lacks ASIMD, or the error from
 * registration or wrapper creation (after undoing what was set up).
 */
static int __init aes_init(void)
{
	int err;
	int i;

	if (!(elf_hwcap & HWCAP_ASIMD))
		return -ENODEV;

	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
		struct skcipher_alg *alg = &aes_algs[i];
		struct simd_skcipher_alg *simd;

		/* only the internal algorithms get a wrapper */
		if (!(alg->base.cra_flags & CRYPTO_ALG_INTERNAL))
			continue;

		/* the wrapper uses the same names minus the "__" prefix */
		simd = simd_skcipher_create_compat(alg->base.cra_name + 2,
						   alg->base.cra_driver_name + 2,
						   alg->base.cra_driver_name);
		if (IS_ERR(simd)) {
			err = PTR_ERR(simd);
			aes_exit();
			return err;
		}

		aes_simd_algs[i] = simd;
	}

	return 0;
}
module_init(aes_init);
module_exit(aes_exit);

View File

@ -0,0 +1,450 @@
/*
* ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
*
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Based on:
* ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/linkage.h>
.text
.align 6
ENTRY(chacha20_block_xor_neon)
// x0: Input state matrix, s
// x1: 1 data block output, o
// x2: 1 data block input, i
//
// This function encrypts one ChaCha20 block by loading the state matrix
// in four NEON registers. It performs matrix operation on four words in
// parallel, but requires shuffling to rearrange the words after each
// round.
//
// In the comments below, x0..x3 name the four rows of the working state,
// held in v0..v3 (not the general purpose registers), and s0..s3 are
// the rows of the input state, kept in v8..v11 for the final addition.
// v4 is a temporary; v4..v7 later hold the input block. v12 holds the
// ROT8 table and the general purpose register x3 counts the 10 double
// rounds.
//
// x0..3 = s0..3
adr x3, ROT8
ld1 {v0.4s-v3.4s}, [x0]
ld1 {v8.4s-v11.4s}, [x0]
ld1 {v12.4s}, [x3]
mov x3, #10
.Ldoubleround:
// first half: the rows are used as loaded
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
add v0.4s, v0.4s, v1.4s
eor v3.16b, v3.16b, v0.16b
// swapping the 16-bit halves of each word is a rotate by 16
rev32 v3.8h, v3.8h
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
add v2.4s, v2.4s, v3.4s
eor v4.16b, v1.16b, v2.16b
// shl followed by sri (shift right and insert) forms the rotate
shl v1.4s, v4.4s, #12
sri v1.4s, v4.4s, #20
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
add v0.4s, v0.4s, v1.4s
eor v3.16b, v3.16b, v0.16b
// byte permutation through the ROT8 table
tbl v3.16b, {v3.16b}, v12.16b
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
add v2.4s, v2.4s, v3.4s
eor v4.16b, v1.16b, v2.16b
shl v1.4s, v4.4s, #7
sri v1.4s, v4.4s, #25
// rotate rows 1-3 by one, two and three words for the second half
// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
ext v1.16b, v1.16b, v1.16b, #4
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
ext v2.16b, v2.16b, v2.16b, #8
// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
ext v3.16b, v3.16b, v3.16b, #12
// second half: same operations on the rotated rows
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
add v0.4s, v0.4s, v1.4s
eor v3.16b, v3.16b, v0.16b
rev32 v3.8h, v3.8h
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
add v2.4s, v2.4s, v3.4s
eor v4.16b, v1.16b, v2.16b
shl v1.4s, v4.4s, #12
sri v1.4s, v4.4s, #20
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
add v0.4s, v0.4s, v1.4s
eor v3.16b, v3.16b, v0.16b
tbl v3.16b, {v3.16b}, v12.16b
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
add v2.4s, v2.4s, v3.4s
eor v4.16b, v1.16b, v2.16b
shl v1.4s, v4.4s, #7
sri v1.4s, v4.4s, #25
// undo the row rotation
// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
ext v1.16b, v1.16b, v1.16b, #12
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
ext v2.16b, v2.16b, v2.16b, #8
// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
ext v3.16b, v3.16b, v3.16b, #4
subs x3, x3, #1
b.ne .Ldoubleround
// add the original state, XOR with the 64 bytes of input and store
ld1 {v4.16b-v7.16b}, [x2]
// o0 = i0 ^ (x0 + s0)
add v0.4s, v0.4s, v8.4s
eor v0.16b, v0.16b, v4.16b
// o1 = i1 ^ (x1 + s1)
add v1.4s, v1.4s, v9.4s
eor v1.16b, v1.16b, v5.16b
// o2 = i2 ^ (x2 + s2)
add v2.4s, v2.4s, v10.4s
eor v2.16b, v2.16b, v6.16b
// o3 = i3 ^ (x3 + s3)
add v3.4s, v3.4s, v11.4s
eor v3.16b, v3.16b, v7.16b
st1 {v0.16b-v3.16b}, [x1]
ret
ENDPROC(chacha20_block_xor_neon)
.align 6
ENTRY(chacha20_4block_xor_neon)
// x0: Input state matrix, s
// x1: 4 data blocks output, o
// x2: 4 data blocks input, i
//
// This function encrypts four consecutive ChaCha20 blocks by loading
// the state matrix in NEON registers four times. The algorithm performs
// each operation on the corresponding word of each state matrix, hence
// requires no word shuffling. For final XORing step we transpose the
// matrix by interleaving 32- and then 64-bit words, which allows us to
// do XOR in NEON registers.
//
adr x3, CTRINC // ... and ROT8
ld1 {v30.4s-v31.4s}, [x3]
// x0..15[0-3] = s0..3[0..3]
mov x4, x0
ld4r { v0.4s- v3.4s}, [x4], #16
ld4r { v4.4s- v7.4s}, [x4], #16
ld4r { v8.4s-v11.4s}, [x4], #16
ld4r {v12.4s-v15.4s}, [x4]
// x12 += counter values 0-3
add v12.4s, v12.4s, v30.4s
mov x3, #10
.Ldoubleround4:
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
add v0.4s, v0.4s, v4.4s
add v1.4s, v1.4s, v5.4s
add v2.4s, v2.4s, v6.4s
add v3.4s, v3.4s, v7.4s
eor v12.16b, v12.16b, v0.16b
eor v13.16b, v13.16b, v1.16b
eor v14.16b, v14.16b, v2.16b
eor v15.16b, v15.16b, v3.16b
rev32 v12.8h, v12.8h
rev32 v13.8h, v13.8h
rev32 v14.8h, v14.8h
rev32 v15.8h, v15.8h
// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
add v8.4s, v8.4s, v12.4s
add v9.4s, v9.4s, v13.4s
add v10.4s, v10.4s, v14.4s
add v11.4s, v11.4s, v15.4s
eor v16.16b, v4.16b, v8.16b
eor v17.16b, v5.16b, v9.16b
eor v18.16b, v6.16b, v10.16b
eor v19.16b, v7.16b, v11.16b
shl v4.4s, v16.4s, #12
shl v5.4s, v17.4s, #12
shl v6.4s, v18.4s, #12
shl v7.4s, v19.4s, #12
sri v4.4s, v16.4s, #20
sri v5.4s, v17.4s, #20
sri v6.4s, v18.4s, #20
sri v7.4s, v19.4s, #20
// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
add v0.4s, v0.4s, v4.4s
add v1.4s, v1.4s, v5.4s
add v2.4s, v2.4s, v6.4s
add v3.4s, v3.4s, v7.4s
eor v12.16b, v12.16b, v0.16b
eor v13.16b, v13.16b, v1.16b
eor v14.16b, v14.16b, v2.16b
eor v15.16b, v15.16b, v3.16b
tbl v12.16b, {v12.16b}, v31.16b
tbl v13.16b, {v13.16b}, v31.16b
tbl v14.16b, {v14.16b}, v31.16b
tbl v15.16b, {v15.16b}, v31.16b
// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
add v8.4s, v8.4s, v12.4s
add v9.4s, v9.4s, v13.4s
add v10.4s, v10.4s, v14.4s
add v11.4s, v11.4s, v15.4s
eor v16.16b, v4.16b, v8.16b
eor v17.16b, v5.16b, v9.16b
eor v18.16b, v6.16b, v10.16b
eor v19.16b, v7.16b, v11.16b
shl v4.4s, v16.4s, #7
shl v5.4s, v17.4s, #7
shl v6.4s, v18.4s, #7
shl v7.4s, v19.4s, #7
sri v4.4s, v16.4s, #25
sri v5.4s, v17.4s, #25
sri v6.4s, v18.4s, #25
sri v7.4s, v19.4s, #25
// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
add v0.4s, v0.4s, v5.4s
add v1.4s, v1.4s, v6.4s
add v2.4s, v2.4s, v7.4s
add v3.4s, v3.4s, v4.4s
eor v15.16b, v15.16b, v0.16b
eor v12.16b, v12.16b, v1.16b
eor v13.16b, v13.16b, v2.16b
eor v14.16b, v14.16b, v3.16b
rev32 v15.8h, v15.8h
rev32 v12.8h, v12.8h
rev32 v13.8h, v13.8h
rev32 v14.8h, v14.8h
// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
add v10.4s, v10.4s, v15.4s
add v11.4s, v11.4s, v12.4s
add v8.4s, v8.4s, v13.4s
add v9.4s, v9.4s, v14.4s
eor v16.16b, v5.16b, v10.16b
eor v17.16b, v6.16b, v11.16b
eor v18.16b, v7.16b, v8.16b
eor v19.16b, v4.16b, v9.16b
shl v5.4s, v16.4s, #12
shl v6.4s, v17.4s, #12
shl v7.4s, v18.4s, #12
shl v4.4s, v19.4s, #12
sri v5.4s, v16.4s, #20
sri v6.4s, v17.4s, #20
sri v7.4s, v18.4s, #20
sri v4.4s, v19.4s, #20
// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
add v0.4s, v0.4s, v5.4s
add v1.4s, v1.4s, v6.4s
add v2.4s, v2.4s, v7.4s
add v3.4s, v3.4s, v4.4s
eor v15.16b, v15.16b, v0.16b
eor v12.16b, v12.16b, v1.16b
eor v13.16b, v13.16b, v2.16b
eor v14.16b, v14.16b, v3.16b
tbl v15.16b, {v15.16b}, v31.16b
tbl v12.16b, {v12.16b}, v31.16b
tbl v13.16b, {v13.16b}, v31.16b
tbl v14.16b, {v14.16b}, v31.16b
// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
add v10.4s, v10.4s, v15.4s
add v11.4s, v11.4s, v12.4s
add v8.4s, v8.4s, v13.4s
add v9.4s, v9.4s, v14.4s
eor v16.16b, v5.16b, v10.16b
eor v17.16b, v6.16b, v11.16b
eor v18.16b, v7.16b, v8.16b
eor v19.16b, v4.16b, v9.16b
shl v5.4s, v16.4s, #7
shl v6.4s, v17.4s, #7
shl v7.4s, v18.4s, #7
shl v4.4s, v19.4s, #7
sri v5.4s, v16.4s, #25
sri v6.4s, v17.4s, #25
sri v7.4s, v18.4s, #25
sri v4.4s, v19.4s, #25
subs x3, x3, #1
b.ne .Ldoubleround4
ld4r {v16.4s-v19.4s}, [x0], #16
ld4r {v20.4s-v23.4s}, [x0], #16
// x12 += counter values 0-3
add v12.4s, v12.4s, v30.4s
// x0[0-3] += s0[0]
// x1[0-3] += s0[1]
// x2[0-3] += s0[2]
// x3[0-3] += s0[3]
add v0.4s, v0.4s, v16.4s
add v1.4s, v1.4s, v17.4s
add v2.4s, v2.4s, v18.4s
add v3.4s, v3.4s, v19.4s
ld4r {v24.4s-v27.4s}, [x0], #16
ld4r {v28.4s-v31.4s}, [x0]
// x4[0-3] += s1[0]
// x5[0-3] += s1[1]
// x6[0-3] += s1[2]
// x7[0-3] += s1[3]
add v4.4s, v4.4s, v20.4s
add v5.4s, v5.4s, v21.4s
add v6.4s, v6.4s, v22.4s
add v7.4s, v7.4s, v23.4s
// x8[0-3] += s2[0]
// x9[0-3] += s2[1]
// x10[0-3] += s2[2]
// x11[0-3] += s2[3]
add v8.4s, v8.4s, v24.4s
add v9.4s, v9.4s, v25.4s
add v10.4s, v10.4s, v26.4s
add v11.4s, v11.4s, v27.4s
// x12[0-3] += s3[0]
// x13[0-3] += s3[1]
// x14[0-3] += s3[2]
// x15[0-3] += s3[3]
add v12.4s, v12.4s, v28.4s
add v13.4s, v13.4s, v29.4s
add v14.4s, v14.4s, v30.4s
add v15.4s, v15.4s, v31.4s
// interleave 32-bit words in state n, n+1
zip1 v16.4s, v0.4s, v1.4s
zip2 v17.4s, v0.4s, v1.4s
zip1 v18.4s, v2.4s, v3.4s
zip2 v19.4s, v2.4s, v3.4s
zip1 v20.4s, v4.4s, v5.4s
zip2 v21.4s, v4.4s, v5.4s
zip1 v22.4s, v6.4s, v7.4s
zip2 v23.4s, v6.4s, v7.4s
zip1 v24.4s, v8.4s, v9.4s
zip2 v25.4s, v8.4s, v9.4s
zip1 v26.4s, v10.4s, v11.4s
zip2 v27.4s, v10.4s, v11.4s
zip1 v28.4s, v12.4s, v13.4s
zip2 v29.4s, v12.4s, v13.4s
zip1 v30.4s, v14.4s, v15.4s
zip2 v31.4s, v14.4s, v15.4s
// interleave 64-bit words in state n, n+2
zip1 v0.2d, v16.2d, v18.2d
zip2 v4.2d, v16.2d, v18.2d
zip1 v8.2d, v17.2d, v19.2d
zip2 v12.2d, v17.2d, v19.2d
ld1 {v16.16b-v19.16b}, [x2], #64
zip1 v1.2d, v20.2d, v22.2d
zip2 v5.2d, v20.2d, v22.2d
zip1 v9.2d, v21.2d, v23.2d
zip2 v13.2d, v21.2d, v23.2d
ld1 {v20.16b-v23.16b}, [x2], #64
zip1 v2.2d, v24.2d, v26.2d
zip2 v6.2d, v24.2d, v26.2d
zip1 v10.2d, v25.2d, v27.2d
zip2 v14.2d, v25.2d, v27.2d
ld1 {v24.16b-v27.16b}, [x2], #64
zip1 v3.2d, v28.2d, v30.2d
zip2 v7.2d, v28.2d, v30.2d
zip1 v11.2d, v29.2d, v31.2d
zip2 v15.2d, v29.2d, v31.2d
ld1 {v28.16b-v31.16b}, [x2]
// xor with corresponding input, write to output
eor v16.16b, v16.16b, v0.16b
eor v17.16b, v17.16b, v1.16b
eor v18.16b, v18.16b, v2.16b
eor v19.16b, v19.16b, v3.16b
eor v20.16b, v20.16b, v4.16b
eor v21.16b, v21.16b, v5.16b
st1 {v16.16b-v19.16b}, [x1], #64
eor v22.16b, v22.16b, v6.16b
eor v23.16b, v23.16b, v7.16b
eor v24.16b, v24.16b, v8.16b
eor v25.16b, v25.16b, v9.16b
st1 {v20.16b-v23.16b}, [x1], #64
eor v26.16b, v26.16b, v10.16b
eor v27.16b, v27.16b, v11.16b
eor v28.16b, v28.16b, v12.16b
st1 {v24.16b-v27.16b}, [x1], #64
eor v29.16b, v29.16b, v13.16b
eor v30.16b, v30.16b, v14.16b
eor v31.16b, v31.16b, v15.16b
st1 {v28.16b-v31.16b}, [x1]
ret
ENDPROC(chacha20_4block_xor_neon)
// Per-lane values 0, 1, 2, 3. Presumably loaded into the vector that is added
// to the x12 (block counter) lanes so each of the four parallel blocks gets
// its own counter -- the load site is outside this excerpt, confirm there.
CTRINC: .word 0, 1, 2, 3
// Byte indices 3,0,1,2 within each 32-bit word (as laid out on a
// little-endian build). Used as a tbl index vector this rotates every 32-bit
// lane left by 8 bits; presumably the value held in v31 -- confirm.
ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f

View File

@ -0,0 +1,126 @@
/*
* ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
*
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Based on:
* ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
/*
 * XOR @bytes bytes of @src with ChaCha20 keystream generated from @state and
 * store the result in @dst.
 *
 * Groups of four whole blocks go through the 4-way NEON routine, remaining
 * whole blocks through the single-block routine; state[12] is advanced by the
 * number of blocks consumed each time.  A trailing partial block is bounced
 * through an on-stack buffer of CHACHA20_BLOCK_SIZE bytes, and state[12] is
 * left untouched for that final partial block.
 */
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	u8 tail[CHACHA20_BLOCK_SIZE];

	/* Four blocks per call for as long as enough input remains. */
	for (; bytes >= CHACHA20_BLOCK_SIZE * 4; bytes -= CHACHA20_BLOCK_SIZE * 4) {
		chacha20_4block_xor_neon(state, dst, src);
		state[12] += 4;
		src += CHACHA20_BLOCK_SIZE * 4;
		dst += CHACHA20_BLOCK_SIZE * 4;
	}

	/* Up to three whole blocks may be left over. */
	for (; bytes >= CHACHA20_BLOCK_SIZE; bytes -= CHACHA20_BLOCK_SIZE) {
		chacha20_block_xor_neon(state, dst, src);
		state[12]++;
		src += CHACHA20_BLOCK_SIZE;
		dst += CHACHA20_BLOCK_SIZE;
	}

	if (!bytes)
		return;

	/* Partial block: process in place inside the bounce buffer. */
	memcpy(tail, src, bytes);
	chacha20_block_xor_neon(state, tail, tail);
	memcpy(dst, tail, bytes);
}
/*
 * skcipher encrypt/decrypt handler (the same routine is installed for both
 * directions in the algorithm definition).
 *
 * Requests of at most one ChaCha20 block are handed to
 * crypto_chacha20_crypt(); anything larger is walked and processed by
 * chacha20_doneon() between kernel_neon_begin() and kernel_neon_end().
 *
 * Returns the value of the last skcipher_walk_done() call, or the value of
 * skcipher_walk_virt() if the loop body never runs.
 */
static int chacha20_neon(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
u32 state[16];
int err;
/* Short requests bypass the NEON path entirely. */
if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
return crypto_chacha20_crypt(req);
/*
 * NOTE(review): the result is stored but not examined before walk.iv is
 * read below; on failure this relies on walk.nbytes being 0 so the loop
 * is skipped -- confirm against the skcipher walk implementation.
 */
err = skcipher_walk_virt(&walk, req, true);
crypto_chacha20_init(state, ctx, walk.iv);
kernel_neon_begin();
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
/*
 * Unless this chunk reaches the end of the request, only consume a
 * multiple of walk.stride and hand the remainder back to the walker.
 */
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
nbytes);
/* Second argument is the number of bytes left unprocessed. */
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
kernel_neon_end();
return err;
}
static struct skcipher_alg alg = {
.base.cra_name = "chacha20",
.base.cra_driver_name = "chacha20-neon",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA20_KEY_SIZE,
.max_keysize = CHACHA20_KEY_SIZE,
.ivsize = CHACHA20_IV_SIZE,
.chunksize = CHACHA20_BLOCK_SIZE,
.walksize = 4 * CHACHA20_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
.encrypt = chacha20_neon,
.decrypt = chacha20_neon,
};
/*
 * Module init: register the skcipher when the CPU advertises HWCAP_ASIMD,
 * otherwise fail with -ENODEV.
 */
static int __init chacha20_simd_mod_init(void)
{
	if (elf_hwcap & HWCAP_ASIMD)
		return crypto_register_skcipher(&alg);

	return -ENODEV;
}
/* Module exit: unregister the skcipher registered by chacha20_simd_mod_init(). */
static void __exit chacha20_simd_mod_fini(void)
{
crypto_unregister_skcipher(&alg);
}
module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");

View File

@ -1,290 +0,0 @@
/*
* crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
*
* Module based on crypto/crc32c_generic.c
*
* CRC32 loop taken from Ed Nevill's Hadoop CRC patch
* http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
*
* Using inline assembly instead of intrinsics in order to be backwards
* compatible with older compilers.
*
* Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/unaligned/access_ok.h>
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <crypto/internal/hash.h>
MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam@linaro.org>");
MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
MODULE_LICENSE("GPL v2");
/*
 * One-instruction wrappers around the ARMv8 CRC32 (crc32*) and CRC32C
 * (crc32c*) instructions.  @crc is both input and output ("+r" constraint)
 * and is updated in place with @value folded in.  The X variants pass @value
 * as a 64-bit register operand (%x); the W, H and B variants pass it as a
 * 32-bit register operand (%w) and are used below for 32-, 16- and 8-bit
 * pieces of input respectively.
 */
#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
/*
 * Fold @len bytes at @p into @crc using the CRC32 instruction wrappers and
 * return the updated value.  Input is read with the unaligned little-endian
 * accessors, 8 bytes at a time, followed by at most one 4-, 2- and 1-byte
 * piece for the tail.
 */
static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
{
/* Signed and wider than @len so the loop test below can go negative. */
s64 length = len;
while ((length -= sizeof(u64)) >= 0) {
CRC32X(crc, get_unaligned_le64(p));
p += sizeof(u64);
}
/*
 * length is now (len % 8) - 8.  Subtracting 8 does not disturb the low
 * three bits, so they still encode the number of tail bytes and can be
 * tested individually.
 */
/* The following is more efficient than the straight loop */
if (length & sizeof(u32)) {
CRC32W(crc, get_unaligned_le32(p));
p += sizeof(u32);
}
if (length & sizeof(u16)) {
CRC32H(crc, get_unaligned_le16(p));
p += sizeof(u16);
}
if (length & sizeof(u8))
CRC32B(crc, *p);
return crc;
}
/*
 * Fold @len bytes at @p into @crc using the CRC32C instruction wrappers and
 * return the updated value.  Input is read with the unaligned little-endian
 * accessors, 8 bytes at a time, followed by at most one 4-, 2- and 1-byte
 * piece for the tail.
 */
static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
{
/* Signed and wider than @len so the loop test below can go negative. */
s64 length = len;
while ((length -= sizeof(u64)) >= 0) {
CRC32CX(crc, get_unaligned_le64(p));
p += sizeof(u64);
}
/*
 * length is now (len % 8) - 8.  Subtracting 8 does not disturb the low
 * three bits, so they still encode the number of tail bytes and can be
 * tested individually.
 */
/* The following is more efficient than the straight loop */
if (length & sizeof(u32)) {
CRC32CW(crc, get_unaligned_le32(p));
p += sizeof(u32);
}
if (length & sizeof(u16)) {
CRC32CH(crc, get_unaligned_le16(p));
p += sizeof(u16);
}
if (length & sizeof(u8))
CRC32CB(crc, *p);
return crc;
}
/* Value used for cra_blocksize in both algorithm definitions below. */
#define CHKSUM_BLOCK_SIZE 1
/* Digest length in bytes; the final/finup helpers store one 32-bit value. */
#define CHKSUM_DIGEST_SIZE 4
/*
 * Per-transform context.  @key is the seed: set by the cra_init hooks and
 * chksum_setkey(), and copied into the running CRC by chksum_init().
 */
struct chksum_ctx {
u32 key;
};
/* Per-request (shash_desc) state: the running CRC value. */
struct chksum_desc_ctx {
u32 crc;
};
static int chksum_init(struct shash_desc *desc)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = mctx->key;
return 0;
}
/*
 * shash .setkey hook: install a caller-supplied seed.
 *
 * The key must be exactly sizeof(u32) bytes and is read as a little-endian
 * 32-bit value.  It is stored unmodified and later used as the initial
 * accumulator, which permits arbitrary starting values and leaves the XOR
 * policy to the caller: an algorithm that starts from ~0 needs its seed
 * XORed with ~0 before it is passed in here.
 *
 * Returns 0 on success; on a wrong key length flags the transform with
 * CRYPTO_TFM_RES_BAD_KEY_LEN and returns -EINVAL.
 */
static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
			 unsigned int keylen)
{
	struct chksum_ctx *tfm_ctx = crypto_shash_ctx(tfm);

	if (keylen == sizeof(tfm_ctx->key)) {
		tfm_ctx->key = get_unaligned_le32(key);
		return 0;
	}

	crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
	return -EINVAL;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
return 0;
}
static int chksumc_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
return 0;
}
/*
 * shash .final hook for crc32: write the running CRC to @out as a
 * little-endian 32-bit value (no final inversion).  Always returns 0.
 */
static int chksum_final(struct shash_desc *desc, u8 *out)
{
	const struct chksum_desc_ctx *req_ctx = shash_desc_ctx(desc);
	u32 digest = req_ctx->crc;

	put_unaligned_le32(digest, out);

	return 0;
}
/*
 * shash .final hook for crc32c: write the bitwise complement of the running
 * CRC to @out as a little-endian 32-bit value.  Always returns 0.
 */
static int chksumc_final(struct shash_desc *desc, u8 *out)
{
	const struct chksum_desc_ctx *req_ctx = shash_desc_ctx(desc);
	u32 digest = ~req_ctx->crc;

	put_unaligned_le32(digest, out);

	return 0;
}
/*
 * crc32 helper: continue from @crc over @len bytes of @data and store the
 * result in @out as a little-endian 32-bit value.  Always returns 0.
 */
static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
	u32 digest = crc32_arm64_le_hw(crc, data, len);

	put_unaligned_le32(digest, out);

	return 0;
}
/*
 * crc32c helper: continue from @crc over @len bytes of @data and store the
 * bitwise complement of the result in @out as a little-endian 32-bit value.
 * Always returns 0.
 */
static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
{
	u32 digest = ~crc32c_arm64_le_hw(crc, data, len);

	put_unaligned_le32(digest, out);

	return 0;
}
/*
 * shash .finup hook for crc32: process the last @len bytes of @data starting
 * from the request's running CRC and emit the digest to @out.
 */
static int chksum_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	const struct chksum_desc_ctx *req_ctx = shash_desc_ctx(desc);
	u32 running = req_ctx->crc;

	return __chksum_finup(running, data, len, out);
}
/*
 * shash .finup hook for crc32c: process the last @len bytes of @data starting
 * from the request's running CRC and emit the digest to @out.
 */
static int chksumc_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
{
	const struct chksum_desc_ctx *req_ctx = shash_desc_ctx(desc);
	u32 running = req_ctx->crc;

	return __chksumc_finup(running, data, len, out);
}
/*
 * shash .digest hook for crc32: one-shot computation over @length bytes of
 * @data, starting directly from the transform's seed rather than from
 * per-request state.
 */
static int chksum_digest(struct shash_desc *desc, const u8 *data,
			 unsigned int length, u8 *out)
{
	const struct chksum_ctx *tfm_ctx = crypto_shash_ctx(desc->tfm);
	u32 seed = tfm_ctx->key;

	return __chksum_finup(seed, data, length, out);
}
/*
 * shash .digest hook for crc32c: one-shot computation over @length bytes of
 * @data, starting directly from the transform's seed rather than from
 * per-request state.
 */
static int chksumc_digest(struct shash_desc *desc, const u8 *data,
			  unsigned int length, u8 *out)
{
	const struct chksum_ctx *tfm_ctx = crypto_shash_ctx(desc->tfm);
	u32 seed = tfm_ctx->key;

	return __chksumc_finup(seed, data, length, out);
}
/* Transform init for crc32: the default seed is 0.  Always returns 0. */
static int crc32_cra_init(struct crypto_tfm *tfm)
{
	struct chksum_ctx *tfm_ctx = crypto_tfm_ctx(tfm);

	tfm_ctx->key = 0;

	return 0;
}
/* Transform init for crc32c: the default seed is ~0.  Always returns 0. */
static int crc32c_cra_init(struct crypto_tfm *tfm)
{
	struct chksum_ctx *tfm_ctx = crypto_tfm_ctx(tfm);

	tfm_ctx->key = ~0;

	return 0;
}
/* "crc32" shash built on the crc32 instruction helpers in this file. */
static struct shash_alg crc32_alg = {
	/* hash operations */
	.init			= chksum_init,
	.setkey			= chksum_setkey,
	.update			= chksum_update,
	.final			= chksum_final,
	.finup			= chksum_finup,
	.digest			= chksum_digest,

	/* sizes */
	.digestsize		= CHKSUM_DIGEST_SIZE,
	.descsize		= sizeof(struct chksum_desc_ctx),

	/* generic crypto_alg part */
	.base.cra_name		= "crc32",
	.base.cra_driver_name	= "crc32-arm64-hw",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= CHKSUM_BLOCK_SIZE,
	.base.cra_alignmask	= 0,
	.base.cra_ctxsize	= sizeof(struct chksum_ctx),
	.base.cra_module	= THIS_MODULE,
	.base.cra_init		= crc32_cra_init,
};
/*
 * "crc32c" shash built on the crc32c instruction helpers in this file.
 * Shares init and setkey with crc32; update/final/finup/digest are the
 * crc32c variants.
 */
static struct shash_alg crc32c_alg = {
	/* hash operations */
	.init			= chksum_init,
	.setkey			= chksum_setkey,
	.update			= chksumc_update,
	.final			= chksumc_final,
	.finup			= chksumc_finup,
	.digest			= chksumc_digest,

	/* sizes */
	.digestsize		= CHKSUM_DIGEST_SIZE,
	.descsize		= sizeof(struct chksum_desc_ctx),

	/* generic crypto_alg part */
	.base.cra_name		= "crc32c",
	.base.cra_driver_name	= "crc32c-arm64-hw",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= CHKSUM_BLOCK_SIZE,
	.base.cra_alignmask	= 0,
	.base.cra_ctxsize	= sizeof(struct chksum_ctx),
	.base.cra_module	= THIS_MODULE,
	.base.cra_init		= crc32c_cra_init,
};
/*
 * Module init: register crc32 first, then crc32c.  If the second
 * registration fails the first is rolled back so nothing stays registered.
 * Returns 0 on success or the error from the failing registration.
 */
static int __init crc32_mod_init(void)
{
	int err = crypto_register_shash(&crc32_alg);

	if (err)
		return err;

	err = crypto_register_shash(&crc32c_alg);
	if (err)
		crypto_unregister_shash(&crc32_alg);

	return err;
}
/* Module exit: unregister both algorithms registered by crc32_mod_init(). */
static void __exit crc32_mod_exit(void)
{
crypto_unregister_shash(&crc32_alg);
crypto_unregister_shash(&crc32c_alg);
}
module_cpu_feature_match(CRC32, crc32_mod_init);
module_exit(crc32_mod_exit);

View File

@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc)
return 0;
}
/*
 * shash .update hook for crc32: the descriptor context is a bare u32 holding
 * the running CRC; fold @length bytes of @data into it via
 * crc32_armv8_le().  Always returns 0.
 */
static int crc32_update(struct shash_desc *desc, const u8 *data,
			unsigned int length)
{
	u32 *running = shash_desc_ctx(desc);
	u32 next = crc32_armv8_le(*running, data, length);

	*running = next;

	return 0;
}
/*
 * shash .update hook for crc32c: the descriptor context is a bare u32 holding
 * the running CRC; fold @length bytes of @data into it via
 * crc32c_armv8_le().  Always returns 0.
 */
static int crc32c_update(struct shash_desc *desc, const u8 *data,
			 unsigned int length)
{
	u32 *running = shash_desc_ctx(desc);
	u32 next = crc32c_armv8_le(*running, data, length);

	*running = next;

	return 0;
}
static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
static struct shash_alg crc32_pmull_algs[] = { {
.setkey = crc32_pmull_setkey,
.init = crc32_pmull_init,
.update = crc32_pmull_update,
.update = crc32_update,
.final = crc32_pmull_final,
.descsize = sizeof(u32),
.digestsize = sizeof(u32),
@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
}, {
.setkey = crc32_pmull_setkey,
.init = crc32_pmull_init,
.update = crc32c_pmull_update,
.update = crc32c_update,
.final = crc32c_pmull_final,
.descsize = sizeof(u32),
.digestsize = sizeof(u32),
@ -187,14 +205,20 @@ static struct shash_alg crc32_pmull_algs[] = { {
static int __init crc32_pmull_mod_init(void)
{
if (elf_hwcap & HWCAP_CRC32) {
fallback_crc32 = crc32_armv8_le;
fallback_crc32c = crc32c_armv8_le;
} else {
fallback_crc32 = crc32_le;
fallback_crc32c = __crc32c_le;
}
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
crc32_pmull_algs[0].update = crc32_pmull_update;
crc32_pmull_algs[1].update = crc32c_pmull_update;
if (elf_hwcap & HWCAP_CRC32) {
fallback_crc32 = crc32_armv8_le;
fallback_crc32c = crc32c_armv8_le;
} else {
fallback_crc32 = crc32_le;
fallback_crc32c = __crc32c_le;
}
} else if (!(elf_hwcap & HWCAP_CRC32)) {
return -ENODEV;
}
return crypto_register_shashes(crc32_pmull_algs,
ARRAY_SIZE(crc32_pmull_algs));
}
@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void)
ARRAY_SIZE(crc32_pmull_algs));
}
module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
static const struct cpu_feature crc32_cpu_feature[] = {
{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
module_init(crc32_pmull_mod_init);
module_exit(crc32_pmull_mod_exit);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");

View File

@ -46,27 +46,48 @@
#ifdef __x86_64__
.data
# constants in mergeable sections, linker can reorder and merge
.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
.octa 0x00000000000000010000000000000087
.section .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY: .octa 0xC2000000000000000000000000000001
.section .rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE: .octa 0x00000001000000000000000000000001
.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
.section .rodata.cst16.MASK1, "aM", @progbits, 16
.align 16
MASK1: .octa 0x0000000000000000ffffffffffffffff
.section .rodata.cst16.MASK2, "aM", @progbits, 16
.align 16
MASK2: .octa 0xffffffffffffffff0000000000000000
.section .rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE: .octa 0x00000000000000000000000000000001
.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
.align 16
F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
.section .rodata.cst16.dec, "aM", @progbits, 16
.align 16
dec: .octa 0x1
.section .rodata.cst16.enc, "aM", @progbits, 16
.align 16
enc: .octa 0x2
# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK,
# and ZERO should follow ALL_F
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
MASK1: .octa 0x0000000000000000ffffffffffffffff
MASK2: .octa 0xffffffffffffffff0000000000000000
# and zero should follow ALL_F
.section .rodata, "a", @progbits
.align 16
SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
ZERO: .octa 0x00000000000000000000000000000000
ONE: .octa 0x00000000000000000000000000000001
F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
dec: .octa 0x1
enc: .octa 0x2
.octa 0x00000000000000000000000000000000
.text

View File

@ -122,22 +122,38 @@
#include <linux/linkage.h>
#include <asm/inst.h>
.data
# constants in mergeable sections, linker can reorder and merge
.section .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY: .octa 0xC2000000000000000000000000000001
.section .rodata.cst16.POLY2, "aM", @progbits, 16
.align 16
POLY2: .octa 0xC20000000000000000000001C2000000
.section .rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE: .octa 0x00000001000000000000000000000001
# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F
.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
.section .rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE: .octa 0x00000000000000000000000000000001
.section .rodata.cst16.ONEf, "aM", @progbits, 16
.align 16
ONEf: .octa 0x01000000000000000000000000000000
# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F
.section .rodata, "a", @progbits
.align 16
SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
ZERO: .octa 0x00000000000000000000000000000000
ONE: .octa 0x00000000000000000000000000000001
ONEf: .octa 0x01000000000000000000000000000000
.octa 0x00000000000000000000000000000000
.text

View File

@ -740,9 +740,11 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
*((__be32 *)(iv+12)) = counter;
if (sg_is_last(req->src) &&
req->src->offset + req->src->length <= PAGE_SIZE &&
(!PageHighMem(sg_page(req->src)) ||
req->src->offset + req->src->length <= PAGE_SIZE) &&
sg_is_last(req->dst) &&
req->dst->offset + req->dst->length <= PAGE_SIZE) {
(!PageHighMem(sg_page(req->dst)) ||
req->dst->offset + req->dst->length <= PAGE_SIZE)) {
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
assoc = scatterwalk_map(&src_sg_walk);
@ -822,9 +824,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
*((__be32 *)(iv+12)) = counter;
if (sg_is_last(req->src) &&
req->src->offset + req->src->length <= PAGE_SIZE &&
(!PageHighMem(sg_page(req->src)) ||
req->src->offset + req->src->length <= PAGE_SIZE) &&
sg_is_last(req->dst) &&
req->dst->offset + req->dst->length <= PAGE_SIZE) {
(!PageHighMem(sg_page(req->dst)) ||
req->dst->offset + req->dst->length <= PAGE_SIZE)) {
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
assoc = scatterwalk_map(&src_sg_walk);

View File

@ -571,7 +571,9 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vmovdqu y6, 14 * 16(rio); \
vmovdqu y7, 15 * 16(rio);
.data
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
.section .rodata.cst16, "aM", @progbits, 16
.align 16
#define SHUFB_BYTES(idx) \
@ -711,6 +713,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
/* 4-bit mask */
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
.align 4
.L0f0f0f0f:
.long 0x0f0f0f0f

View File

@ -610,20 +610,25 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vmovdqu y6, 14 * 32(rio); \
vmovdqu y7, 15 * 32(rio);
.data
.align 32
.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
.align 32
#define SHUFB_BYTES(idx) \
0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
.Lshufb_16x16b:
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
.section .rodata.cst32.pack_bswap, "aM", @progbits, 32
.align 32
.Lpack_bswap:
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
.section .rodata.cst16, "aM", @progbits, 16
.align 16
/* For CTR-mode IV byteswap */
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
.align 4
/* 4-bit mask */
.L0f0f0f0f:

View File

@ -195,19 +195,29 @@
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1;
.data
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16
.align 16
.Lbswap_iv_mask:
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
.section .rodata.cst4.16_mask, "aM", @progbits, 4
.align 4
.L16_mask:
.byte 16, 16, 16, 16
.section .rodata.cst4.32_mask, "aM", @progbits, 4
.align 4
.L32_mask:
.byte 32, 0, 0, 0
.section .rodata.cst4.first_mask, "aM", @progbits, 4
.align 4
.Lfirst_mask:
.byte 0x1f, 0, 0, 0

View File

@ -225,8 +225,7 @@
vpshufb rmask, x2, x2; \
vpshufb rmask, x3, x3;
.data
.section .rodata.cst16, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
@ -244,10 +243,19 @@
.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.section .rodata.cst4.L16_mask, "aM", @progbits, 4
.align 4
.L16_mask:
.byte 16, 16, 16, 16
.section .rodata.cst4.L32_mask, "aM", @progbits, 4
.align 4
.L32_mask:
.byte 32, 0, 0, 0
.section .rodata.cst4.first_mask, "aM", @progbits, 4
.align 4
.Lfirst_mask:
.byte 0x1f, 0, 0, 0

View File

@ -11,13 +11,18 @@
#include <linux/linkage.h>
.data
.section .rodata.cst32.ROT8, "aM", @progbits, 32
.align 32
ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
.octa 0x0e0d0c0f0a09080b0605040702010003
.section .rodata.cst32.ROT16, "aM", @progbits, 32
.align 32
ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
.octa 0x0d0c0f0e09080b0a0504070601000302
.section .rodata.cst32.CTRINC, "aM", @progbits, 32
.align 32
CTRINC: .octa 0x00000003000000020000000100000000
.octa 0x00000007000000060000000500000004

View File

@ -11,11 +11,14 @@
#include <linux/linkage.h>
.data
.section .rodata.cst16.ROT8, "aM", @progbits, 16
.align 16
ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
.section .rodata.cst16.ROT16, "aM", @progbits, 16
.align 16
ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
.section .rodata.cst16.CTRINC, "aM", @progbits, 16
.align 16
CTRINC: .octa 0x00000003000000020000000100000000
.text

View File

@ -11,7 +11,7 @@
#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <linux/crypto.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
@ -63,36 +63,37 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
}
}
static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int chacha20_simd(struct skcipher_request *req)
{
u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
u32 *state, state_buf[16 + 2] __aligned(8);
struct skcipher_walk walk;
int err;
if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
return crypto_chacha20_crypt(desc, dst, src, nbytes);
BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
return crypto_chacha20_crypt(req);
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
err = skcipher_walk_virt(&walk, req, true);
crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
crypto_chacha20_init(state, ctx, walk.iv);
kernel_fpu_begin();
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % CHACHA20_BLOCK_SIZE);
err = skcipher_walk_done(&walk,
walk.nbytes % CHACHA20_BLOCK_SIZE);
}
if (walk.nbytes) {
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes);
err = blkcipher_walk_done(desc, &walk, 0);
err = skcipher_walk_done(&walk, 0);
}
kernel_fpu_end();
@ -100,27 +101,22 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
static struct crypto_alg alg = {
.cra_name = "chacha20",
.cra_driver_name = "chacha20-simd",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_type = &crypto_blkcipher_type,
.cra_ctxsize = sizeof(struct chacha20_ctx),
.cra_alignmask = sizeof(u32) - 1,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CHACHA20_KEY_SIZE,
.max_keysize = CHACHA20_KEY_SIZE,
.ivsize = CHACHA20_IV_SIZE,
.geniv = "seqiv",
.setkey = crypto_chacha20_setkey,
.encrypt = chacha20_simd,
.decrypt = chacha20_simd,
},
},
static struct skcipher_alg alg = {
.base.cra_name = "chacha20",
.base.cra_driver_name = "chacha20-simd",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
.base.cra_alignmask = sizeof(u32) - 1,
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA20_KEY_SIZE,
.max_keysize = CHACHA20_KEY_SIZE,
.ivsize = CHACHA20_IV_SIZE,
.chunksize = CHACHA20_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
.encrypt = chacha20_simd,
.decrypt = chacha20_simd,
};
static int __init chacha20_simd_mod_init(void)
@ -133,12 +129,12 @@ static int __init chacha20_simd_mod_init(void)
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
return crypto_register_alg(&alg);
return crypto_register_skcipher(&alg);
}
static void __exit chacha20_simd_mod_fini(void)
{
crypto_unregister_alg(&alg);
crypto_unregister_skcipher(&alg);
}
module_init(chacha20_simd_mod_init);

View File

@ -312,7 +312,7 @@ do_return:
ret
ENDPROC(crc_pcl)
.section .rodata, "a", %progbits
.section .rodata, "a", @progbits
################################################################
## jump table Table is 129 entries x 2 bytes each
################################################################

View File

@ -554,12 +554,11 @@ _only_less_than_2:
ENDPROC(crc_t10dif_pcl)
.data
.section .rodata, "a", @progbits
.align 16
# precomputed constants
# these constants are precomputed from the poly:
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
.align 16
# Q = 0x18BB70000
# rk1 = 2^(32*3) mod Q << 32
# rk2 = 2^(32*5) mod Q << 32
@ -613,14 +612,23 @@ rk20:
.section .rodata.cst16.mask1, "aM", @progbits, 16
.align 16
mask1:
.octa 0x80808080808080808080808080808080
.section .rodata.cst16.mask2, "aM", @progbits, 16
.align 16
mask2:
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:
.octa 0x000102030405060708090A0B0C0D0E0F
.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32
.align 32
pshufb_shf_table:
# use these values for shift constants for the pshufb instruction
# different alignments result in values as shown:

View File

@ -537,7 +537,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
.data
.section .rodata, "a", @progbits
.align 16
.L_s1:
.quad 0x0010100001010400, 0x0000000000000000

View File

@ -20,8 +20,7 @@
#include <asm/inst.h>
#include <asm/frame.h>
.data
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f

View File

@ -11,11 +11,13 @@
#include <linux/linkage.h>
.data
.section .rodata.cst32.ANMASK, "aM", @progbits, 32
.align 32
ANMASK: .octa 0x0000000003ffffff0000000003ffffff
.octa 0x0000000003ffffff0000000003ffffff
.section .rodata.cst32.ORMASK, "aM", @progbits, 32
.align 32
ORMASK: .octa 0x00000000010000000000000001000000
.octa 0x00000000010000000000000001000000

View File

@ -11,10 +11,12 @@
#include <linux/linkage.h>
.data
.section .rodata.cst16.ANMASK, "aM", @progbits, 16
.align 16
ANMASK: .octa 0x0000000003ffffff0000000003ffffff
.section .rodata.cst16.ORMASK, "aM", @progbits, 16
.align 16
ORMASK: .octa 0x00000000010000000000000001000000
.text

View File

@ -29,11 +29,12 @@
.file "serpent-avx-x86_64-asm_64.S"
.data
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

View File

@ -20,13 +20,18 @@
.file "serpent-avx2-asm_64.S"
.data
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask_0:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask_1:
.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0

View File

@ -281,11 +281,13 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2)
ret
ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
.data
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0
.section .rodata.cst8, "aM", @progbits, 8
.align 8
one:
.quad 1
two:

View File

@ -203,8 +203,7 @@ return_null:
ENDPROC(sha1_mb_mgr_submit_avx2)
.data
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0

View File

@ -461,21 +461,32 @@ lloop:
ENDPROC(sha1_x8_avx2)
.data
.section .rodata.cst32.K00_19, "aM", @progbits, 32
.align 32
K00_19:
.octa 0x5A8279995A8279995A8279995A827999
.octa 0x5A8279995A8279995A8279995A827999
.section .rodata.cst32.K20_39, "aM", @progbits, 32
.align 32
K20_39:
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
.section .rodata.cst32.K40_59, "aM", @progbits, 32
.align 32
K40_59:
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
.section .rodata.cst32.K60_79, "aM", @progbits, 32
.align 32
K60_79:
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
.octa 0x0c0d0e0f08090a0b0405060700010203

View File

@ -293,10 +293,12 @@ ENTRY(sha1_ni_transform)
ret
ENDPROC(sha1_ni_transform)
.data
.align 64
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x000102030405060708090a0b0c0d0e0f
.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
.align 16
UPPER_WORD_MASK:
.octa 0xFFFFFFFF000000000000000000000000

View File

@ -463,7 +463,7 @@ done_hash:
ret
ENDPROC(sha256_transform_avx)
.data
.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -483,14 +483,21 @@ K256:
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16
.align 16
# shuffle xBxA -> 00BA
_SHUF_00BA:
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16
.align 16
# shuffle xDxC -> DC00
_SHUF_DC00:
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
#endif

View File

@ -723,7 +723,7 @@ done_hash:
ret
ENDPROC(sha256_transform_rorx)
.data
.section .rodata.cst512.K256, "aM", @progbits, 512
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -759,14 +759,21 @@ K256:
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203
# shuffle xBxA -> 00BA
.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32
.align 32
_SHUF_00BA:
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100
# shuffle xDxC -> DC00
.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32
.align 32
_SHUF_DC00:
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
#endif

View File

@ -284,11 +284,13 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
ret
ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
.data
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0
.section .rodata.cst8, "aM", @progbits, 8
.align 8
one:
.quad 1
two:

View File

@ -208,8 +208,7 @@ return_null:
ENDPROC(sha256_mb_mgr_submit_avx2)
.data
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0

View File

@ -437,7 +437,8 @@ Lrounds_16_xx:
ret
ENDPROC(sha256_x8_avx2)
.data
.section .rodata.K256_8, "a", @progbits
.align 64
K256_8:
.octa 0x428a2f98428a2f98428a2f98428a2f98
@ -568,10 +569,14 @@ K256_8:
.octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
.octa 0xc67178f2c67178f2c67178f2c67178f2
.octa 0xc67178f2c67178f2c67178f2c67178f2
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
.octa 0x0c0d0e0f08090a0b0405060700010203
.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
.global K256
K256:

View File

@ -474,7 +474,7 @@ done_hash:
ret
ENDPROC(sha256_transform_ssse3)
.data
.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -494,13 +494,19 @@ K256:
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16
.align 16
# shuffle xBxA -> 00BA
_SHUF_00BA:
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16
.align 16
# shuffle xDxC -> DC00
_SHUF_DC00:
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF

View File

@ -329,7 +329,7 @@ ENTRY(sha256_ni_transform)
ret
ENDPROC(sha256_ni_transform)
.data
.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -349,5 +349,7 @@ K256:
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203

View File

@ -370,14 +370,17 @@ ENDPROC(sha512_transform_avx)
########################################################################
### Binary Data
.data
.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
.align 16
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
XMM_QWORD_BSWAP:
.octa 0x08090a0b0c0d0e0f0001020304050607
# Mergeable 640-byte rodata section. This allows linker to merge the table
# with other, exactly the same 640-byte fragment of another rodata section
# (if such section exists).
.section .rodata.cst640.K512, "aM", @progbits, 640
.align 64
# K[t] used in SHA512 hashing
K512:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd

View File

@ -684,8 +684,11 @@ ENDPROC(sha512_transform_rorx)
########################################################################
### Binary Data
.data
# Mergeable 640-byte rodata section. This allows linker to merge the table
# with other, exactly the same 640-byte fragment of another rodata section
# (if such section exists).
.section .rodata.cst640.K512, "aM", @progbits, 640
.align 64
# K[t] used in SHA512 hashing
K512:
@ -730,14 +733,17 @@ K512:
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x08090a0b0c0d0e0f0001020304050607
.octa 0x18191a1b1c1d1e1f1011121314151617
.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32
.align 32
MASK_YMM_LO:
.octa 0x00000000000000000000000000000000
.octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
#endif

View File

@ -221,7 +221,7 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
}
static struct sha512_hash_ctx
*sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr)
*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
{
/*
* If get_comp_job returns NULL, there are no jobs complete.
@ -233,11 +233,17 @@ static struct sha512_hash_ctx
* Otherwise, all jobs currently being managed by the hash_ctx_mgr
* still need processing.
*/
struct sha512_ctx_mgr *mgr;
struct sha512_hash_ctx *ctx;
unsigned long flags;
mgr = cstate->mgr;
spin_lock_irqsave(&cstate->work_lock, flags);
ctx = (struct sha512_hash_ctx *)
sha512_job_mgr_get_comp_job(&mgr->mgr);
return sha512_ctx_mgr_resubmit(mgr, ctx);
ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
spin_unlock_irqrestore(&cstate->work_lock, flags);
return ctx;
}
static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
@ -246,12 +252,17 @@ static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
}
static struct sha512_hash_ctx
*sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr,
*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
struct sha512_hash_ctx *ctx,
const void *buffer,
uint32_t len,
int flags)
{
struct sha512_ctx_mgr *mgr;
unsigned long irqflags;
mgr = cstate->mgr;
spin_lock_irqsave(&cstate->work_lock, irqflags);
if (flags & (~HASH_ENTIRE)) {
/*
* User should not pass anything other than FIRST, UPDATE, or
@ -351,20 +362,26 @@ static struct sha512_hash_ctx
}
}
return sha512_ctx_mgr_resubmit(mgr, ctx);
ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
spin_unlock_irqrestore(&cstate->work_lock, irqflags);
return ctx;
}
static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr)
static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
{
struct sha512_ctx_mgr *mgr;
struct sha512_hash_ctx *ctx;
unsigned long flags;
mgr = cstate->mgr;
spin_lock_irqsave(&cstate->work_lock, flags);
while (1) {
ctx = (struct sha512_hash_ctx *)
sha512_job_mgr_flush(&mgr->mgr);
/* If flush returned 0, there are no more jobs in flight. */
if (!ctx)
return NULL;
break;
/*
* If flush returned a job, resubmit the job to finish
@ -378,8 +395,10 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr)
* the sha512_ctx_mgr still need processing. Loop.
*/
if (ctx)
return ctx;
break;
}
spin_unlock_irqrestore(&cstate->work_lock, flags);
return ctx;
}
static int sha512_mb_init(struct ahash_request *areq)
@ -439,11 +458,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
sha_ctx = (struct sha512_hash_ctx *)
ahash_request_ctx(&rctx->areq);
kernel_fpu_begin();
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx,
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
rctx->walk.data, nbytes, flag);
if (!sha_ctx) {
if (flush)
sha_ctx = sha512_ctx_mgr_flush(cstate->mgr);
sha_ctx = sha512_ctx_mgr_flush(cstate);
}
kernel_fpu_end();
if (sha_ctx)
@ -471,11 +490,12 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
struct sha512_hash_ctx *sha_ctx;
struct mcryptd_hash_request_ctx *req_ctx;
int ret;
unsigned long flags;
/* remove from work list */
spin_lock(&cstate->work_lock);
spin_lock_irqsave(&cstate->work_lock, flags);
list_del(&rctx->waiter);
spin_unlock(&cstate->work_lock);
spin_unlock_irqrestore(&cstate->work_lock, flags);
if (irqs_disabled())
rctx->complete(&req->base, err);
@ -486,14 +506,14 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
}
/* check to see if there are other jobs that are done */
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
while (sha_ctx) {
req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
ret = sha_finish_walk(&req_ctx, cstate, false);
if (req_ctx) {
spin_lock(&cstate->work_lock);
spin_lock_irqsave(&cstate->work_lock, flags);
list_del(&req_ctx->waiter);
spin_unlock(&cstate->work_lock);
spin_unlock_irqrestore(&cstate->work_lock, flags);
req = cast_mcryptd_ctx_to_req(req_ctx);
if (irqs_disabled())
@ -504,7 +524,7 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
local_bh_enable();
}
}
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
}
return 0;
@ -515,6 +535,7 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
{
unsigned long next_flush;
unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
unsigned long flags;
/* initialize tag */
rctx->tag.arrival = jiffies; /* tag the arrival time */
@ -522,9 +543,9 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
next_flush = rctx->tag.arrival + delay;
rctx->tag.expire = next_flush;
spin_lock(&cstate->work_lock);
spin_lock_irqsave(&cstate->work_lock, flags);
list_add_tail(&rctx->waiter, &cstate->work_list);
spin_unlock(&cstate->work_lock);
spin_unlock_irqrestore(&cstate->work_lock, flags);
mcryptd_arm_flusher(cstate, delay);
}
@ -565,7 +586,7 @@ static int sha512_mb_update(struct ahash_request *areq)
sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
sha512_mb_add_list(rctx, cstate);
kernel_fpu_begin();
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
nbytes, HASH_UPDATE);
kernel_fpu_end();
@ -628,7 +649,7 @@ static int sha512_mb_finup(struct ahash_request *areq)
sha512_mb_add_list(rctx, cstate);
kernel_fpu_begin();
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
nbytes, flag);
kernel_fpu_end();
@ -677,8 +698,7 @@ static int sha512_mb_final(struct ahash_request *areq)
/* flag HASH_FINAL and 0 data size */
sha512_mb_add_list(rctx, cstate);
kernel_fpu_begin();
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
HASH_LAST);
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
kernel_fpu_end();
/* check if anything is returned */
@ -940,7 +960,7 @@ static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
break;
kernel_fpu_begin();
sha_ctx = (struct sha512_hash_ctx *)
sha512_ctx_mgr_flush(cstate->mgr);
sha512_ctx_mgr_flush(cstate);
kernel_fpu_end();
if (!sha_ctx) {
pr_err("sha512_mb error: nothing got flushed for"

View File

@ -280,12 +280,18 @@ ENTRY(sha512_mb_mgr_get_comp_job_avx2)
pop %rbx
ret
ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
.data
.align 16
.section .rodata.cst8.one, "aM", @progbits, 8
.align 8
one:
.quad 1
.section .rodata.cst8.two, "aM", @progbits, 8
.align 8
two:
.quad 2
.section .rodata.cst8.three, "aM", @progbits, 8
.align 8
three:
.quad 3

View File

@ -209,8 +209,9 @@ return_null:
xor job_rax, job_rax
jmp return
ENDPROC(sha512_mb_mgr_submit_avx2)
.data
/* UNUSED?
.section .rodata.cst16, "aM", @progbits, 16
.align 16
H0: .int 0x6a09e667
H1: .int 0xbb67ae85
@ -220,3 +221,4 @@ H4: .int 0x510e527f
H5: .int 0x9b05688c
H6: .int 0x1f83d9ab
H7: .int 0x5be0cd19
*/

View File

@ -361,7 +361,7 @@ Lrounds_16_xx:
ret
ENDPROC(sha512_x4_avx2)
.data
.section .rodata.K512_4, "a", @progbits
.align 64
K512_4:
.octa 0x428a2f98d728ae22428a2f98d728ae22,\
@ -525,5 +525,7 @@ K512_4:
.octa 0x6c44198c4a4758176c44198c4a475817,\
0x6c44198c4a4758176c44198c4a475817
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
.octa 0x18191a1b1c1d1e1f1011121314151617

View File

@ -369,14 +369,17 @@ ENDPROC(sha512_transform_ssse3)
########################################################################
### Binary Data
.data
.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
.align 16
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
XMM_QWORD_BSWAP:
.octa 0x08090a0b0c0d0e0f0001020304050607
# Mergeable 640-byte rodata section. This allows linker to merge the table
# with other, exactly the same 640-byte fragment of another rodata section
# (if such section exists).
.section .rodata.cst640.K512, "aM", @progbits, 640
.align 64
# K[t] used in SHA512 hashing
K512:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd

View File

@ -29,11 +29,13 @@
.file "twofish-avx-x86_64-asm_64.S"
.data
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0

View File

@ -263,6 +263,7 @@ comment "Authenticated Encryption with Associated Data"
config CRYPTO_CCM
tristate "CCM support"
select CRYPTO_CTR
select CRYPTO_HASH
select CRYPTO_AEAD
help
Support for Counter with CBC MAC. Required for IPsec.
@ -374,6 +375,7 @@ config CRYPTO_XTS
select CRYPTO_BLKCIPHER
select CRYPTO_MANAGER
select CRYPTO_GF128MUL
select CRYPTO_ECB
help
XTS: IEEE1619/D16 narrow block cipher use with aes-xts-plain,
key size 256, 384 or 512 bits. This implementation currently
@ -895,6 +897,23 @@ config CRYPTO_AES
See <http://csrc.nist.gov/CryptoToolkit/aes/> for more information.
config CRYPTO_AES_TI
tristate "Fixed time AES cipher"
select CRYPTO_ALGAPI
help
This is a generic implementation of AES that attempts to eliminate
data dependent latencies as much as possible without affecting
performance too much. It is intended for use by the generic CCM
and GCM drivers, and other CTR or CMAC/XCBC based modes that rely
solely on encryption (although decryption is supported as well, but
with a more dramatic performance hit).
Instead of using 16 lookup tables of 1 KB each (8 for encryption and
8 for decryption), this implementation uses just two S-boxes of
256 bytes each, and attempts to eliminate data dependent latencies by
prefetching the entire table into the cache at the start of each
block.
config CRYPTO_AES_586
tristate "AES cipher algorithms (i586)"
depends on (X86 || UML_X86) && !64BIT

View File

@ -75,6 +75,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
@ -98,7 +99,9 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o
obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o

View File

@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include <crypto/scatterwalk.h>
@ -394,7 +395,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
{
struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
@ -468,7 +469,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
{
struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;

View File

@ -20,6 +20,7 @@
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include <crypto/internal/acompress.h>
#include <crypto/internal/scompress.h>
@ -50,7 +51,7 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
{

View File

@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include "internal.h"
@ -132,7 +133,7 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
{
struct aead_alg *aead = container_of(alg, struct aead_alg, base);

View File

@ -54,6 +54,7 @@
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
static inline u8 byte(const u32 x, const unsigned n)
{
@ -1216,7 +1217,6 @@ EXPORT_SYMBOL_GPL(crypto_il_tab);
int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
const __le32 *key = (const __le32 *)in_key;
u32 i, t, u, v, w, j;
if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
@ -1225,10 +1225,15 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
ctx->key_length = key_len;
ctx->key_dec[key_len + 24] = ctx->key_enc[0] = le32_to_cpu(key[0]);
ctx->key_dec[key_len + 25] = ctx->key_enc[1] = le32_to_cpu(key[1]);
ctx->key_dec[key_len + 26] = ctx->key_enc[2] = le32_to_cpu(key[2]);
ctx->key_dec[key_len + 27] = ctx->key_enc[3] = le32_to_cpu(key[3]);
ctx->key_enc[0] = get_unaligned_le32(in_key);
ctx->key_enc[1] = get_unaligned_le32(in_key + 4);
ctx->key_enc[2] = get_unaligned_le32(in_key + 8);
ctx->key_enc[3] = get_unaligned_le32(in_key + 12);
ctx->key_dec[key_len + 24] = ctx->key_enc[0];
ctx->key_dec[key_len + 25] = ctx->key_enc[1];
ctx->key_dec[key_len + 26] = ctx->key_enc[2];
ctx->key_dec[key_len + 27] = ctx->key_enc[3];
switch (key_len) {
case AES_KEYSIZE_128:
@ -1238,17 +1243,17 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
break;
case AES_KEYSIZE_192:
ctx->key_enc[4] = le32_to_cpu(key[4]);
t = ctx->key_enc[5] = le32_to_cpu(key[5]);
ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
t = ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
for (i = 0; i < 8; ++i)
loop6(i);
break;
case AES_KEYSIZE_256:
ctx->key_enc[4] = le32_to_cpu(key[4]);
ctx->key_enc[5] = le32_to_cpu(key[5]);
ctx->key_enc[6] = le32_to_cpu(key[6]);
t = ctx->key_enc[7] = le32_to_cpu(key[7]);
ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
ctx->key_enc[6] = get_unaligned_le32(in_key + 24);
t = ctx->key_enc[7] = get_unaligned_le32(in_key + 28);
for (i = 0; i < 6; ++i)
loop8(i);
loop8tophalf(i);
@ -1329,16 +1334,14 @@ EXPORT_SYMBOL_GPL(crypto_aes_set_key);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
const __le32 *src = (const __le32 *)in;
__le32 *dst = (__le32 *)out;
u32 b0[4], b1[4];
const u32 *kp = ctx->key_enc + 4;
const int key_len = ctx->key_length;
b0[0] = le32_to_cpu(src[0]) ^ ctx->key_enc[0];
b0[1] = le32_to_cpu(src[1]) ^ ctx->key_enc[1];
b0[2] = le32_to_cpu(src[2]) ^ ctx->key_enc[2];
b0[3] = le32_to_cpu(src[3]) ^ ctx->key_enc[3];
b0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
b0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
b0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
b0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
if (key_len > 24) {
f_nround(b1, b0, kp);
@ -1361,10 +1364,10 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
f_nround(b1, b0, kp);
f_lround(b0, b1, kp);
dst[0] = cpu_to_le32(b0[0]);
dst[1] = cpu_to_le32(b0[1]);
dst[2] = cpu_to_le32(b0[2]);
dst[3] = cpu_to_le32(b0[3]);
put_unaligned_le32(b0[0], out);
put_unaligned_le32(b0[1], out + 4);
put_unaligned_le32(b0[2], out + 8);
put_unaligned_le32(b0[3], out + 12);
}
/* decrypt a block of text */
@ -1401,16 +1404,14 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
const __le32 *src = (const __le32 *)in;
__le32 *dst = (__le32 *)out;
u32 b0[4], b1[4];
const int key_len = ctx->key_length;
const u32 *kp = ctx->key_dec + 4;
b0[0] = le32_to_cpu(src[0]) ^ ctx->key_dec[0];
b0[1] = le32_to_cpu(src[1]) ^ ctx->key_dec[1];
b0[2] = le32_to_cpu(src[2]) ^ ctx->key_dec[2];
b0[3] = le32_to_cpu(src[3]) ^ ctx->key_dec[3];
b0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
b0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
b0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
b0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
if (key_len > 24) {
i_nround(b1, b0, kp);
@ -1433,10 +1434,10 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
i_nround(b1, b0, kp);
i_lround(b0, b1, kp);
dst[0] = cpu_to_le32(b0[0]);
dst[1] = cpu_to_le32(b0[1]);
dst[2] = cpu_to_le32(b0[2]);
dst[3] = cpu_to_le32(b0[3]);
put_unaligned_le32(b0[0], out);
put_unaligned_le32(b0[1], out + 4);
put_unaligned_le32(b0[2], out + 8);
put_unaligned_le32(b0[3], out + 12);
}
static struct crypto_alg aes_alg = {
@ -1446,7 +1447,6 @@ static struct crypto_alg aes_alg = {
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 3,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {

375
crypto/aes_ti.c 100644
View File

@ -0,0 +1,375 @@
/*
* Scalar fixed time AES core transform
*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/unaligned.h>
/*
 * Forward substitution table (S-box): 256 one-byte entries indexed by the
 * input byte. It is read by subshift(), subw(), aesti_set_key() and
 * aesti_encrypt().
 *
 * Emit the sbox as volatile const to prevent the compiler from doing
 * constant folding on sbox references involving fixed indexes.
 */
static volatile const u8 __cacheline_aligned __aesti_sbox[] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
/*
 * Inverse substitution table: 256 one-byte entries indexed by the input
 * byte. It is read by inv_subshift(), aesti_set_key() and the decryption
 * path. Declared volatile const like __aesti_sbox, so that reads at fixed
 * indexes are not constant-folded away by the compiler.
 */
static volatile const u8 __cacheline_aligned __aesti_inv_sbox[] = {
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
/*
 * Multiply each of the four bytes packed in @w by the polynomial 'x' (0b10)
 * in GF(2^8), independently per byte lane.
 *
 * The low seven bits of every byte are shifted up by one; every byte whose
 * top bit was set additionally gets the reduction constant 0x1b folded in.
 */
static u32 mul_by_x(u32 w)
{
	/* 0x01 in each byte lane whose bit 7 was set, 0x00 otherwise */
	u32 overflow = (w & 0x80808080) >> 7;
	/* remaining seven bits of each lane, moved up one position */
	u32 shifted = (w & 0x7f7f7f7f) << 1;

	return shifted ^ (overflow * 0x1b);
}
/*
 * Multiply each of the four bytes packed in @w by the polynomial 'x^2'
 * (0b100) in GF(2^8), independently per byte lane.
 *
 * The low six bits of every byte are shifted up by two. Bit 7 and bit 6 of
 * each byte would be shifted out, so they are reduced separately: bit 7
 * contributes 0x36 and bit 6 contributes 0x1b to that byte.
 */
static u32 mul_by_x2(u32 w)
{
	/* 0x01 per lane where bit 7 / bit 6 of the input byte was set */
	u32 top = (w & 0x80808080) >> 7;
	u32 next = (w & 0x40404040) >> 6;
	/* remaining six bits of each lane, moved up two positions */
	u32 shifted = (w & 0x3f3f3f3f) << 2;

	return shifted ^ (top * 0x36) ^ (next * 0x1b);
}
static u32 mix_columns(u32 x)
{
	/*
	 * Treat the four bytes of x as a column vector and perform the
	 * following matrix multiplication in GF(2^8)
	 *
	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
	 *
	 * using one per-byte doubling (mul_by_x) and two word rotations.
	 */
	u32 doubled_mix = mul_by_x(x) ^ ror32(x, 16);
	u32 rotated = ror32(x ^ doubled_mix, 8);

	return doubled_mix ^ rotated;
}
static u32 inv_mix_columns(u32 x)
{
	/*
	 * Treat the four bytes of x as a column vector and perform the
	 * following matrix multiplication in GF(2^8)
	 *
	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
	 * | 0x9 0xe 0xb 0xd |   | x[1] |
	 * | 0xd 0x9 0xe 0xb | x | x[2] |
	 * | 0xb 0xd 0x9 0xe |   | x[3] |
	 *
	 * This factors into the forward mix_columns() matrix applied after a
	 * simpler one:
	 *
	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
	 *
	 * so the right-hand factor is computed first (x ^ 4x ^ rot16(4x)) and
	 * the result is handed to mix_columns().
	 */
	u32 quad = mul_by_x2(x);
	u32 premixed = x ^ quad ^ ror32(quad, 16);

	return mix_columns(premixed);
}
/*
 * subshift() - build one substituted output word from the four-word state.
 * @in:  the four state words
 * @pos: index (0..3) of the output word being produced
 *
 * Byte k (k = 0..3, least significant first) of the result is the
 * __aesti_sbox substitution of byte k of in[(pos + k) % 4].
 *
 * Each table entry is widened to u32 before it is shifted: a u8 operand
 * would be promoted to signed int, and shifting an entry >= 0x80 left by 24
 * would move a bit into the sign position, which ISO C leaves undefined.
 * The computed value is unchanged.
 */
static __always_inline u32 subshift(u32 in[], int pos)
{
	return ((u32)__aesti_sbox[in[pos] & 0xff]) ^
	       ((u32)__aesti_sbox[(in[(pos + 1) % 4] >> 8) & 0xff] << 8) ^
	       ((u32)__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
	       ((u32)__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
}
/*
 * inv_subshift() - build one inverse-substituted output word from the
 * four-word state.
 * @in:  the four state words
 * @pos: index (0..3) of the output word being produced
 *
 * Byte 0 of the result comes from in[pos], byte 1 from in[(pos + 3) % 4],
 * byte 2 from in[(pos + 2) % 4] and byte 3 from in[(pos + 1) % 4]; each is
 * passed through __aesti_inv_sbox.
 *
 * Each table entry is widened to u32 before it is shifted: a u8 operand
 * would be promoted to signed int, and shifting an entry >= 0x80 left by 24
 * would move a bit into the sign position, which ISO C leaves undefined.
 * The computed value is unchanged.
 */
static __always_inline u32 inv_subshift(u32 in[], int pos)
{
	return ((u32)__aesti_inv_sbox[in[pos] & 0xff]) ^
	       ((u32)__aesti_inv_sbox[(in[(pos + 3) % 4] >> 8) & 0xff] << 8) ^
	       ((u32)__aesti_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
	       ((u32)__aesti_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
}
/*
 * subw() - substitute each of the four bytes of @in through __aesti_sbox,
 * keeping every byte in its original position within the word.
 *
 * Each table entry is widened to u32 before it is shifted: a u8 operand
 * would be promoted to signed int, and shifting an entry >= 0x80 left by 24
 * would move a bit into the sign position, which ISO C leaves undefined.
 * The computed value is unchanged.
 */
static u32 subw(u32 in)
{
	return ((u32)__aesti_sbox[in & 0xff]) ^
	       ((u32)__aesti_sbox[(in >> 8) & 0xff] << 8) ^
	       ((u32)__aesti_sbox[(in >> 16) & 0xff] << 16) ^
	       ((u32)__aesti_sbox[(in >> 24) & 0xff] << 24);
}
/*
 * aesti_expand_key() - expand a raw AES key into the encryption and
 * decryption round-key arrays of @ctx.
 * @ctx:     context whose key_length, key_enc[] and key_dec[] are filled in
 * @in_key:  raw key bytes; read via get_unaligned_le32(), so no particular
 *           alignment is needed
 * @key_len: key size in bytes; must be AES_KEYSIZE_128, AES_KEYSIZE_192 or
 *           AES_KEYSIZE_256
 *
 * Returns 0 on success. Returns -EINVAL, before writing anything to @ctx,
 * if @key_len is not one of the three accepted sizes.
 */
static int aesti_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
u32 kwords = key_len / sizeof(u32);
u32 rc, i, j;
if (key_len != AES_KEYSIZE_128 &&
key_len != AES_KEYSIZE_192 &&
key_len != AES_KEYSIZE_256)
return -EINVAL;
ctx->key_length = key_len;
/* The raw key, as little-endian words, forms the first kwords entries. */
for (i = 0; i < kwords; i++)
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
/*
 * Each iteration derives the next kwords words (rko) from the previous
 * kwords words (rki). rc is the round constant: it starts at 1 and is
 * advanced with mul_by_x() after every iteration.
 */
for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
if (key_len == 24) {
/* the final iteration for this key size stops after rko[0..3] */
if (i >= 7)
break;
rko[4] = rko[3] ^ rki[4];
rko[5] = rko[4] ^ rki[5];
} else if (key_len == 32) {
/* the final iteration for this key size stops after rko[0..3] */
if (i >= 6)
break;
/* fifth word gets an extra subw(), without rotation or rc */
rko[4] = subw(rko[3]) ^ rki[4];
rko[5] = rko[4] ^ rki[5];
rko[6] = rko[5] ^ rki[6];
rko[7] = rko[6] ^ rki[7];
}
}
/*
* Generate the decryption keys for the Equivalent Inverse Cipher.
* This involves reversing the order of the round keys, and applying
* the Inverse Mix Columns transformation to all but the first and
* the last one.
*/
/* first decryption round key = last encryption round key, unmodified */
ctx->key_dec[0] = ctx->key_enc[key_len + 24];
ctx->key_dec[1] = ctx->key_enc[key_len + 25];
ctx->key_dec[2] = ctx->key_enc[key_len + 26];
ctx->key_dec[3] = ctx->key_enc[key_len + 27];
/* i walks key_dec forwards while j walks key_enc backwards, 4 words at a time */
for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
ctx->key_dec[i] = inv_mix_columns(ctx->key_enc[j]);
ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
}
/* last decryption round key = first encryption round key, unmodified */
ctx->key_dec[i] = ctx->key_enc[0];
ctx->key_dec[i + 1] = ctx->key_enc[1];
ctx->key_dec[i + 2] = ctx->key_enc[2];
ctx->key_dec[i + 3] = ctx->key_enc[3];
return 0;
}
/*
 * aesti_set_key() - set the key on a transform.
 * @tfm:     transform whose context (crypto_tfm_ctx()) receives the keys
 * @in_key:  raw key bytes
 * @key_len: key size in bytes
 *
 * Expands the key with aesti_expand_key() and then XORs fixed-index S-box
 * values into the first four words of both round-key arrays.
 *
 * Returns 0 on success, or the error from aesti_expand_key().
 */
static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
int err;
err = aesti_expand_key(ctx, in_key, key_len);
if (err)
return err;
/*
* In order to force the compiler to emit data independent Sbox lookups
* at the start of each block, xor the first round key with values at
* fixed indexes in the Sbox. This will need to be repeated each time
* the key is used, which will pull the entire Sbox into the D-cache
* before any data dependent Sbox lookups are performed.
*/
/* eight reads per table, at indexes 0, 32, ..., 224 */
ctx->key_enc[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
ctx->key_enc[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
ctx->key_enc[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
ctx->key_enc[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
ctx->key_dec[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
ctx->key_dec[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
ctx->key_dec[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
ctx->key_dec[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
return 0;
}
/*
 * aesti_encrypt() - encrypt one 16-byte block.
 * @tfm: transform whose context holds the expanded key
 * @out: destination for the 16 output bytes; written with
 *       put_unaligned_le32(), so no particular alignment is needed
 * @in:  the 16 input bytes; read with get_unaligned_le32()
 */
static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
/* key_enc[0..3] are consumed below; rkp walks the remaining round keys */
const u32 *rkp = ctx->key_enc + 4;
/* 10, 12 or 14 for key_length 16, 24 or 32 */
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
int round;
st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
/*
 * XOR in the same fixed-index S-box values that aesti_set_key() folded
 * into key_enc[0..3], cancelling them out. These reads do not depend on
 * the data and happen before the first data dependent lookup below.
 */
st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
/*
 * Two rounds per iteration, ping-ponging between st0 and st1. The loop
 * exits after the first half of the iteration that leaves exactly one
 * round to go.
 */
for (round = 0;; round += 2, rkp += 8) {
st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
if (round == rounds - 2)
break;
st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
}
/* final round: substitution and round key only, no mix_columns() */
put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
}
/*
 * Decrypt a single block: 16 bytes are read from 'in' and 16 bytes are
 * written to 'out', both as four little-endian 32-bit words through the
 * unaligned accessors, so neither buffer needs any particular alignment.
 *
 * The decryption key schedule (key_dec) is taken from the transform context;
 * the number of rounds is derived from the stored key length
 * (6 + key_length / 4, e.g. 10 for a 16-byte key).
 */
static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
/* Round keys after the first one; the first is applied explicitly below. */
const u32 *rkp = ctx->key_dec + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
int round;
/* Initial AddRoundKey: xor the input block with the first round key. */
st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
/*
 * aesti_set_key() xor-ed these same fixed-index inverse Sbox values into
 * key_dec[0..3]; repeating the xor here cancels that mask and, as the
 * comment in aesti_set_key() explains, reads the table at fixed indexes
 * before any data dependent lookup takes place.
 */
st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
st0[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
/*
 * Each iteration performs up to two full rounds, alternating the state
 * between st0 and st1 and consuming eight round key words. The loop is
 * left from the middle, once only the final round remains.
 */
for (round = 0;; round += 2, rkp += 8) {
st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
if (round == rounds - 2)
break;
st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
}
/* Final round: no inv_mix_columns step, last round key is at rkp[4..7]. */
put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
}
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-fixed-time",
.cra_priority = 100 + 1,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
.cra_cipher.cia_setkey = aesti_set_key,
.cra_cipher.cia_encrypt = aesti_encrypt,
.cra_cipher.cia_decrypt = aesti_decrypt
};
/* Module entry point: register the cipher and report the result. */
static int __init aes_init(void)
{
	int err = crypto_register_alg(&aes_alg);

	return err;
}
/* Module exit point: unregister the cipher registered by aes_init(). */
static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}
module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Generic fixed time AES");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

View File

@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include "internal.h"
@ -493,7 +494,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg)
{
seq_printf(m, "type : ahash\n");

View File

@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/crypto.h>
#include <linux/compiler.h>
#include <crypto/algapi.h>
#include <linux/cryptouser.h>
#include <net/netlink.h>
@ -47,7 +48,7 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
{

View File

@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size)
__be32 *b = (__be32 *)(a + size);
u32 c;
for (; size >= 4; size -= 4) {
c = be32_to_cpu(*--b) + 1;
*b = cpu_to_be32(c);
if (c)
return;
}
if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
!((unsigned long)b & (__alignof__(*b) - 1)))
for (; size >= 4; size -= 4) {
c = be32_to_cpu(*--b) + 1;
*b = cpu_to_be32(c);
if (c)
return;
}
crypto_inc_byte(a, size);
}
EXPORT_SYMBOL_GPL(crypto_inc);
static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
{
for (; size; size--)
*a++ ^= *b++;
int relalign = 0;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
int size = sizeof(unsigned long);
int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
relalign = d ? 1 << __ffs(d) : size;
/*
* If we care about alignment, process as many bytes as
* needed to advance dst and src to values whose alignments
* equal their relative alignment. This will allow us to
* process the remainder of the input using optimal strides.
*/
while (((unsigned long)dst & (relalign - 1)) && len > 0) {
*dst++ ^= *src++;
len--;
}
}
while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
*(u64 *)dst ^= *(u64 *)src;
dst += 8;
src += 8;
len -= 8;
}
while (len >= 4 && !(relalign & 3)) {
*(u32 *)dst ^= *(u32 *)src;
dst += 4;
src += 4;
len -= 4;
}
while (len >= 2 && !(relalign & 1)) {
*(u16 *)dst ^= *(u16 *)src;
dst += 2;
src += 2;
len -= 2;
}
while (len--)
*dst++ ^= *src++;
}
void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
{
u32 *a = (u32 *)dst;
u32 *b = (u32 *)src;
for (; size >= 4; size -= 4)
*a++ ^= *b++;
crypto_xor_byte((u8 *)a, (u8 *)b, size);
}
EXPORT_SYMBOL_GPL(crypto_xor);
EXPORT_SYMBOL_GPL(__crypto_xor);
unsigned int crypto_alg_extsize(struct crypto_alg *alg)
{

View File

@ -245,7 +245,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
struct ahash_request *req = &ctx->req;
char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))];
char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
struct sock *sk2;
struct alg_sock *ask2;
struct hash_ctx *ctx2;

View File

@ -1,6 +1,6 @@
/*
* Block chaining cipher operations.
*
*
* Generic encrypt/decrypt wrapper for ciphers, handles operations across
* multiple page boundaries by using temporary blocks. In user context,
* the kernel is given a chance to schedule us once per page.
@ -9,7 +9,7 @@
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include "internal.h"
@ -534,7 +535,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
{
seq_printf(m, "type : blkcipher\n");

View File

@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->cra_blocksize;
inst->alg.base.cra_alignmask = alg->cra_alignmask;
/* We access the data as u32s when xoring. */
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
inst->alg.ivsize = alg->cra_blocksize;
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;

View File

@ -11,6 +11,7 @@
*/
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
@ -23,11 +24,11 @@
struct ccm_instance_ctx {
struct crypto_skcipher_spawn ctr;
struct crypto_spawn cipher;
struct crypto_ahash_spawn mac;
};
struct crypto_ccm_ctx {
struct crypto_cipher *cipher;
struct crypto_ahash *mac;
struct crypto_skcipher *ctr;
};
@ -44,15 +45,21 @@ struct crypto_rfc4309_req_ctx {
struct crypto_ccm_req_priv_ctx {
u8 odata[16];
u8 idata[16];
u8 auth_tag[16];
u32 ilen;
u32 flags;
struct scatterlist src[3];
struct scatterlist dst[3];
struct skcipher_request skreq;
};
/* Per-transform context of a cbcmac instance. */
struct cbcmac_tfm_ctx {
/* Underlying block cipher; set in cbcmac_init_tfm(), freed in cbcmac_exit_tfm(). */
struct crypto_cipher *child;
};
/* Per-request state of a cbcmac hash operation. */
struct cbcmac_desc_ctx {
/* Number of bytes already xor-ed into the current, not yet encrypted block. */
unsigned int len;
};
static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx(
struct aead_request *req)
{
@ -84,7 +91,7 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
{
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
struct crypto_skcipher *ctr = ctx->ctr;
struct crypto_cipher *tfm = ctx->cipher;
struct crypto_ahash *mac = ctx->mac;
int err = 0;
crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
@ -96,11 +103,11 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
if (err)
goto out;
crypto_cipher_clear_flags(tfm, CRYPTO_TFM_REQ_MASK);
crypto_cipher_set_flags(tfm, crypto_aead_get_flags(aead) &
crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK);
crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) &
CRYPTO_TFM_REQ_MASK);
err = crypto_cipher_setkey(tfm, key, keylen);
crypto_aead_set_flags(aead, crypto_cipher_get_flags(tfm) &
err = crypto_ahash_setkey(mac, key, keylen);
crypto_aead_set_flags(aead, crypto_ahash_get_flags(mac) &
CRYPTO_TFM_RES_MASK);
out:
@ -167,119 +174,61 @@ static int format_adata(u8 *adata, unsigned int a)
return len;
}
static void compute_mac(struct crypto_cipher *tfm, u8 *data, int n,
struct crypto_ccm_req_priv_ctx *pctx)
{
unsigned int bs = 16;
u8 *odata = pctx->odata;
u8 *idata = pctx->idata;
int datalen, getlen;
datalen = n;
/* first time in here, block may be partially filled. */
getlen = bs - pctx->ilen;
if (datalen >= getlen) {
memcpy(idata + pctx->ilen, data, getlen);
crypto_xor(odata, idata, bs);
crypto_cipher_encrypt_one(tfm, odata, odata);
datalen -= getlen;
data += getlen;
pctx->ilen = 0;
}
/* now encrypt rest of data */
while (datalen >= bs) {
crypto_xor(odata, data, bs);
crypto_cipher_encrypt_one(tfm, odata, odata);
datalen -= bs;
data += bs;
}
/* check and see if there's leftover data that wasn't
* enough to fill a block.
*/
if (datalen) {
memcpy(idata + pctx->ilen, data, datalen);
pctx->ilen += datalen;
}
}
static void get_data_to_compute(struct crypto_cipher *tfm,
struct crypto_ccm_req_priv_ctx *pctx,
struct scatterlist *sg, unsigned int len)
{
struct scatter_walk walk;
u8 *data_src;
int n;
scatterwalk_start(&walk, sg);
while (len) {
n = scatterwalk_clamp(&walk, len);
if (!n) {
scatterwalk_start(&walk, sg_next(walk.sg));
n = scatterwalk_clamp(&walk, len);
}
data_src = scatterwalk_map(&walk);
compute_mac(tfm, data_src, n, pctx);
len -= n;
scatterwalk_unmap(data_src);
scatterwalk_advance(&walk, n);
scatterwalk_done(&walk, 0, len);
if (len)
crypto_yield(pctx->flags);
}
/* any leftover needs padding and then encrypted */
if (pctx->ilen) {
int padlen;
u8 *odata = pctx->odata;
u8 *idata = pctx->idata;
padlen = 16 - pctx->ilen;
memset(idata + pctx->ilen, 0, padlen);
crypto_xor(odata, idata, 16);
crypto_cipher_encrypt_one(tfm, odata, odata);
pctx->ilen = 0;
}
}
static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
unsigned int cryptlen)
{
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
struct crypto_cipher *cipher = ctx->cipher;
AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
unsigned int assoclen = req->assoclen;
u8 *odata = pctx->odata;
u8 *idata = pctx->idata;
int err;
struct scatterlist sg[3];
u8 odata[16];
u8 idata[16];
int ilen, err;
/* format control data for input */
err = format_input(odata, req, cryptlen);
if (err)
goto out;
/* encrypt first block to use as start in computing mac */
crypto_cipher_encrypt_one(cipher, odata, odata);
sg_init_table(sg, 3);
sg_set_buf(&sg[0], odata, 16);
/* format associated data and compute into mac */
if (assoclen) {
pctx->ilen = format_adata(idata, assoclen);
get_data_to_compute(cipher, pctx, req->src, req->assoclen);
ilen = format_adata(idata, assoclen);
sg_set_buf(&sg[1], idata, ilen);
sg_chain(sg, 3, req->src);
} else {
pctx->ilen = 0;
ilen = 0;
sg_chain(sg, 2, req->src);
}
/* compute plaintext into mac */
if (cryptlen)
get_data_to_compute(cipher, pctx, plain, cryptlen);
ahash_request_set_tfm(ahreq, ctx->mac);
ahash_request_set_callback(ahreq, pctx->flags, NULL, NULL);
ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16);
err = crypto_ahash_init(ahreq);
if (err)
goto out;
err = crypto_ahash_update(ahreq);
if (err)
goto out;
/* we need to pad the MAC input to a round multiple of the block size */
ilen = 16 - (assoclen + ilen) % 16;
if (ilen < 16) {
memset(idata, 0, ilen);
sg_init_table(sg, 2);
sg_set_buf(&sg[0], idata, ilen);
if (plain)
sg_chain(sg, 2, plain);
plain = sg;
cryptlen += ilen;
}
ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
err = crypto_ahash_finup(ahreq);
out:
return err;
}
@ -453,21 +402,21 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
struct aead_instance *inst = aead_alg_instance(tfm);
struct ccm_instance_ctx *ictx = aead_instance_ctx(inst);
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_cipher *cipher;
struct crypto_ahash *mac;
struct crypto_skcipher *ctr;
unsigned long align;
int err;
cipher = crypto_spawn_cipher(&ictx->cipher);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
mac = crypto_spawn_ahash(&ictx->mac);
if (IS_ERR(mac))
return PTR_ERR(mac);
ctr = crypto_spawn_skcipher(&ictx->ctr);
err = PTR_ERR(ctr);
if (IS_ERR(ctr))
goto err_free_cipher;
goto err_free_mac;
ctx->cipher = cipher;
ctx->mac = mac;
ctx->ctr = ctr;
align = crypto_aead_alignmask(tfm);
@ -479,8 +428,8 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
return 0;
err_free_cipher:
crypto_free_cipher(cipher);
err_free_mac:
crypto_free_ahash(mac);
return err;
}
@ -488,7 +437,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm)
{
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
crypto_free_cipher(ctx->cipher);
crypto_free_ahash(ctx->mac);
crypto_free_skcipher(ctx->ctr);
}
@ -496,7 +445,7 @@ static void crypto_ccm_free(struct aead_instance *inst)
{
struct ccm_instance_ctx *ctx = aead_instance_ctx(inst);
crypto_drop_spawn(&ctx->cipher);
crypto_drop_ahash(&ctx->mac);
crypto_drop_skcipher(&ctx->ctr);
kfree(inst);
}
@ -505,12 +454,13 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
struct rtattr **tb,
const char *full_name,
const char *ctr_name,
const char *cipher_name)
const char *mac_name)
{
struct crypto_attr_type *algt;
struct aead_instance *inst;
struct skcipher_alg *ctr;
struct crypto_alg *cipher;
struct crypto_alg *mac_alg;
struct hash_alg_common *mac;
struct ccm_instance_ctx *ictx;
int err;
@ -521,25 +471,26 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
return -EINVAL;
cipher = crypto_alg_mod_lookup(cipher_name, CRYPTO_ALG_TYPE_CIPHER,
CRYPTO_ALG_TYPE_MASK);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
mac_alg = crypto_find_alg(mac_name, &crypto_ahash_type,
CRYPTO_ALG_TYPE_HASH,
CRYPTO_ALG_TYPE_AHASH_MASK |
CRYPTO_ALG_ASYNC);
if (IS_ERR(mac_alg))
return PTR_ERR(mac_alg);
mac = __crypto_hash_alg_common(mac_alg);
err = -EINVAL;
if (cipher->cra_blocksize != 16)
goto out_put_cipher;
if (mac->digestsize != 16)
goto out_put_mac;
inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
err = -ENOMEM;
if (!inst)
goto out_put_cipher;
goto out_put_mac;
ictx = aead_instance_ctx(inst);
err = crypto_init_spawn(&ictx->cipher, cipher,
aead_crypto_instance(inst),
CRYPTO_ALG_TYPE_MASK);
err = crypto_init_ahash_spawn(&ictx->mac, mac,
aead_crypto_instance(inst));
if (err)
goto err_free_inst;
@ -548,7 +499,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
crypto_requires_sync(algt->type,
algt->mask));
if (err)
goto err_drop_cipher;
goto err_drop_mac;
ctr = crypto_spawn_skcipher_alg(&ictx->ctr);
@ -564,18 +515,17 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
"ccm_base(%s,%s)", ctr->base.cra_driver_name,
cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
goto err_drop_ctr;
memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = (cipher->cra_priority +
inst->alg.base.cra_priority = (mac->base.cra_priority +
ctr->base.cra_priority) / 2;
inst->alg.base.cra_blocksize = 1;
inst->alg.base.cra_alignmask = cipher->cra_alignmask |
ctr->base.cra_alignmask |
(__alignof__(u32) - 1);
inst->alg.base.cra_alignmask = mac->base.cra_alignmask |
ctr->base.cra_alignmask;
inst->alg.ivsize = 16;
inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr);
inst->alg.maxauthsize = 16;
@ -593,23 +543,24 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
if (err)
goto err_drop_ctr;
out_put_cipher:
crypto_mod_put(cipher);
out_put_mac:
crypto_mod_put(mac_alg);
return err;
err_drop_ctr:
crypto_drop_skcipher(&ictx->ctr);
err_drop_cipher:
crypto_drop_spawn(&ictx->cipher);
err_drop_mac:
crypto_drop_ahash(&ictx->mac);
err_free_inst:
kfree(inst);
goto out_put_cipher;
goto out_put_mac;
}
static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
{
const char *cipher_name;
char ctr_name[CRYPTO_MAX_ALG_NAME];
char mac_name[CRYPTO_MAX_ALG_NAME];
char full_name[CRYPTO_MAX_ALG_NAME];
cipher_name = crypto_attr_alg_name(tb[1]);
@ -620,12 +571,16 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
cipher_name) >= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
if (snprintf(mac_name, CRYPTO_MAX_ALG_NAME, "cbcmac(%s)",
cipher_name) >= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >=
CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name,
cipher_name);
mac_name);
}
static struct crypto_template crypto_ccm_tmpl = {
@ -899,14 +854,164 @@ static struct crypto_template crypto_rfc4309_tmpl = {
.module = THIS_MODULE,
};
/*
 * Set the MAC key by handing it unchanged to the underlying block cipher.
 * Returns whatever crypto_cipher_setkey() returns.
 */
static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent,
				       const u8 *inkey, unsigned int keylen)
{
	struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
	struct crypto_cipher *child = tctx->child;

	return crypto_cipher_setkey(child, inkey, keylen);
}
/*
 * Start a new MAC computation: no bytes pending and an all-zero running
 * digest. The digest buffer occupies the last 'digestsize' bytes of the
 * descriptor context. Always returns 0.
 */
static int crypto_cbcmac_digest_init(struct shash_desc *pdesc)
{
	struct crypto_shash *shash = pdesc->tfm;
	struct cbcmac_desc_ctx *dctx = shash_desc_ctx(pdesc);
	int blocksize = crypto_shash_digestsize(shash);
	u8 *digest = (u8 *)dctx + crypto_shash_descsize(shash) - blocksize;

	dctx->len = 0;
	memset(digest, 0, blocksize);

	return 0;
}
/*
 * Absorb 'len' bytes from 'p' into the running MAC.
 *
 * Input is xor-ed into the digest buffer at the current fill offset; every
 * time a whole block has been accumulated the buffer is encrypted in place
 * with the underlying cipher and the fill offset restarts at zero. A trailing
 * partial block stays pending in the buffer. Always returns 0.
 */
static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p,
				       unsigned int len)
{
	struct crypto_shash *shash = pdesc->tfm;
	struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(shash);
	struct cbcmac_desc_ctx *dctx = shash_desc_ctx(pdesc);
	struct crypto_cipher *cipher = tctx->child;
	int bs = crypto_shash_digestsize(shash);
	u8 *digest = (u8 *)dctx + crypto_shash_descsize(shash) - bs;

	while (len > 0) {
		/* Never take more than what is left of the current block. */
		unsigned int chunk = min(len, bs - dctx->len);

		crypto_xor(digest + dctx->len, p, chunk);
		dctx->len += chunk;
		len -= chunk;
		p += chunk;

		if (dctx->len != bs)
			continue;

		/* Block complete: chain it through the cipher. */
		crypto_cipher_encrypt_one(cipher, digest, digest);
		dctx->len = 0;
	}

	return 0;
}
/*
 * Finish the MAC computation and copy the digest to 'out'.
 *
 * If a partial block is still pending, the digest buffer is encrypted once
 * more as it stands (the missing bytes were never xor-ed, which amounts to
 * zero padding). Always returns 0.
 */
static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out)
{
	struct crypto_shash *shash = pdesc->tfm;
	struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(shash);
	struct cbcmac_desc_ctx *dctx = shash_desc_ctx(pdesc);
	int blocksize = crypto_shash_digestsize(shash);
	u8 *digest = (u8 *)dctx + crypto_shash_descsize(shash) - blocksize;

	if (dctx->len != 0)
		crypto_cipher_encrypt_one(tctx->child, digest, digest);

	memcpy(out, digest, blocksize);

	return 0;
}
/*
 * Transform constructor: instantiate the underlying block cipher from the
 * instance's spawn and remember it in the transform context.
 *
 * Returns 0 on success or the error encoded in the pointer returned by
 * crypto_spawn_cipher().
 */
static int cbcmac_init_tfm(struct crypto_tfm *tfm)
{
	struct crypto_cipher *cipher;
	struct crypto_instance *inst = (void *)tfm->__crt_alg;
	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
	struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);

	cipher = crypto_spawn_cipher(spawn);
	if (IS_ERR(cipher))
		return PTR_ERR(cipher);

	ctx->child = cipher;

	return 0;
}
/* Transform destructor: release the cipher obtained in cbcmac_init_tfm(). */
static void cbcmac_exit_tfm(struct crypto_tfm *tfm)
{
	struct cbcmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
	struct crypto_cipher *child = tctx->child;

	crypto_free_cipher(child);
}
static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
{
struct shash_instance *inst;
struct crypto_alg *alg;
int err;
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
if (err)
return err;
alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
CRYPTO_ALG_TYPE_MASK);
if (IS_ERR(alg))
return PTR_ERR(alg);
inst = shash_alloc_instance("cbcmac", alg);
err = PTR_ERR(inst);
if (IS_ERR(inst))
goto out_put_alg;
err = crypto_init_spawn(shash_instance_ctx(inst), alg,
shash_crypto_instance(inst),
CRYPTO_ALG_TYPE_MASK);
if (err)
goto out_free_inst;
inst->alg.base.cra_priority = alg->cra_priority;
inst->alg.base.cra_blocksize = 1;
inst->alg.digestsize = alg->cra_blocksize;
inst->alg.descsize = ALIGN(sizeof(struct cbcmac_desc_ctx),
alg->cra_alignmask + 1) +
alg->cra_blocksize;
inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx);
inst->alg.base.cra_init = cbcmac_init_tfm;
inst->alg.base.cra_exit = cbcmac_exit_tfm;
inst->alg.init = crypto_cbcmac_digest_init;
inst->alg.update = crypto_cbcmac_digest_update;
inst->alg.final = crypto_cbcmac_digest_final;
inst->alg.setkey = crypto_cbcmac_digest_setkey;
err = shash_register_instance(tmpl, inst);
out_free_inst:
if (err)
shash_free_instance(shash_crypto_instance(inst));
out_put_alg:
crypto_mod_put(alg);
return err;
}
/* Template descriptor that makes "cbcmac(...)" instantiable on demand. */
static struct crypto_template crypto_cbcmac_tmpl = {
	.name	= "cbcmac",
	.module	= THIS_MODULE,
	.create	= cbcmac_create,
	.free	= shash_free_instance,
};
static int __init crypto_ccm_module_init(void)
{
int err;
err = crypto_register_template(&crypto_ccm_base_tmpl);
err = crypto_register_template(&crypto_cbcmac_tmpl);
if (err)
goto out;
err = crypto_register_template(&crypto_ccm_base_tmpl);
if (err)
goto out_undo_cbcmac;
err = crypto_register_template(&crypto_ccm_tmpl);
if (err)
goto out_undo_base;
@ -922,6 +1027,8 @@ out_undo_ccm:
crypto_unregister_template(&crypto_ccm_tmpl);
out_undo_base:
crypto_unregister_template(&crypto_ccm_base_tmpl);
out_undo_cbcmac:
	/* Unwind the cbcmac registration done at the start of module init. */
	crypto_unregister_template(&crypto_cbcmac_tmpl);
goto out;
}
@ -930,6 +1037,7 @@ static void __exit crypto_ccm_module_exit(void)
crypto_unregister_template(&crypto_rfc4309_tmpl);
crypto_unregister_template(&crypto_ccm_tmpl);
crypto_unregister_template(&crypto_ccm_base_tmpl);
crypto_unregister_template(&crypto_cbcmac_tmpl);
}
module_init(crypto_ccm_module_init);

View File

@ -10,10 +10,9 @@
*/
#include <crypto/algapi.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
static inline u32 le32_to_cpuvp(const void *p)
{
@ -63,10 +62,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
}
EXPORT_SYMBOL_GPL(crypto_chacha20_init);
int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keysize)
{
struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm);
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
int i;
if (keysize != CHACHA20_KEY_SIZE)
@ -79,66 +78,54 @@ int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
}
EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
int crypto_chacha20_crypt(struct skcipher_request *req)
{
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
u32 state[16];
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
err = skcipher_walk_virt(&walk, req, true);
crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
crypto_chacha20_init(state, ctx, walk.iv);
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % CHACHA20_BLOCK_SIZE);
}
if (walk.nbytes) {
while (walk.nbytes > 0) {
chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes);
err = blkcipher_walk_done(desc, &walk, 0);
err = skcipher_walk_done(&walk, 0);
}
return err;
}
EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
static struct crypto_alg alg = {
.cra_name = "chacha20",
.cra_driver_name = "chacha20-generic",
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_type = &crypto_blkcipher_type,
.cra_ctxsize = sizeof(struct chacha20_ctx),
.cra_alignmask = sizeof(u32) - 1,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CHACHA20_KEY_SIZE,
.max_keysize = CHACHA20_KEY_SIZE,
.ivsize = CHACHA20_IV_SIZE,
.geniv = "seqiv",
.setkey = crypto_chacha20_setkey,
.encrypt = crypto_chacha20_crypt,
.decrypt = crypto_chacha20_crypt,
},
},
static struct skcipher_alg alg = {
.base.cra_name = "chacha20",
.base.cra_driver_name = "chacha20-generic",
.base.cra_priority = 100,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
.base.cra_alignmask = sizeof(u32) - 1,
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA20_KEY_SIZE,
.max_keysize = CHACHA20_KEY_SIZE,
.ivsize = CHACHA20_IV_SIZE,
.chunksize = CHACHA20_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
.encrypt = crypto_chacha20_crypt,
.decrypt = crypto_chacha20_crypt,
};
static int __init chacha20_generic_mod_init(void)
{
return crypto_register_alg(&alg);
return crypto_register_skcipher(&alg);
}
static void __exit chacha20_generic_mod_fini(void)
{
crypto_unregister_alg(&alg);
crypto_unregister_skcipher(&alg);
}
module_init(chacha20_generic_mod_init);

View File

@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
if (err)
goto out_free_inst;
/* We access the data as u32s when xoring. */
alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
alignmask = alg->cra_alignmask;
inst->alg.base.cra_alignmask = alignmask;
inst->alg.base.cra_priority = alg->cra_priority;
inst->alg.base.cra_blocksize = alg->cra_blocksize;

View File

@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
inst->alg.cra_priority = alg->cra_priority;
inst->alg.cra_blocksize = 1;
inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
inst->alg.cra_alignmask = alg->cra_alignmask;
inst->alg.cra_type = &crypto_blkcipher_type;
inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;

View File

@ -49,6 +49,7 @@
#include <linux/scatterlist.h>
#include <crypto/scatterwalk.h>
#include <linux/slab.h>
#include <linux/compiler.h>
struct crypto_cts_ctx {
struct crypto_skcipher *child;
@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct skcipher_request *subreq = &rctx->subreq;
int bsize = crypto_skcipher_blocksize(tfm);
u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32))));
u8 d[bsize * 2] __aligned(__alignof__(u32));
struct scatterlist *sg;
unsigned int offset;
int lastn;
@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct skcipher_request *subreq = &rctx->subreq;
int bsize = crypto_skcipher_blocksize(tfm);
u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32))));
u8 d[bsize * 2] __aligned(__alignof__(u32));
struct scatterlist *sg;
unsigned int offset;
u8 *space;
@ -373,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
/* We access the data as u32s when xoring. */
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
inst->alg.ivsize = alg->base.cra_blocksize;
inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);

View File

@ -19,6 +19,7 @@
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include <crypto/kpp.h>
#include <crypto/internal/kpp.h>
@ -47,7 +48,7 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg)
{

View File

@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/compiler.h>
struct crypto_pcbc_ctx {
struct crypto_cipher *child;
@ -146,7 +147,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
unsigned int nbytes = walk->nbytes;
u8 *src = walk->src.virt.addr;
u8 *iv = walk->iv;
u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32))));
u8 tmpbuf[bsize] __aligned(__alignof__(u32));
do {
memcpy(tmpbuf, src, bsize);
@ -259,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->cra_blocksize;
inst->alg.base.cra_alignmask = alg->cra_alignmask;
/* We access the data as u32s when xoring. */
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
inst->alg.ivsize = alg->cra_blocksize;
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;

View File

@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <net/netlink.h>
#include "internal.h"
@ -95,7 +96,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
{
seq_printf(m, "type : rng\n");

View File

@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/crypto.h>
#include <linux/compiler.h>
#include <linux/vmalloc.h>
#include <crypto/algapi.h>
#include <linux/cryptouser.h>
@ -57,7 +58,7 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
{

View File

@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
if (IS_ERR(inst))
return PTR_ERR(inst);
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
spawn = aead_instance_ctx(inst);
alg = crypto_spawn_aead_alg(spawn);

View File

@ -19,6 +19,7 @@
#include <linux/seq_file.h>
#include <linux/cryptouser.h>
#include <net/netlink.h>
#include <linux/compiler.h>
#include "internal.h"
@ -67,7 +68,7 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey);
static inline unsigned int shash_align_buffer_size(unsigned len,
unsigned long mask)
{
typedef u8 __attribute__ ((aligned)) u8_aligned;
typedef u8 __aligned_largest u8_aligned;
return len + (mask & ~(__alignof__(u8_aligned) - 1));
}
@ -80,7 +81,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
unsigned int unaligned_len = alignmask + 1 -
((unsigned long)data & alignmask);
u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
__attribute__ ((aligned));
__aligned_largest;
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
int err;
@ -116,7 +117,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
struct shash_alg *shash = crypto_shash_alg(tfm);
unsigned int ds = crypto_shash_digestsize(tfm);
u8 ubuf[shash_align_buffer_size(ds, alignmask)]
__attribute__ ((aligned));
__aligned_largest;
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
int err;
@ -403,7 +404,7 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
#endif
static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
{
struct shash_alg *salg = __crypto_shash_alg(alg);

View File

@ -19,6 +19,7 @@
#include <crypto/scatterwalk.h>
#include <linux/bug.h>
#include <linux/cryptouser.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/rtnetlink.h>
@ -185,12 +186,12 @@ void skcipher_walk_complete(struct skcipher_walk *walk, int err)
data = p->data;
if (!data) {
data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1);
data = skcipher_get_spot(data, walk->chunksize);
data = skcipher_get_spot(data, walk->stride);
}
scatterwalk_copychunks(data, &p->dst, p->len, 1);
if (offset_in_page(p->data) + p->len + walk->chunksize >
if (offset_in_page(p->data) + p->len + walk->stride >
PAGE_SIZE)
free_page((unsigned long)p->data);
@ -299,7 +300,7 @@ static int skcipher_next_copy(struct skcipher_walk *walk)
p->len = walk->nbytes;
skcipher_queue_write(walk, p);
if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize >
if (offset_in_page(walk->page) + walk->nbytes + walk->stride >
PAGE_SIZE)
walk->page = NULL;
else
@ -344,7 +345,7 @@ static int skcipher_walk_next(struct skcipher_walk *walk)
SKCIPHER_WALK_DIFF);
n = walk->total;
bsize = min(walk->chunksize, max(n, walk->blocksize));
bsize = min(walk->stride, max(n, walk->blocksize));
n = scatterwalk_clamp(&walk->in, n);
n = scatterwalk_clamp(&walk->out, n);
@ -393,7 +394,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
unsigned a = crypto_tfm_ctx_alignment() - 1;
unsigned alignmask = walk->alignmask;
unsigned ivsize = walk->ivsize;
unsigned bs = walk->chunksize;
unsigned bs = walk->stride;
unsigned aligned_bs;
unsigned size;
u8 *iv;
@ -463,7 +464,7 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
SKCIPHER_WALK_SLEEP : 0;
walk->blocksize = crypto_skcipher_blocksize(tfm);
walk->chunksize = crypto_skcipher_chunksize(tfm);
walk->stride = crypto_skcipher_walksize(tfm);
walk->ivsize = crypto_skcipher_ivsize(tfm);
walk->alignmask = crypto_skcipher_alignmask(tfm);
@ -525,7 +526,7 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
walk->flags &= ~SKCIPHER_WALK_SLEEP;
walk->blocksize = crypto_aead_blocksize(tfm);
walk->chunksize = crypto_aead_chunksize(tfm);
walk->stride = crypto_aead_chunksize(tfm);
walk->ivsize = crypto_aead_ivsize(tfm);
walk->alignmask = crypto_aead_alignmask(tfm);
@ -807,7 +808,7 @@ static void crypto_skcipher_free_instance(struct crypto_instance *inst)
}
static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
__attribute__ ((unused));
__maybe_unused;
static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
{
struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg,
@ -821,6 +822,7 @@ static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
seq_printf(m, "max keysize : %u\n", skcipher->max_keysize);
seq_printf(m, "ivsize : %u\n", skcipher->ivsize);
seq_printf(m, "chunksize : %u\n", skcipher->chunksize);
seq_printf(m, "walksize : %u\n", skcipher->walksize);
}
#ifdef CONFIG_NET
@ -893,11 +895,14 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg)
{
struct crypto_alg *base = &alg->base;
if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8)
if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 ||
alg->walksize > PAGE_SIZE / 8)
return -EINVAL;
if (!alg->chunksize)
alg->chunksize = base->cra_blocksize;
if (!alg->walksize)
alg->walksize = alg->chunksize;
base->cra_type = &crypto_skcipher_type2;
base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;

View File

@ -22,6 +22,8 @@
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/aead.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
@ -1010,6 +1012,8 @@ static inline int tcrypt_test(const char *alg)
{
int ret;
pr_debug("testing %s\n", alg);
ret = alg_test(alg, alg, 0, 0);
/* non-fips algs return -EINVAL in fips mode */
if (fips_enabled && ret == -EINVAL)
@ -2059,6 +2063,8 @@ static int __init tcrypt_mod_init(void)
if (err) {
printk(KERN_ERR "tcrypt: one or more tests failed!\n");
goto err_free_tv;
} else {
pr_debug("all tests passed\n");
}
/* We intentionally return -EAGAIN to prevent keeping the module,

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More