1
0
Fork 0

crypto: aegis128 - duplicate init() and final() hooks in SIMD code

In order to speed up aegis128 processing even more, duplicate the init()
and final() routines as SIMD versions in their entirety. This results
in a 2x speedup on ARM Cortex-A57 for ~1500 byte packets (using AES
instructions).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
alistair/sunxi64-5.5-dsi
Ard Biesheuvel 2019-10-14 18:16:45 +02:00 committed by Herbert Xu
parent 2698bce1f0
commit 528282630c
3 changed files with 97 additions and 12 deletions

View File

@ -60,10 +60,16 @@ static bool aegis128_do_simd(void)
bool crypto_aegis128_have_simd(void);
void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg);
void crypto_aegis128_init_simd(struct aegis_state *state,
const union aegis_block *key,
const u8 *iv);
void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
void crypto_aegis128_final_simd(struct aegis_state *state,
union aegis_block *tag_xor,
u64 assoclen, u64 cryptlen);
static void crypto_aegis128_update(struct aegis_state *state)
{
@ -395,17 +401,21 @@ static int crypto_aegis128_encrypt(struct aead_request *req)
struct skcipher_walk walk;
struct aegis_state state;
crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
skcipher_walk_aead_encrypt(&walk, req, false);
if (aegis128_do_simd())
if (aegis128_do_simd()) {
crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_encrypt_chunk_simd);
else
crypto_aegis128_final_simd(&state, &tag, req->assoclen,
cryptlen);
} else {
crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_encrypt_chunk);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
}
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
authsize, 1);
@ -426,17 +436,21 @@ static int crypto_aegis128_decrypt(struct aead_request *req)
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
authsize, 0);
crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
skcipher_walk_aead_decrypt(&walk, req, false);
if (aegis128_do_simd())
if (aegis128_do_simd()) {
crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_decrypt_chunk_simd);
else
crypto_aegis128_final_simd(&state, &tag, req->assoclen,
cryptlen);
} else {
crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, &walk,
crypto_aegis128_decrypt_chunk);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
}
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}

View File

@ -132,6 +132,36 @@ void preload_sbox(void)
:: "r"(crypto_aes_sbox));
}
void crypto_aegis128_init_neon(void *state, const void *key, const void *iv)
{
static const uint8_t const0[] = {
0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
};
static const uint8_t const1[] = {
0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
};
uint8x16_t k = vld1q_u8(key);
uint8x16_t kiv = k ^ vld1q_u8(iv);
struct aegis128_state st = {{
kiv,
vld1q_u8(const1),
vld1q_u8(const0),
k ^ vld1q_u8(const0),
k ^ vld1q_u8(const1),
}};
int i;
preload_sbox();
for (i = 0; i < 5; i++) {
st = aegis128_update_neon(st, k);
st = aegis128_update_neon(st, kiv);
}
aegis128_save_state_neon(st, state);
}
void crypto_aegis128_update_neon(void *state, const void *msg)
{
struct aegis128_state st = aegis128_load_state_neon(state);
@ -210,3 +240,23 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
aegis128_save_state_neon(st, state);
}
void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
uint64_t cryptlen)
{
struct aegis128_state st = aegis128_load_state_neon(state);
uint8x16_t v;
int i;
preload_sbox();
v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8 * assoclen),
vmov_n_u64(8 * cryptlen));
for (i = 0; i < 7; i++)
st = aegis128_update_neon(st, v);
v = vld1q_u8(tag_xor);
v ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4];
vst1q_u8(tag_xor, v);
}

View File

@ -8,11 +8,14 @@
#include "aegis.h"
void crypto_aegis128_init_neon(void *state, const void *key, const void *iv);
void crypto_aegis128_update_neon(void *state, const void *msg);
void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
unsigned int size);
void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
unsigned int size);
void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
uint64_t cryptlen);
int aegis128_have_aes_insn __ro_after_init;
@ -25,6 +28,15 @@ bool crypto_aegis128_have_simd(void)
return IS_ENABLED(CONFIG_ARM64);
}
void crypto_aegis128_init_simd(union aegis_block *state,
const union aegis_block *key,
const u8 *iv)
{
kernel_neon_begin();
crypto_aegis128_init_neon(state, key, iv);
kernel_neon_end();
}
void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
{
kernel_neon_begin();
@ -47,3 +59,12 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
kernel_neon_end();
}
void crypto_aegis128_final_simd(union aegis_block *state,
union aegis_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
kernel_neon_begin();
crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen);
kernel_neon_end();
}