From 0e1227d356e9b2fe0500d6cc7084f752040a1e0e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 19 Oct 2009 11:53:06 +0900 Subject: [PATCH 01/20] crypto: ghash - Add PCLMULQDQ accelerated implementation PCLMULQDQ is used to accelerate the most time-consuming part of GHASH, carry-less multiplication. More information about PCLMULQDQ can be found at: http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ Because PCLMULQDQ changes XMM state, its usage must be enclosed with kernel_fpu_begin/end, which can be used only in process context, the acceleration is implemented as crypto_ahash. That is, request in soft IRQ context will be deferred to the cryptd kernel thread. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/ghash-clmulni-intel_asm.S | 157 ++++++++++ arch/x86/crypto/ghash-clmulni-intel_glue.c | 333 +++++++++++++++++++++ arch/x86/include/asm/cpufeature.h | 1 + crypto/Kconfig | 8 + crypto/cryptd.c | 7 + include/crypto/cryptd.h | 1 + 7 files changed, 510 insertions(+) create mode 100644 arch/x86/crypto/ghash-clmulni-intel_asm.S create mode 100644 arch/x86/crypto/ghash-clmulni-intel_glue.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index cfb0010fa940..1a58ad89fdf7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o +obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o @@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o + +ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o 
ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 000000000000..b9e787a511da --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -0,0 +1,157 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains accelerated part of ghash + * implementation. More information about PCLMULQDQ can be found at: + * + * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include + +.align 16 +.Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f +.Lpoly: + .octa 0xc2000000000000000000000000000001 +.Ltwo_one: + .octa 0x00000001000000000000000000000001 + +#define DATA %xmm0 +#define SHASH %xmm1 +#define T1 %xmm2 +#define T2 %xmm3 +#define T3 %xmm4 +#define BSWAP %xmm5 +#define IN1 %xmm6 + +.text + +/* + * __clmul_gf128mul_ble: internal ABI + * input: + * DATA: operand1 + * SHASH: operand2, hash_key << 1 mod poly + * output: + * DATA: operand1 * operand2 mod poly + * changed: + * T1 + * T2 + * T3 + */ +__clmul_gf128mul_ble: + movaps DATA, T1 + pshufd $0b01001110, DATA, T2 + pshufd $0b01001110, SHASH, T3 + pxor DATA, T2 + pxor SHASH, T3 + + # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 + .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 + # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 + .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11 + # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) + .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00 + pxor DATA, T2 + pxor T1, T2 # T2 = a0 * b1 + a1 * b0 + + movaps T2, T3 + pslldq $8, T3 + psrldq $8, T2 + pxor T3, DATA + pxor T2, T1 # is 
result of + # carry-less multiplication + + # first phase of the reduction + movaps DATA, T3 + psllq $1, T3 + pxor DATA, T3 + psllq $5, T3 + pxor DATA, T3 + psllq $57, T3 + movaps T3, T2 + pslldq $8, T2 + psrldq $8, T3 + pxor T2, DATA + pxor T3, T1 + + # second phase of the reduction + movaps DATA, T2 + psrlq $5, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor T2, T1 + pxor T1, DATA + ret + +/* void clmul_ghash_mul(char *dst, const be128 *shash) */ +ENTRY(clmul_ghash_mul) + movups (%rdi), DATA + movups (%rsi), SHASH + movaps .Lbswap_mask, BSWAP + pshufb BSWAP, DATA + call __clmul_gf128mul_ble + pshufb BSWAP, DATA + movups DATA, (%rdi) + ret + +/* + * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + * const be128 *shash); + */ +ENTRY(clmul_ghash_update) + cmp $16, %rdx + jb .Lupdate_just_ret # check length + movaps .Lbswap_mask, BSWAP + movups (%rdi), DATA + movups (%rcx), SHASH + pshufb BSWAP, DATA +.align 4 +.Lupdate_loop: + movups (%rsi), IN1 + pshufb BSWAP, IN1 + pxor IN1, DATA + call __clmul_gf128mul_ble + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lupdate_loop + pshufb BSWAP, DATA + movups DATA, (%rdi) +.Lupdate_just_ret: + ret + +/* + * void clmul_ghash_setkey(be128 *shash, const u8 *key); + * + * Calculate hash_key << 1 mod poly + */ +ENTRY(clmul_ghash_setkey) + movaps .Lbswap_mask, BSWAP + movups (%rsi), %xmm0 + pshufb BSWAP, %xmm0 + movaps %xmm0, %xmm1 + psllq $1, %xmm0 + psrlq $63, %xmm1 + movaps %xmm1, %xmm2 + pslldq $8, %xmm1 + psrldq $8, %xmm2 + por %xmm1, %xmm0 + # reduction + pshufd $0b00100100, %xmm2, %xmm1 + pcmpeqd .Ltwo_one, %xmm1 + pand .Lpoly, %xmm1 + pxor %xmm1, %xmm0 + movups %xmm0, (%rdi) + ret diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 000000000000..65d409644d72 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -0,0 +1,333 @@ +/* + * Accelerated GHASH implementation with Intel 
PCLMULQDQ-NI + * instructions. This file contains glue code. + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +void clmul_ghash_mul(char *dst, const be128 *shash); + +void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const be128 *shash); + +void clmul_ghash_setkey(be128 *shash, const u8 *key); + +struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; +}; + +struct ghash_ctx { + be128 shash; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + clmul_ghash_setkey(&ctx->shash, key); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *dst = dctx->buffer; + + kernel_fpu_begin(); + if (dctx->bytes) { + int n = min(srclen, dctx->bytes); + u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + dctx->bytes -= n; + srclen -= n; + + while (n--) + *pos++ ^= *src++; + + if (!dctx->bytes) + clmul_ghash_mul(dst, &ctx->shash); + } + + clmul_ghash_update(dst, src, srclen, &ctx->shash); + kernel_fpu_end(); + + if (srclen & 0xf) { + src += srclen - (srclen & 0xf); + srclen &= 0xf; + 
dctx->bytes = GHASH_BLOCK_SIZE - srclen; + while (srclen--) + *dst++ ^= *src++; + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *dst = dctx->buffer; + + if (dctx->bytes) { + u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + while (dctx->bytes--) + *tmp++ ^= 0; + + kernel_fpu_begin(); + clmul_ghash_mul(dst, &ctx->shash); + kernel_fpu_end(); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *buf = dctx->buffer; + + ghash_flush(ctx, dctx); + memcpy(dst, buf, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "__ghash", + .cra_driver_name = "__ghash-pclmulqdqni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; + +static int ghash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!irq_fpu_usable()) { /* FPU unusable here: defer to cryptd */ + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_init(cryptd_req); + } else { /* FPU usable: run the child shash inline */ + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); + } +} + +static int ghash_async_update(struct ahash_request *req) +{ + struct 
ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!irq_fpu_usable()) { /* FPU unusable here: defer to cryptd */ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_update(cryptd_req); + } else { /* FPU usable: run the child shash inline */ + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return shash_ahash_update(req, desc); + } +} + +static int ghash_async_final(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!irq_fpu_usable()) { /* FPU unusable here: defer to cryptd */ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_final(cryptd_req); + } else { /* FPU usable: run the child shash inline */ + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return crypto_shash_final(desc, req->result); + } +} + +static int ghash_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!irq_fpu_usable()) { /* FPU unusable here: defer to cryptd */ + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_digest(cryptd_req); + } else { /* FPU usable: run the child shash inline */ + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return shash_ahash_digest(req, desc); + } +} + +static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct crypto_ahash *child = &ctx->cryptd_tfm->base; + int err; + 
crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) + & CRYPTO_TFM_RES_MASK); + + return err; /* report the child's setkey status, not unconditional success */ +} + +static int ghash_async_init_tfm(struct crypto_tfm *tfm) +{ + struct cryptd_ahash *cryptd_tfm; + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void ghash_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ahash(ctx->cryptd_tfm); +} + +static struct ahash_alg ghash_async_alg = { + .init = ghash_async_init, + .update = ghash_async_update, + .final = ghash_async_final, + .setkey = ghash_async_setkey, + .digest = ghash_async_digest, + .halg = { + .digestsize = GHASH_DIGEST_SIZE, + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-clmulni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), + .cra_init = ghash_async_init_tfm, + .cra_exit = ghash_async_exit_tfm, + }, + }, +}; + +static int __init ghash_pclmulqdqni_mod_init(void) +{ + int err; + + if (!cpu_has_pclmulqdq) { + printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" + " detected.\n"); + return -ENODEV; + } + + err = crypto_register_shash(&ghash_alg); + if (err) + goto err_out; + err = crypto_register_ahash(&ghash_async_alg); + if (err) + goto err_shash; + + return 0; + +err_shash: + crypto_unregister_shash(&ghash_alg); +err_out: + return err; +} 
+ +static void __exit ghash_pclmulqdqni_mod_exit(void) +{ + crypto_unregister_ahash(&ghash_async_alg); + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_pclmulqdqni_mod_init); +module_exit(ghash_pclmulqdqni_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " + "acclerated by PCLMULQDQ-NI"); +MODULE_ALIAS("ghash"); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9cfc88b97742..613700f27a4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -248,6 +248,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) +#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/crypto/Kconfig b/crypto/Kconfig index 26b5dd0cb564..fd6871102b60 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -440,6 +440,14 @@ config CRYPTO_WP512 See also: +config CRYPTO_GHASH_CLMUL_NI_INTEL + tristate "GHASH digest algorithm (CLMUL-NI accelerated)" + select CRYPTO_SHASH + select CRYPTO_CRYPTD + help + GHASH is message digest algorithm for GCM (Galois/Counter Mode). + The implementation is accelerated by CLMUL-NI of Intel. 
+ comment "Ciphers" config CRYPTO_AES diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 35335825a4ef..f8ae0d94a647 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -711,6 +711,13 @@ struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(cryptd_ahash_child); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req) +{ + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + return &rctx->desc; +} +EXPORT_SYMBOL_GPL(cryptd_shash_desc); + void cryptd_free_ahash(struct cryptd_ahash *tfm) { crypto_free_ahash(&tfm->base); diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 2f65a6e8ea4d..1c96b255017c 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -39,6 +39,7 @@ static inline struct cryptd_ahash *__cryptd_ahash_cast( struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask); struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req); void cryptd_free_ahash(struct cryptd_ahash *tfm); #endif From 667b6294bf088445996c8395b723ae9c9467e72b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 19 Oct 2009 11:57:02 +0900 Subject: [PATCH 02/20] crypto: ansi_cprng - Add FIPS wrapper Patch to add fips(ansi_cprng) alg, which is ansi_cprng plus a continuous test Signed-off-by: Neil Horman Acked-by: Jarod Wilson Signed-off-by: Herbert Xu --- crypto/ansi_cprng.c | 79 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 9 deletions(-) diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 3aa6e3834bfe..027176a0e889 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -85,7 +85,7 @@ static void xor_vectors(unsigned char *in1, unsigned char *in2, * Returns DEFAULT_BLK_SZ bytes of random data per call * returns 0 if generation succeded, <0 if something went wrong */ -static int _get_more_prng_bytes(struct prng_context *ctx) +static int 
_get_more_prng_bytes(struct prng_context *ctx, int cont_test) { int i; unsigned char tmp[DEFAULT_BLK_SZ]; @@ -132,7 +132,7 @@ static int _get_more_prng_bytes(struct prng_context *ctx) */ if (!memcmp(ctx->rand_data, ctx->last_rand_data, DEFAULT_BLK_SZ)) { - if (fips_enabled) { + if (cont_test) { panic("cprng %p Failed repetition check!\n", ctx); } @@ -185,7 +185,8 @@ static int _get_more_prng_bytes(struct prng_context *ctx) } /* Our exported functions */ -static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx) +static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx, + int do_cont_test) { unsigned char *ptr = buf; unsigned int byte_count = (unsigned int)nbytes; @@ -220,7 +221,7 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx) remainder: if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { - if (_get_more_prng_bytes(ctx) < 0) { + if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; @@ -247,7 +248,7 @@ empty_rbuf: */ for (; byte_count >= DEFAULT_BLK_SZ; byte_count -= DEFAULT_BLK_SZ) { if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { - if (_get_more_prng_bytes(ctx) < 0) { + if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; @@ -356,7 +357,15 @@ static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata, { struct prng_context *prng = crypto_rng_ctx(tfm); - return get_prng_bytes(rdata, dlen, prng); + return get_prng_bytes(rdata, dlen, prng, 0); +} + +static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata, + unsigned int dlen) +{ + struct prng_context *prng = crypto_rng_ctx(tfm); + + return get_prng_bytes(rdata, dlen, prng, 1); } /* @@ -384,6 +393,26 @@ static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) return 0; } +static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +{ + u8 rdata[DEFAULT_BLK_SZ]; + int rc; + + struct prng_context *prng = 
crypto_rng_ctx(tfm); + + rc = cprng_reset(tfm, seed, slen); + + if (rc) /* reset failed: nothing to prime */ + goto out; + + /* this primes our continuity test */ + rc = get_prng_bytes(rdata, DEFAULT_BLK_SZ, prng, 0); + prng->rand_data_valid = DEFAULT_BLK_SZ; + +out: + return rc; +} + static struct crypto_alg rng_alg = { .cra_name = "stdrng", .cra_driver_name = "ansi_cprng", @@ -404,19 +433,51 @@ static struct crypto_alg rng_alg = { } }; +#ifdef CONFIG_CRYPTO_FIPS +static struct crypto_alg fips_rng_alg = { + .cra_name = "fips(ansi_cprng)", + .cra_driver_name = "fips_ansi_cprng", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_RNG, + .cra_ctxsize = sizeof(struct prng_context), + .cra_type = &crypto_rng_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(fips_rng_alg.cra_list), + .cra_init = cprng_init, + .cra_exit = cprng_exit, + .cra_u = { + .rng = { + .rng_make_random = fips_cprng_get_random, + .rng_reset = fips_cprng_reset, + .seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ, + } + } +}; +#endif /* Module initalization */ static int __init prng_mod_init(void) { - if (fips_enabled) - rng_alg.cra_priority += 200; + int rc = 0; - return crypto_register_alg(&rng_alg); + rc = crypto_register_alg(&rng_alg); +#ifdef CONFIG_CRYPTO_FIPS + if (rc) + goto out; + + rc = crypto_register_alg(&fips_rng_alg); + +out: +#endif + return rc; } static void __exit prng_mod_fini(void) { crypto_unregister_alg(&rng_alg); +#ifdef CONFIG_CRYPTO_FIPS + crypto_unregister_alg(&fips_rng_alg); +#endif return; } From 2141b6309b1fce535329c195cb5e5274a4c84ebc Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Mon, 19 Oct 2009 12:53:37 +0900 Subject: [PATCH 03/20] crypto: hash - Remove legacy hash/digest code 6941c3a0 disabled compilation of the legacy digest code but didn't actually remove it. Rectify this. Also, remove the crypto_hash_type extern declaration from algapi.h now that the struct is gone. 
Signed-off-by: Benjamin Gilbert Signed-off-by: Herbert Xu --- crypto/digest.c | 240 ---------------------------------------- crypto/hash.c | 183 ------------------------------ include/crypto/algapi.h | 1 - 3 files changed, 424 deletions(-) delete mode 100644 crypto/digest.c delete mode 100644 crypto/hash.c diff --git a/crypto/digest.c b/crypto/digest.c deleted file mode 100644 index 5d3f1303da98..000000000000 --- a/crypto/digest.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Cryptographic API. - * - * Digest operations. - * - * Copyright (c) 2002 James Morris - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -static int init(struct hash_desc *desc) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - - tfm->__crt_alg->cra_digest.dia_init(tfm); - return 0; -} - -static int update2(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); - - if (!nbytes) - return 0; - - for (;;) { - struct page *pg = sg_page(sg); - unsigned int offset = sg->offset; - unsigned int l = sg->length; - - if (unlikely(l > nbytes)) - l = nbytes; - nbytes -= l; - - do { - unsigned int bytes_from_page = min(l, ((unsigned int) - (PAGE_SIZE)) - - offset); - char *src = crypto_kmap(pg, 0); - char *p = src + offset; - - if (unlikely(offset & alignmask)) { - unsigned int bytes = - alignmask + 1 - (offset & alignmask); - bytes = min(bytes, bytes_from_page); - tfm->__crt_alg->cra_digest.dia_update(tfm, p, - bytes); - p += bytes; - bytes_from_page -= bytes; - l -= bytes; - } - tfm->__crt_alg->cra_digest.dia_update(tfm, p, - 
bytes_from_page); - crypto_kunmap(src, 0); - crypto_yield(desc->flags); - offset = 0; - pg++; - l -= bytes_from_page; - } while (l > 0); - - if (!nbytes) - break; - sg = scatterwalk_sg_next(sg); - } - - return 0; -} - -static int update(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes) -{ - if (WARN_ON_ONCE(in_irq())) - return -EDEADLK; - return update2(desc, sg, nbytes); -} - -static int final(struct hash_desc *desc, u8 *out) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); - struct digest_alg *digest = &tfm->__crt_alg->cra_digest; - - if (unlikely((unsigned long)out & alignmask)) { - unsigned long align = alignmask + 1; - unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); - u8 *dst = (u8 *)ALIGN(addr, align) + - ALIGN(tfm->__crt_alg->cra_ctxsize, align); - - digest->dia_final(tfm, dst); - memcpy(out, dst, digest->dia_digestsize); - } else - digest->dia_final(tfm, out); - - return 0; -} - -static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen) -{ - crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK); - return -ENOSYS; -} - -static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(hash); - - crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK); - return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen); -} - -static int digest(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes, u8 *out) -{ - if (WARN_ON_ONCE(in_irq())) - return -EDEADLK; - - init(desc); - update2(desc, sg, nbytes); - return final(desc, out); -} - -int crypto_init_digest_ops(struct crypto_tfm *tfm) -{ - struct hash_tfm *ops = &tfm->crt_hash; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - if (dalg->dia_digestsize > PAGE_SIZE / 8) - return -EINVAL; - - ops->init = init; - ops->update = update; - ops->final = final; - ops->digest = digest; - ops->setkey = dalg->dia_setkey ? 
setkey : nosetkey; - ops->digestsize = dalg->dia_digestsize; - - return 0; -} - -void crypto_exit_digest_ops(struct crypto_tfm *tfm) -{ -} - -static int digest_async_nosetkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK); - return -ENOSYS; -} - -static int digest_async_setkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_ahash_tfm(tfm_async); - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK); - return dalg->dia_setkey(tfm, key, keylen); -} - -static int digest_async_init(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - dalg->dia_init(tfm); - return 0; -} - -static int digest_async_update(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - update(&desc, req->src, req->nbytes); - return 0; -} - -static int digest_async_final(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - final(&desc, req->result); - return 0; -} - -static int digest_async_digest(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return digest(&desc, req->src, req->nbytes, req->result); -} - -int crypto_init_digest_ops_async(struct crypto_tfm *tfm) -{ - struct ahash_tfm *crt = &tfm->crt_ahash; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - if (dalg->dia_digestsize > PAGE_SIZE / 8) - return -EINVAL; - - crt->init = digest_async_init; - crt->update = digest_async_update; - crt->final = digest_async_final; - crt->digest = 
digest_async_digest; - crt->setkey = dalg->dia_setkey ? digest_async_setkey : - digest_async_nosetkey; - crt->digestsize = dalg->dia_digestsize; - - return 0; -} diff --git a/crypto/hash.c b/crypto/hash.c deleted file mode 100644 index cb86b19fd105..000000000000 --- a/crypto/hash.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Cryptographic Hash operations. - * - * Copyright (c) 2006 Herbert Xu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) -{ - return alg->cra_ctxsize; -} - -static int hash_setkey_unaligned(struct crypto_hash *crt, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(crt); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - unsigned long alignmask = crypto_hash_alignmask(crt); - int ret; - u8 *buffer, *alignbuffer; - unsigned long absize; - - absize = keylen + alignmask; - buffer = kmalloc(absize, GFP_ATOMIC); - if (!buffer) - return -ENOMEM; - - alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); - memcpy(alignbuffer, key, keylen); - ret = alg->setkey(crt, alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); - return ret; -} - -static int hash_setkey(struct crypto_hash *crt, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(crt); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - unsigned long alignmask = crypto_hash_alignmask(crt); - - if ((unsigned long)key & alignmask) - return hash_setkey_unaligned(crt, key, keylen); - - return alg->setkey(crt, key, keylen); -} - -static int hash_async_setkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) 
-{ - struct crypto_tfm *tfm = crypto_ahash_tfm(tfm_async); - struct crypto_hash *tfm_hash = __crypto_hash_cast(tfm); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - return alg->setkey(tfm_hash, key, keylen); -} - -static int hash_async_init(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->init(&desc); -} - -static int hash_async_update(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->update(&desc, req->src, req->nbytes); -} - -static int hash_async_final(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->final(&desc, req->result); -} - -static int hash_async_digest(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->digest(&desc, req->src, req->nbytes, req->result); -} - -static int crypto_init_hash_ops_async(struct crypto_tfm *tfm) -{ - struct ahash_tfm *crt = &tfm->crt_ahash; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - crt->init = hash_async_init; - crt->update = hash_async_update; - crt->final = hash_async_final; - crt->digest = hash_async_digest; - crt->setkey = hash_async_setkey; - crt->digestsize = alg->digestsize; - - return 0; -} - -static int crypto_init_hash_ops_sync(struct crypto_tfm *tfm) -{ - struct hash_tfm *crt = &tfm->crt_hash; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - crt->init = alg->init; 
- crt->update = alg->update; - crt->final = alg->final; - crt->digest = alg->digest; - crt->setkey = hash_setkey; - crt->digestsize = alg->digestsize; - - return 0; -} - -static int crypto_init_hash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - if (alg->digestsize > PAGE_SIZE / 8) - return -EINVAL; - - if ((mask & CRYPTO_ALG_TYPE_HASH_MASK) != CRYPTO_ALG_TYPE_HASH_MASK) - return crypto_init_hash_ops_async(tfm); - else - return crypto_init_hash_ops_sync(tfm); -} - -static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); -static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) -{ - seq_printf(m, "type : hash\n"); - seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "digestsize : %u\n", alg->cra_hash.digestsize); -} - -const struct crypto_type crypto_hash_type = { - .ctxsize = crypto_hash_ctxsize, - .init = crypto_init_hash_ops, -#ifdef CONFIG_PROC_FS - .show = crypto_hash_show, -#endif -}; -EXPORT_SYMBOL_GPL(crypto_hash_type); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Generic cryptographic hash type"); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 1ffb53f74d37..fc0d575c71e0 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -106,7 +106,6 @@ struct blkcipher_walk { extern const struct crypto_type crypto_ablkcipher_type; extern const struct crypto_type crypto_aead_type; extern const struct crypto_type crypto_blkcipher_type; -extern const struct crypto_type crypto_hash_type; void crypto_mod_put(struct crypto_alg *alg); From 085751b96897280cc3087920f8c6e7d1283f6d00 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Mon, 19 Oct 2009 12:57:20 +0900 Subject: [PATCH 04/20] crypto: api - Remove digest case from procfs show handler Remove special handling of old-style digest algorithms from the procfs show handler. 
Signed-off-by: Benjamin Gilbert Signed-off-by: Herbert Xu --- crypto/proc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/crypto/proc.c b/crypto/proc.c index 5dc07e442fca..ff4cb4a357f9 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -115,13 +115,6 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "max keysize : %u\n", alg->cra_cipher.cia_max_keysize); break; - - case CRYPTO_ALG_TYPE_DIGEST: - seq_printf(m, "type : digest\n"); - seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "digestsize : %u\n", - alg->cra_digest.dia_digestsize); - break; case CRYPTO_ALG_TYPE_COMPRESS: seq_printf(m, "type : compression\n"); break; From 8ffd1be6779c86ebc2a1013f43fdcee8bdbba2b7 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Mon, 19 Oct 2009 12:58:55 +0900 Subject: [PATCH 05/20] crypto: hash - Remove cra_u.{digest,hash} Remove unused digest_alg and hash_alg structs from crypto_alg union and kill their definitions. This also ensures that old-style digest/hash algorithms maintained out of tree will break at build time rather than oopsing at runtime. 
Signed-off-by: Benjamin Gilbert Signed-off-by: Herbert Xu --- include/linux/crypto.h | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fd929889e8dc..24d2e30f1b46 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -250,29 +250,6 @@ struct cipher_alg { void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct digest_alg { - unsigned int dia_digestsize; - void (*dia_init)(struct crypto_tfm *tfm); - void (*dia_update)(struct crypto_tfm *tfm, const u8 *data, - unsigned int len); - void (*dia_final)(struct crypto_tfm *tfm, u8 *out); - int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); -}; - -struct hash_alg { - int (*init)(struct hash_desc *desc); - int (*update)(struct hash_desc *desc, struct scatterlist *sg, - unsigned int nbytes); - int (*final)(struct hash_desc *desc, u8 *out); - int (*digest)(struct hash_desc *desc, struct scatterlist *sg, - unsigned int nbytes, u8 *out); - int (*setkey)(struct crypto_hash *tfm, const u8 *key, - unsigned int keylen); - - unsigned int digestsize; -}; - struct compress_alg { int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); @@ -293,8 +270,6 @@ struct rng_alg { #define cra_aead cra_u.aead #define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher -#define cra_digest cra_u.digest -#define cra_hash cra_u.hash #define cra_compress cra_u.compress #define cra_rng cra_u.rng @@ -320,8 +295,6 @@ struct crypto_alg { struct aead_alg aead; struct blkcipher_alg blkcipher; struct cipher_alg cipher; - struct digest_alg digest; - struct hash_alg hash; struct compress_alg compress; struct rng_alg rng; } cra_u; From 2024e7d6804b3f6251b28126eceb7f6bf2e3a4e8 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 27 Oct 2009 18:51:33 +0800 Subject: [PATCH 06/20] crypto: ansi_cprng - Fix test in get_prng_bytes size_t nbytes cannot be less 
than 0 and the test was redundant. Signed-off-by: Roel Kluin Acked-by: Neil Horman Signed-off-by: Herbert Xu --- crypto/ansi_cprng.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 027176a0e889..45bd2182cb36 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -193,9 +193,6 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx, int err; - if (nbytes < 0) - return -EINVAL; - spin_lock_bh(&ctx->prng_lock); err = -EINVAL; From fa4ef8a6af4745bbf3a25789bc7d4f14a3a6d803 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Tue, 27 Oct 2009 19:04:42 +0800 Subject: [PATCH 07/20] crypto: testmgr - Fix warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit crypto/testmgr.c: In function ‘test_cprng’: crypto/testmgr.c:1204: warning: ‘err’ may be used uninitialized in this function Signed-off-by: Felipe Contreras Signed-off-by: Herbert Xu --- crypto/testmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 6d5b746637be..1f2357bc6424 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1201,7 +1201,7 @@ static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, unsigned int tcount) { const char *algo = crypto_tfm_alg_driver_name(crypto_rng_tfm(tfm)); - int err, i, j, seedsize; + int err = 0, i, j, seedsize; u8 *seed; char result[32]; From 3e02e5cb47e049727a26c9c110867a26972bd0d6 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 27 Oct 2009 19:07:24 +0800 Subject: [PATCH 08/20] crypto: ghash-intel - Fix building failure on x86_32 CLMUL-NI accelerated GHASH should be turned off on non-x86_64 machine. 
Reported-by: Dave Young Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/Kconfig b/crypto/Kconfig index fd6871102b60..81c185a6971f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -442,6 +442,7 @@ config CRYPTO_WP512 config CRYPTO_GHASH_CLMUL_NI_INTEL tristate "GHASH digest algorithm (CLMUL-NI accelerated)" + depends on (X86 || UML_X86) && 64BIT select CRYPTO_SHASH select CRYPTO_CRYPTD help From 2d06ef7f42ed8c9969c9aa84e95df5d5c6378327 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 1 Nov 2009 12:49:44 -0500 Subject: [PATCH 09/20] crypto: ghash-intel - Hard-code pshufb Old gases don't have a clue what pshufb stands for so we have to hard-code it for now. Reported-by: Andrew Morton Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index b9e787a511da..71768d543dbb 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -100,9 +100,11 @@ ENTRY(clmul_ghash_mul) movups (%rdi), DATA movups (%rsi), SHASH movaps .Lbswap_mask, BSWAP - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 call __clmul_gf128mul_ble - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 movups DATA, (%rdi) ret @@ -116,18 +118,21 @@ ENTRY(clmul_ghash_update) movaps .Lbswap_mask, BSWAP movups (%rdi), DATA movups (%rcx), SHASH - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 .align 4 .Lupdate_loop: movups (%rsi), IN1 - pshufb BSWAP, IN1 + # pshufb BSWAP, IN1 + .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 pxor IN1, DATA call __clmul_gf128mul_ble sub $16, %rdx add $16, %rsi cmp $16, %rdx jge .Lupdate_loop - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 movups DATA, 
(%rdi) .Lupdate_just_ret: ret @@ -140,7 +145,8 @@ ENTRY(clmul_ghash_update) ENTRY(clmul_ghash_setkey) movaps .Lbswap_mask, BSWAP movups (%rsi), %xmm0 - pshufb BSWAP, %xmm0 + # pshufb BSWAP, %xmm0 + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 movaps %xmm0, %xmm1 psllq $1, %xmm0 psrlq $63, %xmm1 From 3b0d65969b549b796abc6f0230f6142fed365d49 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Nov 2009 09:11:15 -0500 Subject: [PATCH 10/20] crypto: ghash-intel - Add PSHUFB macros Add PSHUFB macros instead of repeating byte sequences, suggested by Ingo. Signed-off-by: Herbert Xu Acked-by: Ingo Molnar --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 11 ++++++----- arch/x86/include/asm/i387.h | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 71768d543dbb..59584982fb75 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -17,6 +17,7 @@ */ #include +#include .align 16 .Lbswap_mask: @@ -101,7 +102,7 @@ ENTRY(clmul_ghash_mul) movups (%rsi), SHASH movaps .Lbswap_mask, BSWAP # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM5_XMM0 call __clmul_gf128mul_ble # pshufb BSWAP, DATA .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 @@ -119,12 +120,12 @@ ENTRY(clmul_ghash_update) movups (%rdi), DATA movups (%rcx), SHASH # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM5_XMM0 .align 4 .Lupdate_loop: movups (%rsi), IN1 # pshufb BSWAP, IN1 - .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 + PSHUFB_XMM5_XMM6 pxor IN1, DATA call __clmul_gf128mul_ble sub $16, %rdx @@ -132,7 +133,7 @@ ENTRY(clmul_ghash_update) cmp $16, %rdx jge .Lupdate_loop # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM5_XMM0 movups DATA, (%rdi) .Lupdate_just_ret: ret @@ -146,7 +147,7 @@ ENTRY(clmul_ghash_setkey) movaps .Lbswap_mask, BSWAP movups (%rsi), %xmm0 # pshufb BSWAP, %xmm0 - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + 
PSHUFB_XMM5_XMM0 movaps %xmm0, %xmm1 psllq $1, %xmm0 psrlq $63, %xmm1 diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0b20bbb758f2..ebfb8a9e11f7 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -10,6 +10,8 @@ #ifndef _ASM_X86_I387_H #define _ASM_X86_I387_H +#ifndef __ASSEMBLY__ + #include #include #include @@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } +#endif /* __ASSEMBLY__ */ + +#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 +#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 + #endif /* _ASM_X86_I387_H */ From 01dd95827726534230d8f03f7e6faafe24e49260 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 3 Nov 2009 10:55:20 -0500 Subject: [PATCH 11/20] crypto: ghash-intel - Fix irq_fpu_usable usage When renaming kernel_fpu_using to irq_fpu_usable, the semantics of the function is changed too, from measuring whether kernel is using FPU, that is, the FPU is NOT available, to measuring whether FPU is usable, that is, the FPU is available. But the usage of irq_fpu_usable in ghash-clmulni-intel_glue.c is not changed accordingly. This patch fixes this. 
Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 65d409644d72..cbcc8d8ea93a 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -159,7 +159,7 @@ static int ghash_async_init(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_init(cryptd_req); @@ -177,7 +177,7 @@ static int ghash_async_update(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; @@ -195,7 +195,7 @@ static int ghash_async_final(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; @@ -216,7 +216,7 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_digest(cryptd_req); From fd650a6394b3242edf125ba9c4d500349a6d7178 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 9 Nov 2009 13:52:26 -0500 Subject: [PATCH 12/20] x86: Generate 
.byte code for some new instructions via gas macro It will take some time for binutils (gas) to support some newly added instructions, such as SSE4.1 instructions or the AES-NI instructions found in upcoming Intel CPUs. So that the source code can be compiled by old binutils, .byte code is used instead of the assembly instruction. But the readability and flexibility of raw .byte code is not good. This patch solves the issue by generating the raw .byte code via assembly-instruction-like gas macros. The syntax is as close as possible to that of the real assembly instructions. Some helper macros, such as MODRM, are not full-featured implementations. They can be extended when necessary. Signed-off-by: Huang Ying Acked-by: H. Peter Anvin Signed-off-by: Herbert Xu --- arch/x86/include/asm/inst.h | 150 ++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 arch/x86/include/asm/inst.h diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 000000000000..14cf526091f9 --- /dev/null +++ b/arch/x86/include/asm/inst.h @@ -0,0 +1,150 @@ +/* + * Generate .byte code for some instructions not supported by old + * binutils. 
+ */ +#ifndef X86_ASM_INST_H +#define X86_ASM_INST_H + +#ifdef __ASSEMBLY__ + + .macro XMM_NUM opd xmm + .ifc \xmm,%xmm0 + \opd = 0 + .endif + .ifc \xmm,%xmm1 + \opd = 1 + .endif + .ifc \xmm,%xmm2 + \opd = 2 + .endif + .ifc \xmm,%xmm3 + \opd = 3 + .endif + .ifc \xmm,%xmm4 + \opd = 4 + .endif + .ifc \xmm,%xmm5 + \opd = 5 + .endif + .ifc \xmm,%xmm6 + \opd = 6 + .endif + .ifc \xmm,%xmm7 + \opd = 7 + .endif + .ifc \xmm,%xmm8 + \opd = 8 + .endif + .ifc \xmm,%xmm9 + \opd = 9 + .endif + .ifc \xmm,%xmm10 + \opd = 10 + .endif + .ifc \xmm,%xmm11 + \opd = 11 + .endif + .ifc \xmm,%xmm12 + \opd = 12 + .endif + .ifc \xmm,%xmm13 + \opd = 13 + .endif + .ifc \xmm,%xmm14 + \opd = 14 + .endif + .ifc \xmm,%xmm15 + \opd = 15 + .endif + .endm + + .macro PFX_OPD_SIZE + .byte 0x66 + .endm + + .macro PFX_REX opd1 opd2 + .if (\opd1 | \opd2) & 8 + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) + .endif + .endm + + .macro MODRM mod opd1 opd2 + .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) + .endm + + .macro PSHUFB_XMM xmm1 xmm2 + XMM_NUM pshufb_opd1 \xmm1 + XMM_NUM pshufb_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX pshufb_opd1 pshufb_opd2 + .byte 0x0f, 0x38, 0x00 + MODRM 0xc0 pshufb_opd1 pshufb_opd2 + .endm + + .macro PCLMULQDQ imm8 xmm1 xmm2 + XMM_NUM clmul_opd1 \xmm1 + XMM_NUM clmul_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX clmul_opd1 clmul_opd2 + .byte 0x0f, 0x3a, 0x44 + MODRM 0xc0 clmul_opd1 clmul_opd2 + .byte \imm8 + .endm + + .macro AESKEYGENASSIST rcon xmm1 xmm2 + XMM_NUM aeskeygen_opd1 \xmm1 + XMM_NUM aeskeygen_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aeskeygen_opd1 aeskeygen_opd2 + .byte 0x0f, 0x3a, 0xdf + MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 + .byte \rcon + .endm + + .macro AESIMC xmm1 xmm2 + XMM_NUM aesimc_opd1 \xmm1 + XMM_NUM aesimc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesimc_opd1 aesimc_opd2 + .byte 0x0f, 0x38, 0xdb + MODRM 0xc0 aesimc_opd1 aesimc_opd2 + .endm + + .macro AESENC xmm1 xmm2 + XMM_NUM aesenc_opd1 \xmm1 + XMM_NUM aesenc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenc_opd1 
aesenc_opd2 + .byte 0x0f, 0x38, 0xdc + MODRM 0xc0 aesenc_opd1 aesenc_opd2 + .endm + + .macro AESENCLAST xmm1 xmm2 + XMM_NUM aesenclast_opd1 \xmm1 + XMM_NUM aesenclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenclast_opd1 aesenclast_opd2 + .byte 0x0f, 0x38, 0xdd + MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 + .endm + + .macro AESDEC xmm1 xmm2 + XMM_NUM aesdec_opd1 \xmm1 + XMM_NUM aesdec_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdec_opd1 aesdec_opd2 + .byte 0x0f, 0x38, 0xde + MODRM 0xc0 aesdec_opd1 aesdec_opd2 + .endm + + .macro AESDECLAST xmm1 xmm2 + XMM_NUM aesdeclast_opd1 \xmm1 + XMM_NUM aesdeclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdeclast_opd1 aesdeclast_opd2 + .byte 0x0f, 0x38, 0xdf + MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 + .endm +#endif + +#endif From b369e521237d6ef21c453f3ac4f4b8577ec14f87 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 23 Nov 2009 19:54:06 +0800 Subject: [PATCH 13/20] crypto: aesni-intel - Use gas macro for AES-NI instructions Old binutils do not support AES-NI instructions. So that the kernel can be compiled by them, .byte code is used instead of AES-NI assembly instructions. But the readability and flexibility of raw .byte code is not good, so the corresponding assembly-instruction-like gas macros are used instead. 
Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 517 ++++++++++-------------------- 1 file changed, 173 insertions(+), 344 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index eb0566e83319..20bb0e1ac681 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -16,6 +16,7 @@ */ #include +#include .text @@ -122,103 +123,72 @@ ENTRY(aesni_set_key) movups 0x10(%rsi), %xmm2 # other user key movaps %xmm2, (%rcx) add $0x10, %rcx - # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_256a - # aeskeygenassist $0x1, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + AESKEYGENASSIST 0x1 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 call _key_expansion_256a - # aeskeygenassist $0x2, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + AESKEYGENASSIST 0x2 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 call _key_expansion_256a - # aeskeygenassist $0x4, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + AESKEYGENASSIST 0x4 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 call _key_expansion_256a - # aeskeygenassist $0x8, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + AESKEYGENASSIST 0x8 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 call _key_expansion_256a - # aeskeygenassist $0x10, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 
0x10 + AESKEYGENASSIST 0x10 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 call _key_expansion_256a - # aeskeygenassist $0x20, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + AESKEYGENASSIST 0x20 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 call _key_expansion_256a jmp .Ldec_key .Lenc_key192: movq 0x10(%rsi), %xmm2 # other user key - # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_192a - # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 call _key_expansion_192b - # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 call _key_expansion_192a - # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 call _key_expansion_192b - # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 call _key_expansion_192a - # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 call _key_expansion_192b - # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 call _key_expansion_192a - # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 + AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 call _key_expansion_192b jmp .Ldec_key .Lenc_key128: - # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 - .byte 0x66, 0x0f, 
0x3a, 0xdf, 0xc8, 0x01 + AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 call _key_expansion_128 - # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 call _key_expansion_128 - # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 call _key_expansion_128 - # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 call _key_expansion_128 - # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 call _key_expansion_128 - # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 call _key_expansion_128 - # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 + AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 call _key_expansion_128 - # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 + AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 call _key_expansion_128 - # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b + AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 call _key_expansion_128 - # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 + AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 call _key_expansion_128 .Ldec_key: sub $0x10, %rcx @@ -231,8 +201,7 @@ ENTRY(aesni_set_key) .align 4 .Ldec_key_loop: movaps (%rdi), %xmm0 - # aesimc %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 + AESIMC %xmm0 %xmm1 movaps %xmm1, (%rsi) add $0x10, %rdi sub $0x10, %rsi @@ -274,51 +243,37 @@ _aesni_enc1: je .Lenc192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x50(TKEYP), KEY - # aesenc 
KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE .align 4 .Lenc192: movaps -0x40(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x30(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE .align 4 .Lenc128: movaps -0x20(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x10(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps (TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x10(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x20(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x30(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x40(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x50(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x60(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x70(TKEYP), KEY - # aesenclast KEY, STATE # last round - .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + AESENCLAST KEY STATE ret /* @@ -353,135 +308,79 @@ _aesni_enc4: je .L4enc192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x50(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - 
.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 #.align 4 .L4enc192: movaps -0x40(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x30(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 #.align 4 .L4enc128: movaps -0x20(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x10(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps (TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x10(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 
0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x20(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x30(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x40(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x50(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x60(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x70(TKEYP), KEY - # aesenclast KEY, STATE1 # 
last round - .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 - # aesenclast KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 - # aesenclast KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xea - # aesenclast KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 + AESENCLAST KEY STATE1 # last round + AESENCLAST KEY STATE2 + AESENCLAST KEY STATE3 + AESENCLAST KEY STATE4 ret /* @@ -518,51 +417,37 @@ _aesni_dec1: je .Ldec192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x50(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE .align 4 .Ldec192: movaps -0x40(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x30(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE .align 4 .Ldec128: movaps -0x20(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x10(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps (TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x10(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x20(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x30(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x40(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x50(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x60(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x70(TKEYP), KEY - # aesdeclast KEY, STATE # last round - .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + AESDECLAST KEY STATE ret /* @@ -597,135 +482,79 @@ _aesni_dec4: je .L4dec192 add $0x20, TKEYP movaps 
-0x60(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x50(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 .align 4 .L4dec192: movaps -0x40(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x30(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 .align 4 .L4dec128: movaps -0x20(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x10(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 
0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps (TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x10(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x20(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x30(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x40(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x50(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 
0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x60(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x70(TKEYP), KEY - # aesdeclast KEY, STATE1 # last round - .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 - # aesdeclast KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 - # aesdeclast KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xea - # aesdeclast KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 + AESDECLAST KEY STATE1 # last round + AESDECLAST KEY STATE2 + AESDECLAST KEY STATE3 + AESDECLAST KEY STATE4 ret /* From 564ec0ec05ac6ee409bde81f7ef27a3dadbf3a6a Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 23 Nov 2009 19:55:22 +0800 Subject: [PATCH 14/20] crypto: ghash-clmulni-intel - Use gas macro for PCLMULQDQ-NI and PSHUFB Old binutils do not support PCLMULQDQ-NI and PSHUFB, so to let the kernel be compiled by them, .byte code is used instead of assembly instructions. But the readability and flexibility of raw .byte code is not good, so gas macros that look like the corresponding assembly instructions are used instead.
Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 29 ++++++++--------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 59584982fb75..1528dc4886cf 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -17,7 +17,7 @@ */ #include -#include +#include .align 16 .Lbswap_mask: @@ -56,12 +56,9 @@ __clmul_gf128mul_ble: pxor DATA, T2 pxor SHASH, T3 - # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 - .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 - # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 - .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11 - # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) - .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00 + PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 + PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 + PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) pxor DATA, T2 pxor T1, T2 # T2 = a0 * b1 + a1 * b0 @@ -101,11 +98,9 @@ ENTRY(clmul_ghash_mul) movups (%rdi), DATA movups (%rsi), SHASH movaps .Lbswap_mask, BSWAP - # pshufb BSWAP, DATA - PSHUFB_XMM5_XMM0 + PSHUFB_XMM BSWAP DATA call __clmul_gf128mul_ble - # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM BSWAP DATA movups DATA, (%rdi) ret @@ -119,21 +114,18 @@ ENTRY(clmul_ghash_update) movaps .Lbswap_mask, BSWAP movups (%rdi), DATA movups (%rcx), SHASH - # pshufb BSWAP, DATA - PSHUFB_XMM5_XMM0 + PSHUFB_XMM BSWAP DATA .align 4 .Lupdate_loop: movups (%rsi), IN1 - # pshufb BSWAP, IN1 - PSHUFB_XMM5_XMM6 + PSHUFB_XMM BSWAP IN1 pxor IN1, DATA call __clmul_gf128mul_ble sub $16, %rdx add $16, %rsi cmp $16, %rdx jge .Lupdate_loop - # pshufb BSWAP, DATA - PSHUFB_XMM5_XMM0 + PSHUFB_XMM BSWAP DATA movups DATA, (%rdi) .Lupdate_just_ret: ret @@ -146,8 +138,7 @@ ENTRY(clmul_ghash_update) ENTRY(clmul_ghash_setkey) movaps .Lbswap_mask, BSWAP movups (%rsi), %xmm0 - # pshufb BSWAP, %xmm0 - PSHUFB_XMM5_XMM0 
+ PSHUFB_XMM BSWAP %xmm0 movaps %xmm0, %xmm1 psllq $1, %xmm0 psrlq $63, %xmm1 From 68ee87164e73f68cf09070043c97e7f61e6966d4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 23 Nov 2009 20:19:47 +0800 Subject: [PATCH 15/20] crypto: ghash-clmulni-intel - Put proper .data section in place Lbswap_mask, Lpoly and Ltwo_one should clearly belong to .data section, not .text. Signed-off-by: Jiri Kosina Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 1528dc4886cf..1eb7f90cb7b9 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -19,6 +19,8 @@ #include #include +.data + .align 16 .Lbswap_mask: .octa 0x000102030405060708090a0b0c0d0e0f From 507069c91e36786b3fa5d9515c35ed6bb0ce469b Mon Sep 17 00:00:00 2001 From: "Youquan, Song" Date: Mon, 23 Nov 2009 20:23:04 +0800 Subject: [PATCH 16/20] crypto: testmgr - Add ghash algorithm test before provide to users Add ghash algorithm test before provide it to users Signed-off-by: Youquan, Song Signed-off-by: Herbert Xu --- crypto/testmgr.c | 9 +++++++++ crypto/testmgr.h | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 1f2357bc6424..7620bfce92f2 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1942,6 +1942,15 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "ghash", + .test = alg_test_hash, + .suite = { + .hash = { + .vecs = ghash_tv_template, + .count = GHASH_TEST_VECTORS + } + } }, { .alg = "hmac(md5)", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 9963b18983ab..fb765173d41c 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1003,6 +1003,21 @@ static struct hash_testvec tgr128_tv_template[] = { }, }; +#define GHASH_TEST_VECTORS 1 + +static struct hash_testvec ghash_tv_template[] = 
+{ + { + + .key = "\xdf\xa6\xbf\x4d\xed\x81\xdb\x03\xff\xca\xff\x95\xf8\x30\xf0\x61", + .ksize = 16, + .plaintext = "\x95\x2b\x2a\x56\xa5\x60\x04a\xc0\xb3\x2b\x66\x56\xa0\x5b\x40\xb6", + .psize = 16, + .digest = "\xda\x53\xeb\x0a\xd2\xc5\x5b\xb6" + "\x4f\xc4\x80\x2c\xc3\xfe\xda\x60", + }, +}; + /* * HMAC-MD5 test vectors from RFC2202 * (These need to be fixed to not use strlen). From 2f32bfd834d5d7eb230bcbf39aaacccd2a01d767 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 23 Nov 2009 20:25:50 +0800 Subject: [PATCH 17/20] crypto: ansi_cprng - Move FIPS functions under CONFIG_CRYPTO_FIPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fips_cprng_get_random and fips_cprng_reset is used only by CONFIG_CRYPTO_FIPS. This also fixes compilation warnings: crypto/ansi_cprng.c:360: warning: ‘fips_cprng_get_random’ defined but not used crypto/ansi_cprng.c:393: warning: ‘fips_cprng_reset’ defined but not used Signed-off-by: Jaswinder Singh Rajput Acked-by: Neil Horman Signed-off-by: Herbert Xu --- crypto/ansi_cprng.c | 56 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 45bd2182cb36..2bc332142849 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -357,14 +357,6 @@ static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata, return get_prng_bytes(rdata, dlen, prng, 0); } -static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen) -{ - struct prng_context *prng = crypto_rng_ctx(tfm); - - return get_prng_bytes(rdata, dlen, prng, 1); -} - /* * This is the cprng_registered reset method the seed value is * interpreted as the tuple { V KEY DT} @@ -390,26 +382,6 @@ static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) return 0; } -static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) -{ - u8 rdata[DEFAULT_BLK_SZ]; - int rc; - - 
struct prng_context *prng = crypto_rng_ctx(tfm); - - rc = cprng_reset(tfm, seed, slen); - - if (!rc) - goto out; - - /* this primes our continuity test */ - rc = get_prng_bytes(rdata, DEFAULT_BLK_SZ, prng, 0); - prng->rand_data_valid = DEFAULT_BLK_SZ; - -out: - return rc; -} - static struct crypto_alg rng_alg = { .cra_name = "stdrng", .cra_driver_name = "ansi_cprng", @@ -431,6 +403,34 @@ static struct crypto_alg rng_alg = { }; #ifdef CONFIG_CRYPTO_FIPS +static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata, + unsigned int dlen) +{ + struct prng_context *prng = crypto_rng_ctx(tfm); + + return get_prng_bytes(rdata, dlen, prng, 1); +} + +static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +{ + u8 rdata[DEFAULT_BLK_SZ]; + int rc; + + struct prng_context *prng = crypto_rng_ctx(tfm); + + rc = cprng_reset(tfm, seed, slen); + + if (!rc) + goto out; + + /* this primes our continuity test */ + rc = get_prng_bytes(rdata, DEFAULT_BLK_SZ, prng, 0); + prng->rand_data_valid = DEFAULT_BLK_SZ; + +out: + return rc; +} + static struct crypto_alg fips_rng_alg = { .cra_name = "fips(ansi_cprng)", .cra_driver_name = "fips_ansi_cprng", From 9996508b3353063f2d6c48c1a28a84543d72d70b Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Tue, 1 Dec 2009 14:47:32 +0800 Subject: [PATCH 18/20] hwrng: core - Replace u32 in driver API with byte array This patch implements a new method by which hw_random hardware drivers can pass data to the core more efficiently, using a shared buffer. The old methods have been retained as a compatibility layer until all the drivers have been updated.
Signed-off-by: Ian Molton Acked-by: Matt Mackall Acked-by: Rusty Russell Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 111 ++++++++++++++++++++-------------- include/linux/hw_random.h | 7 ++- 2 files changed, 71 insertions(+), 47 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index fc93e2fc7c71..82367262f3a8 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -52,7 +52,8 @@ static struct hwrng *current_rng; static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); - +static int data_avail; +static u8 rng_buffer[SMP_CACHE_BYTES] __cacheline_aligned; static inline int hwrng_init(struct hwrng *rng) { @@ -67,19 +68,6 @@ static inline void hwrng_cleanup(struct hwrng *rng) rng->cleanup(rng); } -static inline int hwrng_data_present(struct hwrng *rng, int wait) -{ - if (!rng->data_present) - return 1; - return rng->data_present(rng, wait); -} - -static inline int hwrng_data_read(struct hwrng *rng, u32 *data) -{ - return rng->data_read(rng, data); -} - - static int rng_dev_open(struct inode *inode, struct file *filp) { /* enforce read-only access to this chrdev */ @@ -91,54 +79,87 @@ static int rng_dev_open(struct inode *inode, struct file *filp) return 0; } +static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, + int wait) { + int present; + + if (rng->read) + return rng->read(rng, (void *)buffer, size, wait); + + if (rng->data_present) + present = rng->data_present(rng, wait); + else + present = 1; + + if (present) + return rng->data_read(rng, (u32 *)buffer); + + return 0; +} + static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { - u32 data; ssize_t ret = 0; int err = 0; - int bytes_read; + int bytes_read, len; while (size) { - err = -ERESTARTSYS; - if (mutex_lock_interruptible(&rng_mutex)) + if (mutex_lock_interruptible(&rng_mutex)) { + err = -ERESTARTSYS; goto out; + } + if (!current_rng) { - mutex_unlock(&rng_mutex); err 
= -ENODEV; - goto out; + goto out_unlock; + } + + if (!data_avail) { + bytes_read = rng_get_data(current_rng, rng_buffer, + sizeof(rng_buffer), + !(filp->f_flags & O_NONBLOCK)); + if (bytes_read < 0) { + err = bytes_read; + goto out_unlock; + } + data_avail = bytes_read; + } + + if (!data_avail) { + if (filp->f_flags & O_NONBLOCK) { + err = -EAGAIN; + goto out_unlock; + } + } else { + len = data_avail; + if (len > size) + len = size; + + data_avail -= len; + + if (copy_to_user(buf + ret, rng_buffer + data_avail, + len)) { + err = -EFAULT; + goto out_unlock; + } + + size -= len; + ret += len; } - bytes_read = 0; - if (hwrng_data_present(current_rng, - !(filp->f_flags & O_NONBLOCK))) - bytes_read = hwrng_data_read(current_rng, &data); mutex_unlock(&rng_mutex); - err = -EAGAIN; - if (!bytes_read && (filp->f_flags & O_NONBLOCK)) - goto out; - if (bytes_read < 0) { - err = bytes_read; - goto out; - } - - err = -EFAULT; - while (bytes_read && size) { - if (put_user((u8)data, buf++)) - goto out; - size--; - ret++; - bytes_read--; - data >>= 8; - } - if (need_resched()) schedule_timeout_interruptible(1); - err = -ERESTARTSYS; - if (signal_pending(current)) + + if (signal_pending(current)) { + err = -ERESTARTSYS; goto out; + } } +out_unlock: + mutex_unlock(&rng_mutex); out: return ret ? : err; } @@ -280,7 +301,7 @@ int hwrng_register(struct hwrng *rng) struct hwrng *old_rng, *tmp; if (rng->name == NULL || - rng->data_read == NULL) + (rng->data_read == NULL && rng->read == NULL)) goto out; mutex_lock(&rng_mutex); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 7244456e7e65..9bede7633f74 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -22,10 +22,12 @@ * @cleanup: Cleanup callback (can be NULL). * @data_present: Callback to determine if data is available * on the RNG. If NULL, it is assumed that - * there is always data available. + * there is always data available. *OBSOLETE* * @data_read: Read data from the RNG device. 
* Returns the number of lower random bytes in "data". - * Must not be NULL. + * Must not be NULL. *OBSOLETE* + * @read: New API. drivers can fill up to max bytes of data + * into the buffer. The buffer is aligned for any type. * @priv: Private data, for use by the RNG driver. */ struct hwrng { @@ -34,6 +36,7 @@ struct hwrng { void (*cleanup)(struct hwrng *rng); int (*data_present)(struct hwrng *rng, int wait); int (*data_read)(struct hwrng *rng, u32 *data); + int (*read)(struct hwrng *rng, void *data, size_t max, bool wait); unsigned long priv; /* internal. */ From bb347d98079a547e80bd4722dee1de61e4dca0e8 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Tue, 1 Dec 2009 15:26:33 +0800 Subject: [PATCH 19/20] hwrng: virtio-rng - Convert to new API This patch converts virtio-rng to the new hw_rng API. In the process it fixes a previously untriggered buffering bug where the buffer is not drained correctly if it has a non-multiple-of-4 length. Performance has improved under qemu-kvm testing also. Signed-off-by: Ian Molton Acked-by: Matt Mackall Acked-by: Rusty Russell Signed-off-by: Herbert Xu --- drivers/char/hw_random/virtio-rng.c | 78 ++++++++++------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 915157fcff98..bdaef8e94021 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -16,6 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #include #include #include @@ -23,78 +24,64 @@ #include #include -/* The host will fill any buffer we give it with sweet, sweet randomness. We - * give it 64 bytes at a time, and the hwrng framework takes it 4 bytes at a - * time.
*/ -#define RANDOM_DATA_SIZE 64 - static struct virtqueue *vq; -static u32 *random_data; -static unsigned int data_left; +static unsigned int data_avail; static DECLARE_COMPLETION(have_data); +static bool busy; static void random_recv_done(struct virtqueue *vq) { - unsigned int len; - /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ - if (!vq->vq_ops->get_buf(vq, &len)) + if (!vq->vq_ops->get_buf(vq, &data_avail)) return; - data_left += len; complete(&have_data); } -static void register_buffer(void) +/* The host will fill any buffer we give it with sweet, sweet randomness. */ +static void register_buffer(u8 *buf, size_t size) { struct scatterlist sg; - sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left); + sg_init_one(&sg, buf, size); + /* There should always be room for one buffer. */ - if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) < 0) + if (vq->vq_ops->add_buf(vq, &sg, 0, 1, buf) < 0) BUG(); + vq->vq_ops->kick(vq); } -/* At least we don't udelay() in a loop like some other drivers. */ -static int virtio_data_present(struct hwrng *rng, int wait) +static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { - if (data_left >= sizeof(u32)) - return 1; -again: + if (!busy) { + busy = true; + init_completion(&have_data); + register_buffer(buf, size); + } + if (!wait) return 0; wait_for_completion(&have_data); - /* Not enough? Re-register. */ - if (unlikely(data_left < sizeof(u32))) { - register_buffer(); - goto again; - } + busy = false; - return 1; + return data_avail; } -/* virtio_data_present() must have succeeded before this is called. 
*/ -static int virtio_data_read(struct hwrng *rng, u32 *data) +static void virtio_cleanup(struct hwrng *rng) { - BUG_ON(data_left < sizeof(u32)); - data_left -= sizeof(u32); - *data = random_data[data_left / 4]; - - if (data_left < sizeof(u32)) { - init_completion(&have_data); - register_buffer(); - } - return sizeof(*data); + if (busy) + wait_for_completion(&have_data); } + static struct hwrng virtio_hwrng = { - .name = "virtio", - .data_present = virtio_data_present, - .data_read = virtio_data_read, + .name = "virtio", + .cleanup = virtio_cleanup, + .read = virtio_read, }; static int virtrng_probe(struct virtio_device *vdev) @@ -112,7 +99,6 @@ static int virtrng_probe(struct virtio_device *vdev) return err; } - register_buffer(); return 0; } @@ -138,21 +124,11 @@ static struct virtio_driver virtio_rng = { static int __init init(void) { - int err; - - random_data = kmalloc(RANDOM_DATA_SIZE, GFP_KERNEL); - if (!random_data) - return -ENOMEM; - - err = register_virtio_driver(&virtio_rng); - if (err) - kfree(random_data); - return err; + return register_virtio_driver(&virtio_rng); } static void __exit fini(void) { - kfree(random_data); unregister_virtio_driver(&virtio_rng); } module_init(init); From eed89d0f9d3383851cec634565a6414fae70fe91 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Thu, 3 Dec 2009 13:50:42 +0800 Subject: [PATCH 20/20] hwrng: core - Prevent too-small buffer sizes This patch prevents the hw_random core using too small of a buffer on machines with small cacheline sizes. 
Signed-off-by: Ian Molton Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 5c2d13c6e189..8b7d56a0fe3a 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -53,7 +53,8 @@ static struct hwrng *current_rng; static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); static int data_avail; -static u8 rng_buffer[SMP_CACHE_BYTES] __cacheline_aligned; +static u8 rng_buffer[SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES] + __cacheline_aligned; static inline int hwrng_init(struct hwrng *rng) {