From 42ef3bedf30020a3b5298c2b918ccd7b6e1794d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20T=C3=A9nart?=
Date: Wed, 19 Jul 2017 11:02:30 +0200
Subject: [PATCH 1/8] crypto: inside-secure - fix invalidation check in hmac_sha1_setkey

The safexcel_hmac_sha1_setkey function checks if an invalidation command
should be issued, i.e. when the context ipad/opad change. This check is
done after filling the ipad/opad, so it can never be true. The patch
fixes this by moving the check before the ipad/opad memcpy operations.

Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver")
Signed-off-by: Antoine Tenart
Signed-off-by: Herbert Xu
---
 drivers/crypto/inside-secure/safexcel_hash.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 8527a5899a2f..a11b2edb41b9 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -883,9 +883,6 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 	if (ret)
 		return ret;

-	memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
-	memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
-
 	for (i = 0; i < ARRAY_SIZE(istate.state); i++) {
 		if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
 		    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
@@ -894,6 +891,9 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 		}
 	}

+	memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
+	memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
+
 	return 0;
 }

From 60c4081ec4195389f4fa1fce83eaad5e4b948748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20T=C3=A9nart?=
Date: Wed, 19 Jul 2017 11:02:31 +0200
Subject: [PATCH 2/8] crypto: inside-secure - fix the sha state length in hmac_sha1_setkey

A check is performed on the ipad/opad in the safexcel_hmac_sha1_setkey
function, but the bound used by the loop doing it is wrong: it is
currently the size of the state array, while it should be the size of a
sha1 state. This patch fixes it.

Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver")
Reported-by: Dan Carpenter
Signed-off-by: Antoine Tenart
Signed-off-by: Herbert Xu
---
 drivers/crypto/inside-secure/safexcel_hash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index a11b2edb41b9..3f819399cd95 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -883,7 +883,7 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 	if (ret)
 		return ret;

-	for (i = 0; i < ARRAY_SIZE(istate.state); i++) {
+	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
 		if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
 		    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
 			ctx->base.needs_inv = true;

From 28389575a8cf933a5f3c378556b9f4d3cce0efd2 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Wed, 2 Aug 2017 16:40:47 +0800
Subject: [PATCH 3/8] crypto: ixp4xx - Fix error handling path in 'aead_perform()'

In commit 0f987e25cb8a, the source processing has been moved in front of
the destination processing, but the error handling path has not been
modified accordingly. Free resources in the correct order to avoid some
leaks.

Cc:
Fixes: 0f987e25cb8a ("crypto: ixp4xx - Fix false lastlen uninitialised warning")
Reported-by: Christophe JAILLET
Signed-off-by: Herbert Xu
Reviewed-by: Arnd Bergmann
---
 drivers/crypto/ixp4xx_crypto.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 427cbe012729..dadc4a808df5 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -1073,7 +1073,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
 	req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags,
 			&crypt->icv_rev_aes);
 	if (unlikely(!req_ctx->hmac_virt))
-		goto free_buf_src;
+		goto free_buf_dst;
 	if (!encrypt) {
 		scatterwalk_map_and_copy(req_ctx->hmac_virt, req->src,
 					 cryptlen, authsize, 0);
@@ -1088,10 +1088,10 @@ static int aead_perform(struct aead_request *req, int encrypt,
 	BUG_ON(qmgr_stat_overflow(SEND_QID));
 	return -EINPROGRESS;

-free_buf_src:
-	free_buf_chain(dev, req_ctx->src, crypt->src_buf);
 free_buf_dst:
 	free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
+free_buf_src:
+	free_buf_chain(dev, req_ctx->src, crypt->src_buf);
 	crypt->ctl_flags = CTL_FLAG_UNUSED;
 	return -ENOMEM;
 }

From 8861249c740fc4af9ddc5aee321eafefb960d7c6 Mon Sep 17 00:00:00 2001
From: "megha.dey@linux.intel.com"
Date: Wed, 2 Aug 2017 13:49:09 -0700
Subject: [PATCH 4/8] crypto: x86/sha1 - Fix reads beyond the number of blocks passed

It was reported that the sha1 AVX2 function (sha1_transform_avx2) is
reading ahead beyond its intended data, and causing a crash if the next
block is beyond the page boundary:
http://marc.info/?l=linux-crypto-vger&m=149373371023377

This patch makes sure that there is no overflow for any buffer length.

It passes the tests written by Jan Stancek that revealed this problem:
https://github.com/jstancek/sha1-avx2-crash

I have re-enabled sha1-avx2 by reverting commit
b82ce24426a4071da9529d726057e4e642948667.

Cc:
Fixes: b82ce24426a4 ("crypto: sha1-ssse3 - Disable avx2")
Originally-by: Ilya Albrekht
Tested-by: Jan Stancek
Signed-off-by: Megha Dey
Reported-by: Jan Stancek
Signed-off-by: Herbert Xu
---
 arch/x86/crypto/sha1_avx2_x86_64_asm.S | 67 ++++++++++++++------------
 arch/x86/crypto/sha1_ssse3_glue.c | 2 +-
 2 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1cd792db15ef..1eab79c9ac48 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@
 	.set T1, REG_T1
 .endm

-#define K_BASE %r8
 #define HASH_PTR %r9
+#define BLOCKS_CTR %r8
 #define BUFFER_PTR %r10
 #define BUFFER_PTR2 %r13
-#define BUFFER_END %r11

 #define PRECALC_BUF %r14
 #define WK_BUF %r15
@@ -205,14 +204,14 @@
	 * blended AVX2 and ALU instruction scheduling
	 * 1 vector iteration per 8 rounds
	 */
-		vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+		vmovdqu (i * 2)(BUFFER_PTR), W_TMP
 	.elseif ((i & 7) == 1)
-		vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+		vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
			 WY_TMP, WY_TMP
 	.elseif ((i & 7) == 2)
		vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
 	.elseif ((i & 7) == 4)
-		vpaddd K_XMM(K_BASE), WY, WY_TMP
+		vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 	.elseif ((i & 7) == 7)
		vmovdqu WY_TMP, PRECALC_WK(i&~7)
@@ -255,7 +254,7 @@
		vpxor WY, WY_TMP, WY_TMP
 	.elseif ((i & 7) == 7)
		vpxor WY_TMP2, WY_TMP, WY
-		vpaddd K_XMM(K_BASE), WY, WY_TMP
+		vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
		vmovdqu WY_TMP, PRECALC_WK(i&~7)

		PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
		vpsrld $30, WY, WY
		vpor WY, WY_TMP, WY
 	.elseif ((i & 7) == 7)
-		vpaddd K_XMM(K_BASE), WY, WY_TMP
+		vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
		vmovdqu WY_TMP, PRECALC_WK(i&~7)

		PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@

 .endm

+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+	mov \a, RTA
+	add $\d, RTA
+	cmp $\c, \b
+	cmovge RTA, \a
+.endm
+
 /*
  * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
  */
@@ -463,13 +472,16 @@
	lea (2*4*80+32)(%rsp), WK_BUF

	# Precalc WK for first 2 blocks
-	PRECALC_OFFSET = 0
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
	.set i, 0
	.rept 160
		PRECALC i
		.set i, i + 1
	.endr
-	PRECALC_OFFSET = 128
+
+	/* Go to next block if needed */
+	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
	xchg WK_BUF, PRECALC_BUF

	.align 32
@@ -479,8 +491,8 @@ _loop:
	 * we use K_BASE value as a signal of a last block,
	 * it is set below by: cmovae BUFFER_PTR, K_BASE
	 */
-	cmp K_BASE, BUFFER_PTR
-	jne _begin
+	test BLOCKS_CTR, BLOCKS_CTR
+	jnz _begin
	.align 32
	jmp _end
	.align 32
@@ -512,10 +524,10 @@ _loop0:
		.set j, j+2
	.endr

-	add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
-	cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
-	cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
-
+	/* Update Counter */
+	sub $1, BLOCKS_CTR
+	/* Move to the next block only if needed*/
+	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
	/*
	 * rounds
	 * 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
	UPDATE_HASH 12(HASH_PTR), D
	UPDATE_HASH 16(HASH_PTR), E

-	cmp K_BASE, BUFFER_PTR /* is current block the last one? */
-	je _loop
+	test BLOCKS_CTR, BLOCKS_CTR
+	jz _loop

	mov TB, B
@@ -575,10 +587,10 @@ _loop2:
		.set j, j+2
	.endr

-	add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
-
-	cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
-	cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+	/* update counter */
+	sub $1, BLOCKS_CTR
+	/* Move to the next block only if needed*/
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128

	jmp _loop3
 _loop3:
@@ -641,19 +653,12 @@ _loop3:
	avx2_zeroupper

-	lea K_XMM_AR(%rip), K_BASE
-
+	/* Setup initial values */
	mov CTX, HASH_PTR
	mov BUF, BUFFER_PTR
-	lea 64(BUF), BUFFER_PTR2
-	shl $6, CNT /* mul by 64 */
-	add BUF, CNT
-	add $64, CNT
-	mov CNT, BUFFER_END
-
-	cmp BUFFER_END, BUFFER_PTR2
-	cmovae K_BASE, BUFFER_PTR2
+	mov BUF, BUFFER_PTR2
+	mov CNT, BLOCKS_CTR

	xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP

diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index f960a043cdeb..fc61739150e7 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,

 static bool avx2_usable(void)
 {
-	if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
		&& boot_cpu_has(X86_FEATURE_BMI1)
		&& boot_cpu_has(X86_FEATURE_BMI2))
		return true;

From dea3eb8b452e36cf2dd572b0a797915ccf452ae6 Mon Sep 17 00:00:00 2001
From: Stephan Mueller
Date: Thu, 10 Aug 2017 08:06:18 +0200
Subject: [PATCH 5/8] lib/mpi: kunmap after finishing accessing buffer

Using sg_miter_start and sg_miter_next, the buffer of an SG is kmap'ed
to *buff. The current code calls sg_miter_stop (and thus kunmap) on the
SG entry before the last access of *buff.
The patch moves the sg_miter_stop call after the last access to *buff to
ensure that the memory pointed to by *buff is still mapped.

Fixes: 4816c9406430 ("lib/mpi: Fix SG miter leak")
Cc:
Signed-off-by: Stephan Mueller
Signed-off-by: Herbert Xu
---
 lib/mpi/mpicoder.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 5a0f75a3bf01..eead4b339466 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -364,11 +364,11 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 	}
 	miter.consumed = lzeros;
-	sg_miter_stop(&miter);

 	nbytes -= lzeros;
 	nbits = nbytes * 8;
 	if (nbits > MAX_EXTERN_MPI_BITS) {
+		sg_miter_stop(&miter);
 		pr_info("MPI: mpi too large (%u bits)\n", nbits);
 		return NULL;
 	}
@@ -376,6 +376,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 	if (nbytes > 0)
 		nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8);

+	sg_miter_stop(&miter);
+
 	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
 	val = mpi_alloc(nlimbs);
 	if (!val)

From 4de437265eaac18f880d827f8e105b10de9b87a3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Mon, 14 Aug 2017 14:28:14 +0100
Subject: [PATCH 6/8] crypto: chacha20 - fix handling of chunked input

Commit 9ae433bc79f9 ("crypto: chacha20 - convert generic and x86
versions to skcipher") ported the existing chacha20 code to use the new
skcipher API, and introduced a bug along the way. Unfortunately, the
tcrypt tests did not catch the error, and it was only found recently by
Tobias.

Stefan kindly diagnosed the error, and proposed a fix which is similar
to the one below, with the exception that 'walk.stride' is used rather
than the hardcoded block size. This does not actually matter in this
case, but it's a better example of how to use the skcipher walk API.

Fixes: 9ae433bc79f9 ("crypto: chacha20 - convert generic and x86 ...")
Cc: # v4.11+
Cc: Steffen Klassert
Reported-by: Tobias Brunner
Signed-off-by: Ard Biesheuvel
Signed-off-by: Herbert Xu
---
 crypto/chacha20_generic.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index 8b3c04d625c3..4a45fa4890c0 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -91,9 +91,14 @@ int crypto_chacha20_crypt(struct skcipher_request *req)
 	crypto_chacha20_init(state, ctx, walk.iv);

 	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
 		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 walk.nbytes);
-		err = skcipher_walk_done(&walk, 0);
+				 nbytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}

 	return err;

From 549f64153c354e69fc19534f7d7e867de1992f95 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Mon, 14 Aug 2017 14:28:15 +0100
Subject: [PATCH 7/8] crypto: testmgr - add chunked test cases for chacha20

We failed to catch a bug in the chacha20 code after porting it to the
skcipher API. We would have caught it if any chunked tests had been
defined, so define some now to catch future regressions.

Signed-off-by: Ard Biesheuvel
Signed-off-by: Herbert Xu
---
 crypto/testmgr.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6ceb0e2758bb..d54971d2d1c8 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -32675,6 +32675,10 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = {
			  "\x5b\x86\x2f\x37\x30\xe3\x7c\xfd"
			  "\xc4\xfd\x80\x6c\x22\xf2\x21",
		.rlen = 375,
+		.also_non_np = 1,
+		.np = 3,
+		.tap = { 375 - 20, 4, 16 },
+
 	}, { /* RFC7539 A.2. Test Vector #3 */
		.key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
@@ -33049,6 +33053,9 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = {
			  "\xa1\xed\xad\xd5\x76\xfa\x24\x8f"
			  "\x98",
		.rlen = 1281,
+		.also_non_np = 1,
+		.np = 3,
+		.tap = { 1200, 1, 80 },
 	},
 };

From 445a582738de6802669aeed9c33ca406c23c3b1f Mon Sep 17 00:00:00 2001
From: Stephan Mueller
Date: Wed, 16 Aug 2017 11:56:24 +0200
Subject: [PATCH 8/8] crypto: algif_skcipher - only call put_page on referenced and used pages

For asynchronous operation, SGs are allocated without a page mapped to
them or with a page that is not used (ref-counted). If the SGL is freed,
the code must only call put_page for an SG if there was a page assigned
and ref-counted in the first place.

This fixes a kernel crash when using io_submit with more than one iocb
using the sendmsg and sendpage (vmsplice/splice) interface.

Cc:
Signed-off-by: Stephan Mueller
Signed-off-by: Herbert Xu
---
 crypto/algif_skcipher.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 43839b00fe6c..903605dbc1a5 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -87,8 +87,13 @@ static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
 	}
 	sgl = sreq->tsg;
 	n = sg_nents(sgl);
-	for_each_sg(sgl, sg, n, i)
-		put_page(sg_page(sg));
+	for_each_sg(sgl, sg, n, i) {
+		struct page *page = sg_page(sg);
+
+		/* some SGs may not have a page mapped */
+		if (page && page_ref_count(page))
+			put_page(page);
+	}
 	kfree(sreq->tsg);
 }
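
As a side note to the chacha20 fix in PATCH 6/8, the rule it enforces can be
illustrated outside the kernel. The sketch below is illustrative only: plain
userspace C with invented names and numbers, not the kernel implementation.
It shows why a chunk must be rounded down to the cipher block stride whenever
more data is still to come, so the keystream never restarts mid-block.

    /*
     * Illustrative only: a userspace model of the chunking rule used by the
     * chacha20 fix above. STRIDE stands in for the chacha20 block size.
     */
    #include <stdio.h>

    #define STRIDE 64u

    /*
     * How many of 'avail' bytes may be processed now? Only the final chunk
     * of a request may end on a partial block.
     */
    static unsigned int usable_now(unsigned int avail, unsigned int total_left)
    {
            if (avail < total_left)
                    return avail - (avail % STRIDE); /* round down to STRIDE */
            return avail;
    }

    int main(void)
    {
            unsigned int total = 375;  /* e.g. a 375-byte message */
            unsigned int chunk = 300;  /* first segment presented */
            unsigned int now = usable_now(chunk, total); /* 256, not 300 */
            unsigned int carry = chunk - now;            /* 44 bytes kept back */

            printf("process %u now, carry %u into the next chunk\n", now, carry);
            return 0;
    }

In the driver itself the same rule appears as round_down(nbytes, walk.stride),
with the leftover count handed back through skcipher_walk_done() so that it is
processed together with the next chunk.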