diff --git a/crypto/digest.c b/crypto/digest.c index ac0919460d14..5d3f1303da98 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -225,7 +225,7 @@ int crypto_init_digest_ops_async(struct crypto_tfm *tfm) struct ahash_tfm *crt = &tfm->crt_ahash; struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - if (dalg->dia_digestsize > crypto_tfm_alg_blocksize(tfm)) + if (dalg->dia_digestsize > PAGE_SIZE / 8) return -EINVAL; crt->init = digest_async_init; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 59821a22d752..66368022e0bf 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -481,21 +481,31 @@ next_one: for (k = 0, temp = 0; k < template[i].np; k++) { printk(KERN_INFO "page %u\n", k); - q = &axbuf[IDX[k]]; - hexdump(q, template[i].tap[k]); + q = &xbuf[IDX[k]]; + + n = template[i].tap[k]; + if (k == template[i].np - 1) + n += enc ? authsize : -authsize; + hexdump(q, n); printk(KERN_INFO "%s\n", - memcmp(q, template[i].result + temp, - template[i].tap[k] - - (k < template[i].np - 1 || enc ? - 0 : authsize)) ? + memcmp(q, template[i].result + temp, n) ? "fail" : "pass"); - for (n = 0; q[template[i].tap[k] + n]; n++) - ; + q += n; + if (k == template[i].np - 1 && !enc) { + if (memcmp(q, template[i].input + + temp + n, authsize)) + n = authsize; + else + n = 0; + } else { + for (n = 0; q[n]; n++) + ; + } if (n) { printk("Result buffer corruption %u " "bytes:\n", n); - hexdump(&q[template[i].tap[k]], n); + hexdump(q, n); } temp += template[i].tap[k]; diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index f7feae4ebb5e..128202e18fc9 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -31,6 +31,7 @@ #include #include #include +#include #define PFX KBUILD_MODNAME ": " @@ -67,16 +68,23 @@ enum { * Another possible performance boost may come from simply buffering * until we have 4 bytes, thus returning a u32 at a time, * instead of the current u8-at-a-time. + * + * Padlock instructions can generate a spurious DNA fault, so + * we have to call them in the context of irq_ts_save/restore() */ static inline u32 xstore(u32 *addr, u32 edx_in) { u32 eax_out; + int ts_state; + + ts_state = irq_ts_save(); asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" :"=m"(*addr), "=a"(eax_out) :"D"(addr), "d"(edx_in)); + irq_ts_restore(ts_state); return eax_out; } diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 54a2a166e566..bf2917d197a0 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "padlock.h" /* Control word. */ @@ -141,6 +142,12 @@ static inline void padlock_reset_key(void) asm volatile ("pushfl; popfl"); } +/* + * While the padlock instructions don't use FP/SSE registers, they + * generate a spurious DNA fault when cr0.ts is '1'. These instructions + * should be used only inside the irq_ts_save/restore() context + */ + static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key, void *control_word) { @@ -205,15 +212,23 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); + int ts_state; padlock_reset_key(); + + ts_state = irq_ts_save(); aes_crypt(in, out, ctx->E, &ctx->cword.encrypt); + irq_ts_restore(ts_state); } static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); + int ts_state; padlock_reset_key(); + + ts_state = irq_ts_save(); aes_crypt(in, out, ctx->D, &ctx->cword.decrypt); + irq_ts_restore(ts_state); } static struct crypto_alg aes_alg = { @@ -244,12 +259,14 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, &ctx->cword.encrypt, @@ -257,6 +274,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + irq_ts_restore(ts_state); return err; } @@ -268,12 +286,14 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, &ctx->cword.decrypt, @@ -281,7 +301,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - + irq_ts_restore(ts_state); return err; } @@ -314,12 +334,14 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, @@ -329,6 +351,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + irq_ts_restore(ts_state); return err; } @@ -340,12 +363,14 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; + int ts_state; padlock_reset_key(); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, walk.iv, &ctx->cword.decrypt, @@ -354,6 +379,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_done(desc, &walk, nbytes); } + irq_ts_restore(ts_state); return err; } diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 40d5680fa013..a7fbadebf623 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "padlock.h" #define SHA1_DEFAULT_FALLBACK "sha1-generic" @@ -102,6 +103,7 @@ static void padlock_do_sha1(const char *in, char *out, int count) * PadLock microcode needs it that big. */ char buf[128+16]; char *result = NEAREST_ALIGNED(buf); + int ts_state; ((uint32_t *)result)[0] = SHA1_H0; ((uint32_t *)result)[1] = SHA1_H1; @@ -109,9 +111,12 @@ static void padlock_do_sha1(const char *in, char *out, int count) ((uint32_t *)result)[3] = SHA1_H3; ((uint32_t *)result)[4] = SHA1_H4; + /* prevent taking the spurious DNA fault with padlock. */ + ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0)); + irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); } @@ -123,6 +128,7 @@ static void padlock_do_sha256(const char *in, char *out, int count) * PadLock microcode needs it that big. */ char buf[128+16]; char *result = NEAREST_ALIGNED(buf); + int ts_state; ((uint32_t *)result)[0] = SHA256_H0; ((uint32_t *)result)[1] = SHA256_H1; @@ -133,9 +139,12 @@ static void padlock_do_sha256(const char *in, char *out, int count) ((uint32_t *)result)[6] = SHA256_H6; ((uint32_t *)result)[7] = SHA256_H7; + /* prevent taking the spurious DNA fault with padlock. */ + ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0)); + irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); } diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 681c15f42083..ee827a7f7c6a 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -96,6 +96,9 @@ struct talitos_private { unsigned int exec_units; unsigned int desc_types; + /* SEC Compatibility info */ + unsigned long features; + /* next channel to be assigned next incoming descriptor */ atomic_t last_chan; @@ -133,6 +136,9 @@ struct talitos_private { struct hwrng rng; }; +/* .features flag */ +#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001 + /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ @@ -785,7 +791,7 @@ static void ipsec_esp_encrypt_done(struct device *dev, /* copy the generated ICV to dst */ if (edesc->dma_len) { icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 1]; + edesc->dst_nents + 2]; sg = sg_last(areq->dst, edesc->dst_nents); memcpy((char *)sg_virt(sg) + sg->length - ctx->authsize, icvdata, ctx->authsize); @@ -814,7 +820,7 @@ static void ipsec_esp_decrypt_done(struct device *dev, /* auth check */ if (edesc->dma_len) icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 1]; + edesc->dst_nents + 2]; else icvdata = &edesc->link_tbl[0]; @@ -921,10 +927,30 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen, &edesc->link_tbl[0]); if (sg_count > 1) { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[sg_count-1]; + struct scatterlist *sg; + struct talitos_private *priv = dev_get_drvdata(dev); + desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; desc->ptr[4].ptr = cpu_to_be32(edesc->dma_link_tbl); dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + /* If necessary for this SEC revision, + * add a link table entry for ICV. + */ + if ((priv->features & + TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT) && + (edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0) { + link_tbl_ptr->j_extent = 0; + link_tbl_ptr++; + link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; + link_tbl_ptr->len = cpu_to_be16(authsize); + sg = sg_last(areq->src, edesc->src_nents ? : 1); + link_tbl_ptr->ptr = cpu_to_be32( + (char *)sg_dma_address(sg) + + sg->length - authsize); + } } else { /* Only one segment now, so no link tbl needed */ desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src)); @@ -944,12 +970,11 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, desc->ptr[5].ptr = cpu_to_be32(sg_dma_address(areq->dst)); } else { struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents]; - struct scatterlist *sg; + &edesc->link_tbl[edesc->src_nents + 1]; desc->ptr[5].ptr = cpu_to_be32((struct talitos_ptr *) edesc->dma_link_tbl + - edesc->src_nents); + edesc->src_nents + 1); if (areq->src == areq->dst) { memcpy(link_tbl_ptr, &edesc->link_tbl[0], edesc->src_nents * sizeof(struct talitos_ptr)); @@ -957,14 +982,10 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, link_tbl_ptr); } + /* Add an entry to the link table for ICV data */ link_tbl_ptr += sg_count - 1; - - /* handle case where sg_last contains the ICV exclusively */ - sg = sg_last(areq->dst, edesc->dst_nents); - if (sg->length == ctx->authsize) - link_tbl_ptr--; - link_tbl_ptr->j_extent = 0; + sg_count++; link_tbl_ptr++; link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; link_tbl_ptr->len = cpu_to_be16(authsize); @@ -973,7 +994,7 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq, link_tbl_ptr->ptr = cpu_to_be32((struct talitos_ptr *) edesc->dma_link_tbl + edesc->src_nents + - edesc->dst_nents + 1); + edesc->dst_nents + 2); desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, @@ -1040,12 +1061,12 @@ static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq, /* * allocate space for base edesc plus the link tables, - * allowing for a separate entry for the generated ICV (+ 1), + * allowing for two separate entries for ICV and generated ICV (+ 2), * and the ICV data itself */ alloc_len = sizeof(struct ipsec_esp_edesc); if (src_nents || dst_nents) { - dma_len = (src_nents + dst_nents + 1) * + dma_len = (src_nents + dst_nents + 2) * sizeof(struct talitos_ptr) + ctx->authsize; alloc_len += dma_len; } else { @@ -1104,7 +1125,7 @@ static int aead_authenc_decrypt(struct aead_request *req) /* stash incoming ICV for later cmp with ICV generated by the h/w */ if (edesc->dma_len) icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 1]; + edesc->dst_nents + 2]; else icvdata = &edesc->link_tbl[0]; @@ -1480,6 +1501,9 @@ static int talitos_probe(struct of_device *ofdev, goto err_out; } + if (of_device_is_compatible(np, "fsl,sec3.0")) + priv->features |= TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT; + priv->head_lock = kmalloc(sizeof(spinlock_t) * priv->num_channels, GFP_KERNEL); priv->tail_lock = kmalloc(sizeof(spinlock_t) * priv->num_channels, diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 96fa8449ff11..6d3b21063419 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +237,37 @@ static inline void kernel_fpu_end(void) preempt_enable(); } +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context aswell. To prevent these kernel instructions + * in interrupt context interact wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +static inline int irq_ts_save(void) +{ + /* + * If we are in process context, we are ok to take a spurious DNA fault. + * Otherwise, doing clts() in process context require pre-emption to + * be disabled or some heavy lifting like kernel_fpu_begin() + */ + if (!in_interrupt()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} + +static inline void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} + #ifdef CONFIG_X86_64 static inline void save_init_fpu(struct task_struct *tsk) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index d12498ec8a4e..ee48ef8fb2ea 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -101,6 +101,24 @@ static inline int crypto_ahash_digest(struct ahash_request *req) return crt->digest(req); } +static inline int crypto_ahash_init(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->init(req); +} + +static inline int crypto_ahash_update(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->update(req); +} + +static inline int crypto_ahash_final(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->final(req); +} + static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) {