diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c
index 04facc0690aa..dc012129c063 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -130,43 +130,12 @@ static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
 static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
 
 /*
- * The following functions multiply a field element by x or by x^8 in
+ * The following functions multiply a field element by x^8 in
  * the polynomial field representation. They use 64-bit word operations
  * to gain speed but compensate for machine endianness and hence work
  * correctly on both styles of machine.
  */
 
-static void gf128mul_x_lle(be128 *r, const be128 *x)
-{
-	u64 a = be64_to_cpu(x->a);
-	u64 b = be64_to_cpu(x->b);
-	u64 _tt = gf128mul_table_le[(b << 7) & 0xff];
-
-	r->b = cpu_to_be64((b >> 1) | (a << 63));
-	r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
-}
-
-static void gf128mul_x_bbe(be128 *r, const be128 *x)
-{
-	u64 a = be64_to_cpu(x->a);
-	u64 b = be64_to_cpu(x->b);
-	u64 _tt = gf128mul_table_be[a >> 63];
-
-	r->a = cpu_to_be64((a << 1) | (b >> 63));
-	r->b = cpu_to_be64((b << 1) ^ _tt);
-}
-
-void gf128mul_x_ble(be128 *r, const be128 *x)
-{
-	u64 a = le64_to_cpu(x->a);
-	u64 b = le64_to_cpu(x->b);
-	u64 _tt = gf128mul_table_be[b >> 63];
-
-	r->a = cpu_to_le64((a << 1) ^ _tt);
-	r->b = cpu_to_le64((b << 1) | (a >> 63));
-}
-EXPORT_SYMBOL(gf128mul_x_ble);
-
 static void gf128mul_x8_lle(be128 *x)
 {
 	u64 a = be64_to_cpu(x->a);
diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h
index 0bc9b5f1c45e..35ced9db70ea 100644
--- a/include/crypto/gf128mul.h
+++ b/include/crypto/gf128mul.h
@@ -49,6 +49,7 @@
 #ifndef _CRYPTO_GF128MUL_H
 #define _CRYPTO_GF128MUL_H
 
+#include <asm/byteorder.h>
 #include <crypto/b128ops.h>
 #include <linux/slab.h>
 
@@ -163,8 +164,76 @@ void gf128mul_lle(be128 *a, const be128 *b);
 
 void gf128mul_bbe(be128 *a, const be128 *b);
 
-/* multiply by x in ble format, needed by XTS */
-void gf128mul_x_ble(be128 *a, const be128 *b);
+/*
+ * The following functions multiply a field element by x in
+ * the polynomial field representation. They use 64-bit word operations
+ * to gain speed but compensate for machine endianness and hence work
+ * correctly on both styles of machine.
+ *
+ * They are defined here for performance.
+ */
+
+/*
+ * Return an all-ones mask if bit 'which' of x is set and 0 otherwise,
+ * i.e. a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0'.
+ *
+ * The selected bit is moved to bit 63 and then replicated across the word
+ * by a signed right shift, so this relies on '>>' of a negative s64 being
+ * an arithmetic shift. 'which' must be in the range 0..63.
+ */
+static inline u64 gf128mul_mask_from_bit(u64 x, int which)
+{
+	return ((s64)(x << (63 - which)) >> 63);
+}
+
+/*
+ * Multiply the field element *x by x in lle format and store the result
+ * in *r. Both words of *x are read before *r is written.
+ */
+static inline void gf128mul_x_lle(be128 *r, const be128 *x)
+{
+	u64 a = be64_to_cpu(x->a);
+	u64 b = be64_to_cpu(x->b);
+
+	/* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48
+	 * (see crypto/gf128mul.c): */
+	u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56);
+
+	r->b = cpu_to_be64((b >> 1) | (a << 63));
+	r->a = cpu_to_be64((a >> 1) ^ _tt);
+}
+
+/*
+ * Multiply the field element *x by x in bbe format and store the result
+ * in *r. Both words of *x are read before *r is written.
+ */
+static inline void gf128mul_x_bbe(be128 *r, const be128 *x)
+{
+	u64 a = be64_to_cpu(x->a);
+	u64 b = be64_to_cpu(x->b);
+
+	/* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */
+	u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87;
+
+	r->a = cpu_to_be64((a << 1) | (b >> 63));
+	r->b = cpu_to_be64((b << 1) ^ _tt);
+}
+
+/*
+ * Multiply the field element *x by x in ble format and store the result
+ * in *r; needed by XTS. Both words of *x are read before *r is written.
+ */
+static inline void gf128mul_x_ble(be128 *r, const be128 *x)
+{
+	u64 a = le64_to_cpu(x->a);
+	u64 b = le64_to_cpu(x->b);
+
+	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
+	u64 _tt = gf128mul_mask_from_bit(b, 63) & 0x87;
+
+	r->a = cpu_to_le64((a << 1) ^ _tt);
+	r->b = cpu_to_le64((b << 1) | (a >> 63));
+}
 
 /* 4k table optimization */
 