d540725871
Without this patch, the chacha block counter is not incremented on neon rounds, resulting in incorrect calculations and corrupt packets. This also switches to using `--no-numbered --zero-commit` so that future diffs are smaller.

Reported-by: Hans Geiblinger <cybrnook2002@yahoo.com>
Reviewed-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: David Bauer <mail@david-bauer.net>
Cc: Petr Štetiar <ynezz@true.cz>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
252 lines
7.8 KiB
Diff
252 lines
7.8 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Ard Biesheuvel <ardb@kernel.org>
|
|
Date: Fri, 8 Nov 2019 13:22:20 +0100
|
|
Subject: [PATCH] crypto: x86/poly1305 - unify Poly1305 state struct with
|
|
generic code
|
|
|
|
commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream.
|
|
|
|
In preparation for exposing a Poly1305 library interface directly from
|
|
the accelerated x86 driver, align the state descriptor of the x86 code
|
|
with the one used by the generic driver. This is needed to make the
|
|
library interface unified between all implementations.
|
|
|
|
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
|
|
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
|
|
---
|
|
arch/x86/crypto/poly1305_glue.c | 88 ++++++++++--------------------
|
|
crypto/poly1305_generic.c | 6 +-
|
|
include/crypto/internal/poly1305.h | 4 +-
|
|
include/crypto/poly1305.h | 18 +++---
|
|
4 files changed, 43 insertions(+), 73 deletions(-)
|
|
|
|
--- a/arch/x86/crypto/poly1305_glue.c
|
|
+++ b/arch/x86/crypto/poly1305_glue.c
|
|
@@ -14,40 +14,14 @@
|
|
#include <linux/module.h>
|
|
#include <asm/simd.h>
|
|
|
|
-struct poly1305_simd_desc_ctx {
|
|
- struct poly1305_desc_ctx base;
|
|
- /* derived key u set? */
|
|
- bool uset;
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- /* derived keys r^3, r^4 set? */
|
|
- bool wset;
|
|
-#endif
|
|
- /* derived Poly1305 key r^2 */
|
|
- u32 u[5];
|
|
- /* ... silently appended r^3 and r^4 when using AVX2 */
|
|
-};
|
|
-
|
|
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
|
|
const u32 *r, unsigned int blocks);
|
|
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
|
|
unsigned int blocks, const u32 *u);
|
|
-#ifdef CONFIG_AS_AVX2
|
|
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
|
|
unsigned int blocks, const u32 *u);
|
|
-static bool poly1305_use_avx2;
|
|
-#endif
|
|
|
|
-static int poly1305_simd_init(struct shash_desc *desc)
|
|
-{
|
|
- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
|
|
-
|
|
- sctx->uset = false;
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- sctx->wset = false;
|
|
-#endif
|
|
-
|
|
- return crypto_poly1305_init(desc);
|
|
-}
|
|
+static bool poly1305_use_avx2 __ro_after_init;
|
|
|
|
static void poly1305_simd_mult(u32 *a, const u32 *b)
|
|
{
|
|
@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, c
|
|
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
|
|
const u8 *src, unsigned int srclen)
|
|
{
|
|
- struct poly1305_simd_desc_ctx *sctx;
|
|
unsigned int blocks, datalen;
|
|
|
|
- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
|
|
- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
|
|
-
|
|
if (unlikely(!dctx->sset)) {
|
|
datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
|
|
src += srclen - datalen;
|
|
srclen = datalen;
|
|
}
|
|
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
|
|
- if (unlikely(!sctx->wset)) {
|
|
- if (!sctx->uset) {
|
|
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u, dctx->r.r);
|
|
- sctx->uset = true;
|
|
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
|
|
+ poly1305_use_avx2 &&
|
|
+ srclen >= POLY1305_BLOCK_SIZE * 4) {
|
|
+ if (unlikely(dctx->rset < 4)) {
|
|
+ if (dctx->rset < 2) {
|
|
+ dctx->r[1] = dctx->r[0];
|
|
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
|
|
}
|
|
- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u + 5, dctx->r.r);
|
|
- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u + 10, dctx->r.r);
|
|
- sctx->wset = true;
|
|
+ dctx->r[2] = dctx->r[1];
|
|
+ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
|
|
+ dctx->r[3] = dctx->r[2];
|
|
+ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
|
|
+ dctx->rset = 4;
|
|
}
|
|
blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
|
|
- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
|
|
- sctx->u);
|
|
+ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
|
|
+ dctx->r[1].r);
|
|
src += POLY1305_BLOCK_SIZE * 4 * blocks;
|
|
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
|
|
}
|
|
-#endif
|
|
+
|
|
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
|
|
- if (unlikely(!sctx->uset)) {
|
|
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
|
|
- poly1305_simd_mult(sctx->u, dctx->r.r);
|
|
- sctx->uset = true;
|
|
+ if (unlikely(dctx->rset < 2)) {
|
|
+ dctx->r[1] = dctx->r[0];
|
|
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
|
|
+ dctx->rset = 2;
|
|
}
|
|
blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
|
|
- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
|
|
- sctx->u);
|
|
+ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
|
|
+ blocks, dctx->r[1].r);
|
|
src += POLY1305_BLOCK_SIZE * 2 * blocks;
|
|
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
|
|
}
|
|
if (srclen >= POLY1305_BLOCK_SIZE) {
|
|
- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
|
|
+ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
|
|
srclen -= POLY1305_BLOCK_SIZE;
|
|
}
|
|
return srclen;
|
|
@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct s
|
|
|
|
static struct shash_alg alg = {
|
|
.digestsize = POLY1305_DIGEST_SIZE,
|
|
- .init = poly1305_simd_init,
|
|
+ .init = crypto_poly1305_init,
|
|
.update = poly1305_simd_update,
|
|
.final = crypto_poly1305_final,
|
|
- .descsize = sizeof(struct poly1305_simd_desc_ctx),
|
|
+ .descsize = sizeof(struct poly1305_desc_ctx),
|
|
.base = {
|
|
.cra_name = "poly1305",
|
|
.cra_driver_name = "poly1305-simd",
|
|
@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init
|
|
if (!boot_cpu_has(X86_FEATURE_XMM2))
|
|
return -ENODEV;
|
|
|
|
-#ifdef CONFIG_AS_AVX2
|
|
- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
|
|
+ poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
|
|
+ boot_cpu_has(X86_FEATURE_AVX) &&
|
|
boot_cpu_has(X86_FEATURE_AVX2) &&
|
|
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
|
- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
|
|
+ alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
|
|
if (poly1305_use_avx2)
|
|
alg.descsize += 10 * sizeof(u32);
|
|
-#endif
|
|
+
|
|
return crypto_register_shash(&alg);
|
|
}
|
|
|
|
--- a/crypto/poly1305_generic.c
|
|
+++ b/crypto/poly1305_generic.c
|
|
@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_de
|
|
|
|
poly1305_core_init(&dctx->h);
|
|
dctx->buflen = 0;
|
|
- dctx->rset = false;
|
|
+ dctx->rset = 0;
|
|
dctx->sset = false;
|
|
|
|
return 0;
|
|
@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1
|
|
srclen = datalen;
|
|
}
|
|
|
|
- poly1305_core_blocks(&dctx->h, &dctx->r, src,
|
|
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
|
|
srclen / POLY1305_BLOCK_SIZE, 1);
|
|
}
|
|
|
|
@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
|
|
dctx->buf[dctx->buflen++] = 1;
|
|
memset(dctx->buf + dctx->buflen, 0,
|
|
POLY1305_BLOCK_SIZE - dctx->buflen);
|
|
- poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
|
|
+ poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
|
|
}
|
|
|
|
poly1305_core_emit(&dctx->h, digest);
|
|
--- a/include/crypto/internal/poly1305.h
|
|
+++ b/include/crypto/internal/poly1305.h
|
|
@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(
|
|
{
|
|
if (!dctx->sset) {
|
|
if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
|
|
- poly1305_core_setkey(&dctx->r, src);
|
|
+ poly1305_core_setkey(dctx->r, src);
|
|
src += POLY1305_BLOCK_SIZE;
|
|
srclen -= POLY1305_BLOCK_SIZE;
|
|
- dctx->rset = true;
|
|
+ dctx->rset = 1;
|
|
}
|
|
if (srclen >= POLY1305_BLOCK_SIZE) {
|
|
dctx->s[0] = get_unaligned_le32(src + 0);
|
|
--- a/include/crypto/poly1305.h
|
|
+++ b/include/crypto/poly1305.h
|
|
@@ -22,20 +22,20 @@ struct poly1305_state {
|
|
};
|
|
|
|
struct poly1305_desc_ctx {
|
|
- /* key */
|
|
- struct poly1305_key r;
|
|
- /* finalize key */
|
|
- u32 s[4];
|
|
- /* accumulator */
|
|
- struct poly1305_state h;
|
|
/* partial buffer */
|
|
u8 buf[POLY1305_BLOCK_SIZE];
|
|
/* bytes used in partial buffer */
|
|
unsigned int buflen;
|
|
- /* r key has been set */
|
|
- bool rset;
|
|
- /* s key has been set */
|
|
+ /* how many keys have been set in r[] */
|
|
+ unsigned short rset;
|
|
+ /* whether s[] has been set */
|
|
bool sset;
|
|
+ /* finalize key */
|
|
+ u32 s[4];
|
|
+ /* accumulator */
|
|
+ struct poly1305_state h;
|
|
+ /* key */
|
|
+ struct poly1305_key r[1];
|
|
};
|
|
|
|
#endif
|