1225 lines
32 KiB
C++
1225 lines
32 KiB
C++
|
/*
|
||
|
* Copyright (c) 2012-2020 MIRACL UK Ltd.
|
||
|
*
|
||
|
* This file is part of MIRACL Core
|
||
|
* (see https://github.com/miracl/core).
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
/* Dilithium API implementation. Constant time where it matters. Spends nearly all of its time running SHA3. Small.
|
||
|
|
||
|
The Matrix A is calculated on-the-fly to keep memory requirement minimal
|
||
|
Note that
|
||
|
1. A precalculated A can be included in the public key, for use by signature and verification (which blows up public key size)
|
||
|
2. Precalculating A for signature calculation means that the A does not have to re-calculated for each attempt to find a good signature
|
||
|
|
||
|
Might be simpler to wait for hardware support for SHA3 before attempting further optimization!
|
||
|
|
||
|
M.Scott 30/09/2021
|
||
|
*/
|
||
|
|
||
|
#include "dilithium.h"
|
||
|
|
||
|
using namespace core;
|
||
|
|
||
|
#define round(a,b) (((a)+((b)/2))/(b))
|
||
|
|
||
|
// parameters for each security level
|
||
|
// tau,gamma1,gamma2,K,L,eta,lg(2*eta+1),omega
|
||
|
const int PARAMS_2[8]={39,17,88,4,4,2,3,80};
|
||
|
const int PARAMS_3[8]={49,19,32,6,5,4,4,55};
|
||
|
const int PARAMS_5[8]={60,19,32,8,7,2,3,75};
|
||
|
|
||
|
const sign32 roots[] = {0x3ffe00,0x64f7,0x581103,0x77f504,0x39e44,0x740119,0x728129,0x71e24,0x1bde2b,0x23e92b,0x7a64ae,0x5ff480,0x2f9a75,0x53db0a,0x2f7a49,0x28e527,0x299658,0xfa070,0x6f65a5,0x36b788,0x777d91,0x6ecaa1,0x27f968,0x5fb37c,0x5f8dd7,0x44fae8,0x6a84f8,0x4ddc99,0x1ad035,0x7f9423,0x3d3201,0x445c5,0x294a67,0x17620,0x2ef4cd,0x35dec5,0x668504,0x49102d,0x5927d5,0x3bbeaf,0x44f586,0x516e7d,0x368a96,0x541e42,0x360400,0x7b4a4e,0x23d69c,0x77a55e,0x65f23e,0x66cad7,0x357e1e,0x458f5a,0x35843f,0x5f3618,0x67745d,0x38738c,0xc63a8,0x81b9a,0xe8f76,0x3b3853,0x3b8534,0x58dc31,0x1f9d54,0x552f2e,0x43e6e6,0x688c82,0x47c1d0,0x51781a,0x69b65e,0x3509ee,0x2135c7,0x67afbc,0x6caf76,0x1d9772,0x419073,0x709cf7,0x4f3281,0x4fb2af,0x4870e1,0x1efca,0x3410f2,0x70de86,0x20c638,0x296e9f,0x5297a4,0x47844c,0x799a6e,0x5a140a,0x75a283,0x6d2114,0x7f863c,0x6be9f8,0x7a0bde,0x1495d4,0x1c4563,0x6a0c63,0x4cdbea,0x40af0,0x7c417,0x2f4588,0xad00,0x6f16bf,0xdcd44,0x3c675a,0x470bcb,0x7fbe7f,0x193948,0x4e49c1,0x24756c,0x7ca7e0,0xb98a1,0x6bc809,0x2e46c,0x49a809,0x3036c2,0x639ff7,0x5b1c94,0x7d2ae1,0x141305,0x147792,0x139e25,0x67b0e1,0x737945,0x69e803,0x51cea3,0x44a79d,0x488058,0x3a97d9,0x1fea93,0x33ff5a,0x2358d4,0x3a41f8,0x4cdf73,0x223dfb,0x5a8ba0,0x498423,0x412f5,0x252587,0x6d04f1,0x359b5d,0x4a28a1,0x4682fd,0x6d9b57,0x4f25df,0xdbe5e,0x1c5e1a,0xde0e6,0xc7f5a,0x78f83,0x67428b,0x7f3705,0x77e6fd,0x75e022,0x503af7,0x1f0084,0x30ef86,0x49997e,0x77dcd7,0x742593,0x4901c3,0x53919,0x4610c,0x5aad42,0x3eb01b,0x3472e7,0x4ce03c,0x1a7cc7,0x31924,0x2b5ee5,0x291199,0x585a3b,0x134d71,0x3de11c,0x130984,0x25f051,0x185a46,0x466519,0x1314be,0x283891,0x49bb91,0x52308a,0x1c853f,0x1d0b4b,0x6fd6a7,0x6b88bf,0x12e11b,0x4d3e3f,0x6a0d30,0x78fde5,0x1406c7,0x327283,0x61ed6f,0x6c5954,0x1d4099,0x590579,0x6ae5ae,0x16e405,0xbdbe7,0x221de8,0x33f8cf,0x779935,0x54aa0d,0x665ff9,0x63b158,0x58711c,0x470c13,0x910d8,0x463e20,0x612659,0x251d8b,0x2573b7,0x7d5c90,0x1ddd98,0x336898,0x2d4bb,0x6d73a8,0x4f4cbf,0x27c1c,0x18aa08,0x2dfd71,0xc5ca5,0x19379a,0x478168,0x646c3e,0x51813d,0x35c539,0x3b0115,0x41dc0,0x21c4f7,0x70fbf5,0x1a35e7,0x7340e,0x795d46,0x1a4cd0,0x645caf,0x1d2668,0x666e99,0x6f0634,0x7be5db,0x455fdc,0x530765,0x5dc1b0,0x7973de,0x5cfd0a,0x2cc93,0x70f806,0x189c2a,0x49c5aa,0x776a51,0x3bcf2c,0x7f234f,0x6b16e0,0x3c15ca,0x155e68,0x72f6b7,0x1e29ce};
|
||
|
const sign32 iroots[] = {0x3ffe00,0x7f7b0a,0x7eafd,0x27cefe,0x78c1dd,0xd5ed8,0xbdee8,0x7c41bd,0x56fada,0x5065b8,0x2c04f7,0x50458c,0x1feb81,0x57b53,0x5bf6d6,0x6401d6,0x7b9a3c,0x42ae00,0x4bde,0x650fcc,0x320368,0x155b09,0x3ae519,0x20522a,0x202c85,0x57e699,0x111560,0x86270,0x492879,0x107a5c,0x703f91,0x5649a9,0x2ab0d3,0x6042ad,0x2703d0,0x445acd,0x44a7ae,0x71508b,0x77c467,0x737c59,0x476c75,0x186ba4,0x20a9e9,0x4a5bc2,0x3a50a7,0x4a61e3,0x19152a,0x19edc3,0x83aa3,0x5c0965,0x495b3,0x49dc01,0x2bc1bf,0x49556b,0x2e7184,0x3aea7b,0x442152,0x26b82c,0x36cfd4,0x195afd,0x4a013c,0x50eb34,0x7e69e1,0x56959a,0x454828,0x375fa9,0x3b3864,0x2e115e,0x15f7fe,0xc66bc,0x182f20,0x6c41dc,0x6b686f,0x6bccfc,0x2b520,0x24c36d,0x1c400a,0x4fa93f,0x3637f8,0x7cfb95,0x1417f8,0x744760,0x33821,0x5b6a95,0x319640,0x66a6b9,0x2182,0x38d436,0x4378a7,0x7212bd,0x10c942,0x7f3301,0x509a79,0x781bea,0x7bd511,0x330417,0x15d39e,0x639a9e,0x6b4a2d,0x5d423,0x13f609,0x59c5,0x12beed,0xa3d7e,0x25cbf7,0x64593,0x385bb5,0x2d485d,0x567162,0x5f19c9,0xf017b,0x4bcf0f,0x7df037,0x376f20,0x302d52,0x30ad80,0xf430a,0x3e4f8e,0x62488f,0x13308b,0x183045,0x5eaa3a,0x4ad613,0x1629a3,0x2e67e7,0x381e31,0x17537f,0x3bf91b,0x61b633,0xce94a,0x6a8199,0x43ca37,0x14c921,0xbcb2,0x4410d5,0x875b0,0x361a57,0x6743d7,0xee7fb,0x7d136e,0x22e2f7,0x66c23,0x221e51,0x2cd89c,0x3a8025,0x3fa26,0x10d9cd,0x197168,0x62b999,0x1b8352,0x659331,0x682bb,0x78abf3,0x65aa1a,0xee40c,0x5e1b0a,0x7bc241,0x44deec,0x4a1ac8,0x2e5ec4,0x1b73c3,0x385e99,0x66a867,0x73835c,0x51e290,0x6735f9,0x7d63e5,0x309342,0x126c59,0x7d0b46,0x4c7769,0x620269,0x28371,0x5a6c4a,0x5ac276,0x1eb9a8,0x39a1e1,0x76cf29,0x38d3ee,0x276ee5,0x1c2ea9,0x198008,0x2b35f4,0x846cc,0x4be732,0x5dc219,0x74041a,0x68fbfc,0x14fa53,0x26da88,0x629f68,0x1386ad,0x1df292,0x4d6d7e,0x6bd93a,0x6e21c,0x15d2d1,0x32a1c2,0x6cfee6,0x145742,0x10095a,0x62d4b6,0x635ac2,0x2daf77,0x362470,0x57a770,0x6ccb43,0x397ae8,0x6785bb,0x59efb0,0x6cd67d,0x41fee5,0x6c9290,0x2785c6,0x56ce68,0x54811c,0x7cc6dd,0x65633a,0x32ffc5,0x4b6d1a,0x412fe6,0x2532bf,0x7b7ef5,0x7aa6e8,0x36de3e,0xbba6e,0x8032a,0x364683,0x4ef07b,0x60df7d,0x2fa50a,0x9ffdf,0x7f904,0xa8fc,0x189d76,0x78507e,0x7360a7,0x71ff1b,0x6381e7,0x7221a3,0x30ba22,0x1244aa,0x395d04,0x35b760,0x4a44a4,0x12db10,0x5aba7a,0x7bcd0c,0x365bde,0x255461,0x5da206,0x33008e,0x459e09,0x5c872d,0x4be0a7,0x5ff56e};
|
||
|
|
||
|
/* Montgomery stuff */
|
||
|
|
||
|
static sign32 redc(unsign64 T)
|
||
|
{
|
||
|
unsign32 m = (unsign32)T * (unsign32)DL_ND;
|
||
|
return ((unsign64)m * DL_PRIME + T) >> 32;
|
||
|
}
|
||
|
|
||
|
static sign32 nres(unsign32 x)
|
||
|
{
|
||
|
return redc((unsign64)x * DL_R2MODP);
|
||
|
}
|
||
|
|
||
|
static sign32 modmul(unsign32 a, unsign32 b)
|
||
|
{
|
||
|
return redc((unsign64)a * b);
|
||
|
}
|
||
|
|
||
|
// make all elements +ve
|
||
|
static void poly_pos(sign32 *p)
|
||
|
{
|
||
|
int i;
|
||
|
for (i=0;i<DL_DEGREE;i++)
|
||
|
p[i]+=(p[i]>>31)&DL_PRIME;
|
||
|
}
|
||
|
|
||
|
// NTT code
|
||
|
|
||
|
// Important!
|
||
|
// nres(x); ntt(x)
|
||
|
// nres(y); ntt(y)
|
||
|
// z=x*y
|
||
|
// intt(z);
|
||
|
// redc(z);
|
||
|
|
||
|
// is equivalent to (note that nres() and redc() cancel out)
|
||
|
|
||
|
// ntt(x);
|
||
|
// nres(y); ntt(y);
|
||
|
// z=x*y
|
||
|
// intt(z)
|
||
|
|
||
|
// is equivalent to
|
||
|
|
||
|
// ntt(x)
|
||
|
// ntt(y)
|
||
|
// z=x*y
|
||
|
// intt(z)
|
||
|
// nres(z)
|
||
|
|
||
|
// In all cases z ends up in normal (non-Montgomery) form!
|
||
|
// So the conversion to Montgomery form can be "pushed" through the calculation.
|
||
|
|
||
|
// Here intt(z) <- intt(z);nres(z);
|
||
|
// Combining is more efficient
|
||
|
// note that ntt() and intt() are not mutually inverse
|
||
|
|
||
|
/* NTT code */
|
||
|
/* Cooley-Tukey NTT */
|
||
|
/* Excess of 2 allowed on input - coefficients must be < 2*PRIME */
|
||
|
|
||
|
static void ntt(sign32 *x)
|
||
|
{
|
||
|
int m, i, j, start, len = DL_DEGREE / 2;
|
||
|
sign32 S, V, q = DL_PRIME;
|
||
|
|
||
|
/* Make positive */
|
||
|
poly_pos(x);
|
||
|
m = 1;
|
||
|
while (m < DL_DEGREE)
|
||
|
{
|
||
|
start = 0;
|
||
|
for (i = 0; i < m; i++)
|
||
|
{
|
||
|
S = roots[m + i];
|
||
|
for (j = start; j < start + len; j++)
|
||
|
{
|
||
|
V = modmul(x[j + len], S);
|
||
|
x[j + len] = x[j] + 2 * q - V;
|
||
|
x[j] = x[j] + V;
|
||
|
}
|
||
|
start += 2 * len;
|
||
|
}
|
||
|
len /= 2;
|
||
|
m *= 2;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Gentleman-Sande INTT */
|
||
|
/* Excess of 2 allowed on input - coefficients must be < 2*PRIME */
|
||
|
/* Output fully reduced */
|
||
|
#define NTTL 1 // maybe should be 2?
|
||
|
|
||
|
static void intt(sign32 *x)
|
||
|
{
|
||
|
int m, i, j, k, n,lim,t = 1;
|
||
|
sign32 S, U, V, W, q = DL_PRIME;
|
||
|
|
||
|
m = DL_DEGREE/2;
|
||
|
n=DL_LGN;
|
||
|
while (m >= 1)
|
||
|
{
|
||
|
lim=NTTL>>n;
|
||
|
n--;
|
||
|
k = 0;
|
||
|
for (i = 0; i < m; i++)
|
||
|
{
|
||
|
S = iroots[m + i];
|
||
|
for (j = k; j < k + t; j++)
|
||
|
{
|
||
|
#if NTTL>1
|
||
|
if (m<NTTL && j<k+lim) // This should be unwound for timings
|
||
|
{ // need to knock back excesses. Never used if NTTL=1.
|
||
|
U=modmul(x[j],DL_ONE);
|
||
|
V=modmul(x[j+t],DL_ONE);
|
||
|
} else {
|
||
|
U = x[j];
|
||
|
V = x[j + t];
|
||
|
}
|
||
|
#else
|
||
|
U = x[j];
|
||
|
V = x[j + t];
|
||
|
#endif
|
||
|
x[j] = U + V;
|
||
|
W = U + (DL_DEGREE/NTTL) * q - V;
|
||
|
x[j + t] = modmul(W, S);
|
||
|
}
|
||
|
k += 2 * t;
|
||
|
}
|
||
|
t *= 2;
|
||
|
m /= 2;
|
||
|
}
|
||
|
|
||
|
for (j = 0; j < DL_DEGREE; j++)
|
||
|
{ // fully reduce, nres combined with 1/DEGREE
|
||
|
x[j]= modmul(x[j],DL_COMBO);
|
||
|
x[j] -= q;
|
||
|
x[j] += (x[j] >> 31)&q;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void nres_it(sign32 *p)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p[i] = nres(p[i]);
|
||
|
}
|
||
|
|
||
|
static void redc_it(sign32 *p)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p[i] = redc(p[i]);
|
||
|
}
|
||
|
|
||
|
// copy polynomial
|
||
|
static void poly_copy(sign32 *p1, sign32 *p2)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = p2[i];
|
||
|
}
|
||
|
|
||
|
// copy from small polynomial
|
||
|
static void poly_scopy(sign32 *p1, sign8 *p2)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = (sign32)p2[i];
|
||
|
}
|
||
|
|
||
|
// copy from medium polynomial
|
||
|
static void poly_mcopy(sign32 *p1, sign16 *p2)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = (sign32)p2[i];
|
||
|
}
|
||
|
|
||
|
static void poly_zero(sign32 *p1)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = 0;
|
||
|
}
|
||
|
|
||
|
static void poly_negate(sign32 *p1,sign32 *p2)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = DL_PRIME-p2[i];
|
||
|
}
|
||
|
|
||
|
static void poly_mul(sign32 *p1, sign32 *p2, sign32 *p3)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = modmul(p2[i], p3[i]);
|
||
|
}
|
||
|
|
||
|
static void poly_add(sign32 *p1, sign32 *p2, sign32 *p3)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = (p2[i] + p3[i]);
|
||
|
}
|
||
|
|
||
|
static void poly_sub(sign32 *p1, sign32 *p2, sign32 *p3)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
p1[i] = (p2[i] + DL_PRIME - p3[i]);
|
||
|
}
|
||
|
|
||
|
/* reduces inputs that are already < 2q */
|
||
|
static void poly_soft_reduce(sign32 *poly)
|
||
|
{
|
||
|
int i;
|
||
|
sign32 e;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
{
|
||
|
e = poly[i] - DL_PRIME;
|
||
|
poly[i] = e + ((e >> 31)&DL_PRIME);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* fully reduces modulo q */
|
||
|
static void poly_hard_reduce(sign32 *poly)
|
||
|
{
|
||
|
int i;
|
||
|
sign32 e;
|
||
|
for (i = 0; i < DL_DEGREE; i++)
|
||
|
{
|
||
|
e = modmul(poly[i], DL_ONE); // reduces to < 2q
|
||
|
e = e - DL_PRIME;
|
||
|
poly[i] = e + ((e >> 31)&DL_PRIME); // finishes it off
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Generate A[i][j] from rho
|
||
|
static void ExpandAij(byte rho[32],sign32 Aij[],int i,int j)
|
||
|
{
|
||
|
sha3 sh;
|
||
|
int m,n;
|
||
|
unsign32 b0,b1,b2;
|
||
|
sign32 cf;
|
||
|
SHA3_init(&sh, SHAKE128);
|
||
|
byte buff[840]; // should be plenty
|
||
|
for (m=0;m<32;m++)
|
||
|
SHA3_process(&sh,rho[m]);
|
||
|
SHA3_process(&sh,j&0xff);
|
||
|
SHA3_process(&sh,i&0xff);
|
||
|
SHA3_shake(&sh,(char *)buff,840);
|
||
|
m=n=0;
|
||
|
while (m<DL_DEGREE)
|
||
|
{
|
||
|
b0=(unsign32)buff[n++]; b1=(unsign32)buff[n++]; b2=(unsign32)buff[n++];
|
||
|
cf=((b2&0x7F)<<16)+(b1<<8)+b0;
|
||
|
if (cf>=DL_PRIME) continue;
|
||
|
Aij[m++]=cf;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// array t has ab active bits per word
|
||
|
// extract bytes from array of words
|
||
|
// if max!=0 then -max<=t[i]<=+max
|
||
|
static byte nextbyte32(int ab,int max,sign32 t[],int &ptr, int &bts)
|
||
|
{
|
||
|
sign32 r,w;
|
||
|
int left=ab-bts; // number of bits left in this word
|
||
|
int i=0;
|
||
|
w=t[ptr];
|
||
|
if (max!=0)
|
||
|
w=max-w;
|
||
|
r=w>>bts;
|
||
|
while (left<8)
|
||
|
{
|
||
|
i++;
|
||
|
w=t[ptr+i];
|
||
|
if (max!=0)
|
||
|
w=max-w;
|
||
|
r|=w<<left;
|
||
|
left+=ab;
|
||
|
}
|
||
|
|
||
|
bts+=8;
|
||
|
while (bts>=ab)
|
||
|
{
|
||
|
bts-=ab;
|
||
|
ptr++;
|
||
|
}
|
||
|
return (byte)r&0xff;
|
||
|
}
|
||
|
|
||
|
// array t has ab active bits per word
|
||
|
// extract dense bytes from array of words
|
||
|
// if max!=0 then -max<=t[i]<=+max
|
||
|
static byte nextbyte16(int ab,int max,sign16 t[],int &ptr, int &bts)
|
||
|
{
|
||
|
sign32 r,w;
|
||
|
int left=ab-bts; // number of bits left in this word
|
||
|
int i=0;
|
||
|
w=t[ptr];
|
||
|
if (max!=0)
|
||
|
w=max-w;
|
||
|
r=w>>bts;
|
||
|
while (left<8)
|
||
|
{
|
||
|
i++;
|
||
|
w=t[ptr+i];
|
||
|
if (max!=0)
|
||
|
w=max-w;
|
||
|
r|=w<<left;
|
||
|
left+=ab;
|
||
|
}
|
||
|
|
||
|
bts+=8;
|
||
|
while (bts>=ab)
|
||
|
{
|
||
|
bts-=ab;
|
||
|
ptr++;
|
||
|
}
|
||
|
return (byte)r&0xff;
|
||
|
}
|
||
|
|
||
|
// array t has ab active bits per word
|
||
|
// extract dense bytes from array of words
|
||
|
// if max!=0 then -max<=t[i]<=+max
|
||
|
static byte nextbyte8(int ab,int max,sign8 t[],int &ptr, int &bts)
|
||
|
{
|
||
|
sign32 r,w;
|
||
|
int left=ab-bts; // number of bits left in this word
|
||
|
int i=0;
|
||
|
w=t[ptr];
|
||
|
if (max!=0)
|
||
|
w=max-w;
|
||
|
r=w>>bts;
|
||
|
while (left<8)
|
||
|
{
|
||
|
i++;
|
||
|
w=t[ptr+i];
|
||
|
if (max!=0)
|
||
|
w=max-w;
|
||
|
r|=w<<left;
|
||
|
left+=ab;
|
||
|
}
|
||
|
|
||
|
bts+=8;
|
||
|
while (bts>=ab)
|
||
|
{
|
||
|
bts-=ab;
|
||
|
ptr++;
|
||
|
}
|
||
|
return (byte)r&0xff;
|
||
|
}
|
||
|
|
||
|
// extract ab bits into word from dense byte stream
|
||
|
static sign32 nextword(const int ab,int max,byte t[],int &ptr, int &bts)
|
||
|
{
|
||
|
sign32 r=t[ptr]>>bts;
|
||
|
sign32 mask=(1<<ab)-1;
|
||
|
sign32 w;
|
||
|
int i=0;
|
||
|
int gotbits=8-bts; // bits left in current byte
|
||
|
while (gotbits<ab)
|
||
|
{
|
||
|
i++;
|
||
|
w=(sign32)t[ptr+i];
|
||
|
r|=w<<gotbits;
|
||
|
gotbits+=8;
|
||
|
}
|
||
|
bts+=ab;
|
||
|
while (bts>=8)
|
||
|
{
|
||
|
bts-=8;
|
||
|
ptr++;
|
||
|
}
|
||
|
w=r&mask;
|
||
|
if (max!=0)
|
||
|
w=max-w;
|
||
|
return w;
|
||
|
}
|
||
|
|
||
|
// pack public key
|
||
|
static int pack_pk(const int *params,byte pk[],byte rho[32],sign16 t1[])
|
||
|
{
|
||
|
int ptr,bts,i,n=0;
|
||
|
int ck=params[3];
|
||
|
for (i=0;i<32;i++)
|
||
|
pk[i]=rho[i];
|
||
|
n=32; ptr=bts=0;
|
||
|
for (i=0;i<(ck*DL_DEGREE*DL_TD)/8;i++ )
|
||
|
pk[n++]=nextbyte16(DL_TD,0,t1,ptr,bts);
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
// unpack public key
|
||
|
static void unpack_pk(const int *params,byte rho[32],sign16 t1[],byte pk[])
|
||
|
{
|
||
|
int ptr,bts,i;
|
||
|
int ck=params[3];
|
||
|
for (i=0;i<32;i++)
|
||
|
rho[i]=pk[i];
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<ck*DL_DEGREE;i++ )
|
||
|
t1[i]=(sign16)nextword(DL_TD,0,&pk[32],ptr,bts);
|
||
|
}
|
||
|
|
||
|
// secret key of size 32*3+DEGREE*(K*D+L*LG2ETA1+K*LG2ETA1)/8
|
||
|
static int pack_sk(const int *params,byte sk[],byte rho[32],byte bK[32],byte tr[32],sign8 s1[],sign8 s2[],sign16 t0[])
|
||
|
{
|
||
|
int ptr,bts,i,n=32;
|
||
|
int ck=params[3];
|
||
|
int el=params[4];
|
||
|
int eta=params[5];
|
||
|
int lg2eta1=params[6];
|
||
|
|
||
|
for (i=0;i<32;i++)
|
||
|
sk[i]=rho[i];
|
||
|
for (i=0;i<32;i++)
|
||
|
sk[n++]=bK[i];
|
||
|
for (i=0;i<32;i++)
|
||
|
sk[n++]=tr[i];
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<(el*DL_DEGREE*lg2eta1)/8;i++)
|
||
|
sk[n++]=nextbyte8(lg2eta1,eta,s1,ptr,bts);
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<(ck*DL_DEGREE*lg2eta1)/8;i++)
|
||
|
sk[n++]=nextbyte8(lg2eta1,eta,s2,ptr,bts);
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<(ck*DL_DEGREE*DL_D)/8;i++)
|
||
|
sk[n++]=nextbyte16(DL_D,(1<<(DL_D-1)),t0,ptr,bts);
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
static void unpack_sk(const int *params,byte rho[32],byte bK[32],byte tr[32],sign8 s1[],sign8 s2[],sign16 t0[],byte sk[])
|
||
|
{
|
||
|
int ptr,bts,i,n=32;
|
||
|
int ck=params[3];
|
||
|
int el=params[4];
|
||
|
int eta=params[5];
|
||
|
int lg2eta1=params[6];
|
||
|
|
||
|
for (i=0;i<32;i++)
|
||
|
rho[i]=sk[i];
|
||
|
for (i=0;i<32;i++)
|
||
|
bK[i]=sk[n++];
|
||
|
for (i=0;i<32;i++)
|
||
|
tr[i]=sk[n++];
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<el*DL_DEGREE;i++ )
|
||
|
s1[i]=(sign8)nextword(lg2eta1,eta,&sk[n],ptr,bts);
|
||
|
n+=ptr;
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<ck*DL_DEGREE;i++ )
|
||
|
s2[i]=(sign8)nextword(lg2eta1,eta,&sk[n],ptr,bts);
|
||
|
n+=ptr;
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<ck*DL_DEGREE;i++ )
|
||
|
t0[i]=(sign16)nextword(DL_D,(1<<(DL_D-1)),&sk[n],ptr,bts);
|
||
|
}
|
||
|
|
||
|
// pack signature - changes z
|
||
|
static int pack_sig(const int *params,byte sig[],sign32 z[],byte ct[],byte h[])
|
||
|
{
|
||
|
int lg=params[1];
|
||
|
int gamma1=1<<lg;
|
||
|
int ck=params[3];
|
||
|
int el=params[4];
|
||
|
int omega=params[7];
|
||
|
int ptr,bts,i,m,n,row,k=(el*DL_DEGREE*(lg+1))/8;
|
||
|
sign32 t;
|
||
|
|
||
|
for (i=0;i<32;i++)
|
||
|
sig[i]=ct[i];
|
||
|
n=32;
|
||
|
ptr=bts=0;
|
||
|
//pre-process z
|
||
|
for (i=0;i<el;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
{
|
||
|
t=z[row+m];
|
||
|
if (t>DL_PRIME/2) t-=DL_PRIME;
|
||
|
t=gamma1-t;
|
||
|
z[row+m]=t;
|
||
|
}
|
||
|
}
|
||
|
for (i=0;i<k;i++) {
|
||
|
sig[n++]=nextbyte32(lg+1,0,z,ptr,bts);
|
||
|
}
|
||
|
for (i=0;i<omega+ck;i++)
|
||
|
sig[n++]=h[i];
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
static void unpack_sig(const int *params,sign32 z[],byte ct[],byte h[],byte sig[])
|
||
|
{
|
||
|
int lg=params[1];
|
||
|
int gamma1=1<<lg;
|
||
|
int ck=params[3];
|
||
|
int el=params[4];
|
||
|
int omega=params[7];
|
||
|
|
||
|
int ptr,bts,i,n=el*DL_DEGREE;
|
||
|
sign32 t;
|
||
|
int m=32+(el*DL_DEGREE*(lg+1))/8;
|
||
|
for (i=0;i<32;i++)
|
||
|
ct[i]=sig[i];
|
||
|
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<n;i++) {
|
||
|
t=nextword(lg+1,0,&sig[32],ptr,bts);
|
||
|
t=gamma1-t;
|
||
|
if (t<0) t+=DL_PRIME;
|
||
|
z[i]=t;
|
||
|
}
|
||
|
|
||
|
for (i=0;i<omega+ck;i++)
|
||
|
h[i]=sig[m++];
|
||
|
}
|
||
|
|
||
|
// rejection sampling in range -ETA to +ETA
|
||
|
static void sample_Sn(const int *params,byte rhod[64],sign8 s[],int n)
|
||
|
{
|
||
|
int m,ptr,bts;
|
||
|
int eta=params[5];
|
||
|
int lg2eta1=params[6];
|
||
|
sha3 sh;
|
||
|
byte buff[272];
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
for (m=0;m<64;m++)
|
||
|
SHA3_process(&sh,rhod[m]);
|
||
|
SHA3_process(&sh,n&0xff);
|
||
|
SHA3_process(&sh,(n>>8)&0xff);
|
||
|
SHA3_shake(&sh,(char *)buff,272);
|
||
|
ptr=bts=0;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
{
|
||
|
do
|
||
|
{
|
||
|
s[m]=nextword(lg2eta1,0,buff,ptr,bts);
|
||
|
}
|
||
|
while (s[m]>2*eta);
|
||
|
s[m]=eta-s[m];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// uniform random sampling
|
||
|
static void sample_Y(const int *params,int k,byte rhod[64],sign32 y[])
|
||
|
{
|
||
|
int i,j,m,ki,row,ptr,bts; // 2^n-1
|
||
|
int lg=params[1];
|
||
|
int gamma1=1<<lg;
|
||
|
int el=params[4];
|
||
|
sha3 sh;
|
||
|
sign32 w,t;
|
||
|
byte buff[DL_YBYTES];
|
||
|
|
||
|
for (i=0;i<el;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
for (j=0;j<64;j++)
|
||
|
SHA3_process(&sh,rhod[j]);
|
||
|
ki=k+i;
|
||
|
SHA3_process(&sh,ki&0xff);
|
||
|
SHA3_process(&sh,ki>>8);
|
||
|
SHA3_shake(&sh,(char *)buff,DL_YBYTES);
|
||
|
|
||
|
ptr=bts=0;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
{ // take in LG+1 bits at a time
|
||
|
w=nextword(lg+1,0,buff,ptr,bts); // 20 bits
|
||
|
w=gamma1-w;
|
||
|
t=w>>31;
|
||
|
y[row+m]=w+(DL_PRIME&t);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// CRH(rho,t1)
|
||
|
static void CRH1(const int *params,byte H[32],byte rho[32],sign16 t1[])
|
||
|
{
|
||
|
int i;
|
||
|
int ptr,bts;
|
||
|
int ck=params[3];
|
||
|
sha3 sh;
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
for (i=0;i<32;i++)
|
||
|
SHA3_process(&sh,rho[i]);
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<(ck*DL_DEGREE*DL_TD)/8;i++)
|
||
|
SHA3_process(&sh,nextbyte16(DL_TD,0,t1,ptr,bts));
|
||
|
SHA3_shake(&sh,(char *)H,32);
|
||
|
}
|
||
|
|
||
|
// CRH(tr,M)
|
||
|
static void CRH2(byte H[64],byte tr[32],byte mess[],int mlen)
|
||
|
{
|
||
|
int i;
|
||
|
sha3 sh;
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
for (i=0;i<32;i++)
|
||
|
SHA3_process(&sh,tr[i]);
|
||
|
for (i=0;i<mlen;i++)
|
||
|
SHA3_process(&sh,mess[i]);
|
||
|
SHA3_shake(&sh,(char *)H,64);
|
||
|
}
|
||
|
|
||
|
// CRH(K,mu)
|
||
|
static void CRH3(byte H[64],byte bK[32],byte mu[64])
|
||
|
{
|
||
|
int i;
|
||
|
sha3 sh;
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
for (i=0;i<32;i++)
|
||
|
SHA3_process(&sh,bK[i]);
|
||
|
for (i=0;i<64;i++)
|
||
|
SHA3_process(&sh,mu[i]);
|
||
|
SHA3_shake(&sh,(char *)H,64);
|
||
|
}
|
||
|
|
||
|
// H(mu,w1)
|
||
|
static void H4(const int *params,byte CT[32],byte mu[64],sign8 w1[])
|
||
|
{
|
||
|
int i;
|
||
|
int ptr,bts;
|
||
|
int ck=params[3];
|
||
|
int dv=params[2];
|
||
|
sha3 sh;
|
||
|
int w1b=4;
|
||
|
if (dv==88) w1b=6;
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
for (i=0;i<64;i++)
|
||
|
SHA3_process(&sh,mu[i]);
|
||
|
ptr=bts=0;
|
||
|
for (i=0;i<(ck*DL_DEGREE*w1b)/8;i++)
|
||
|
SHA3_process(&sh,nextbyte8(w1b,0,w1,ptr,bts));
|
||
|
SHA3_shake(&sh,(char *)CT,32);
|
||
|
}
|
||
|
|
||
|
static void SampleInBall(const int *params,byte ct[32],sign32 c[DL_DEGREE])
|
||
|
{
|
||
|
int i,j,k,n,b,sn;
|
||
|
int tau=params[0];
|
||
|
byte signs[8],buff[136];
|
||
|
sha3 sh;
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
for (i=0;i<32;i++)
|
||
|
SHA3_process(&sh,ct[i]);
|
||
|
SHA3_shake(&sh,(char *)buff,136);
|
||
|
for (i=0;i<8;i++)
|
||
|
signs[i]=buff[i];
|
||
|
k=8;
|
||
|
b=0;
|
||
|
poly_zero(c);
|
||
|
sn=signs[0]; n=1;
|
||
|
for (i=DL_DEGREE-tau;i<DL_DEGREE;i++)
|
||
|
{
|
||
|
do
|
||
|
{
|
||
|
j=buff[k++];
|
||
|
} while (j>i);
|
||
|
c[i]=c[j];
|
||
|
c[j]=1-2*((sign32)sn&1);
|
||
|
sn>>=1; b++;
|
||
|
if (b==8) {
|
||
|
sn=signs[n++]; b=0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static sign16 p2r(sign32 *r0)
|
||
|
{
|
||
|
sign32 d=(1<<DL_D);
|
||
|
sign32 r1;
|
||
|
r1=(*r0+d/2-1)>>DL_D;
|
||
|
*r0-=(r1 << DL_D);
|
||
|
return (sign16)r1;
|
||
|
}
|
||
|
|
||
|
static void Power2Round(sign32 t[],sign16 t0[],sign16 t1[])
|
||
|
{
|
||
|
int m;
|
||
|
sign32 w;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
{
|
||
|
w=t[m];
|
||
|
t1[m]=p2r(&w);
|
||
|
t0[m]=(sign16)w;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// ALPHA = (Q-1)/16 - borrowed from dilithium ref implementation
|
||
|
static sign32 decompose_lo(const int *params,sign32 a) {
|
||
|
int gamma2,dv=params[2];
|
||
|
sign32 a0,a1;
|
||
|
a1 = (a + 127) >> 7;
|
||
|
|
||
|
if (dv==32)
|
||
|
{
|
||
|
a1 = (a1*1025 + (1 << 21)) >> 22;
|
||
|
a1 &= 15;
|
||
|
gamma2=(DL_PRIME-1)/32;
|
||
|
} else { // 88
|
||
|
a1 = (a1*11275 + (1 << 23)) >> 24;
|
||
|
a1 ^= ((43 - a1) >> 31) & a1;
|
||
|
gamma2=(DL_PRIME-1)/88;
|
||
|
}
|
||
|
|
||
|
a0 = a - a1*2*gamma2; // (Q-1)/2R=alpha
|
||
|
a0 -= (((DL_PRIME-1)/2 - a0) >> 31) & DL_PRIME;
|
||
|
a0 += (a0>>31)&DL_PRIME;
|
||
|
return a0;
|
||
|
}
|
||
|
|
||
|
// ALPHA = (Q-1)/16
|
||
|
static sign8 decompose_hi(const int *params,sign32 a) {
|
||
|
int dv=params[2];
|
||
|
sign32 a1;
|
||
|
a1 = (a + 127) >> 7;
|
||
|
if (dv==32) {
|
||
|
a1 = (a1*1025 + (1 << 21)) >> 22;
|
||
|
a1 &= 15;
|
||
|
} else {
|
||
|
a1 = (a1*11275 + (1 << 23)) >> 24;
|
||
|
a1 ^= ((43 - a1) >> 31) & a1;
|
||
|
}
|
||
|
return (sign8)a1;
|
||
|
}
|
||
|
|
||
|
static void lobits(const int *params,sign32 r0[],sign32 r[])
|
||
|
{
|
||
|
int m;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
r0[m]=decompose_lo(params,r[m]);
|
||
|
}
|
||
|
|
||
|
static void hibits(const int *params,sign8 r1[],sign32 r[])
|
||
|
{
|
||
|
int m;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
r1[m]=decompose_hi(params,r[m]);
|
||
|
}
|
||
|
|
||
|
|
||
|
// before h initialised to zeros, hptr=0
|
||
|
// after new hptr returned and h[OMEGA+i]= hptr
|
||
|
static int MakePartialHint(const int *params,byte h[],int hptr,sign32 z[],sign32 r[])
|
||
|
{
|
||
|
int m;
|
||
|
sign8 a0,a1;
|
||
|
sign32 rz;
|
||
|
int omega=params[7];
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
{
|
||
|
a0=decompose_hi(params,r[m]);
|
||
|
rz=r[m]+z[m];
|
||
|
rz-=DL_PRIME;
|
||
|
rz=rz+((rz>>31)&DL_PRIME);
|
||
|
a1=decompose_hi(params,rz);
|
||
|
if (a0!=a1) {
|
||
|
if (hptr>=omega) return omega+1;
|
||
|
h[hptr++]=m&0xff;
|
||
|
}
|
||
|
}
|
||
|
return hptr;
|
||
|
}
|
||
|
|
||
|
static int UsePartialHint(const int *params,sign8 r[],byte h[],int hptr,int i,sign32 w[])
|
||
|
{
|
||
|
int dv=(sign8)params[2];
|
||
|
int omega=params[7];
|
||
|
sign8 a1,md=dv/2;
|
||
|
sign32 a0;
|
||
|
int m;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
{
|
||
|
a1=decompose_hi(params,w[m]);
|
||
|
if (m==h[hptr] && hptr<h[omega+i])
|
||
|
{
|
||
|
hptr++;
|
||
|
a0=decompose_lo(params,w[m]);
|
||
|
if (a0<=DL_PRIME/2) {
|
||
|
a1++;
|
||
|
if (a1>=md) a1-=md;
|
||
|
} else {
|
||
|
a1--;
|
||
|
if (a1<0) a1+=md;
|
||
|
}
|
||
|
}
|
||
|
r[m]=a1;
|
||
|
}
|
||
|
return hptr;
|
||
|
}
|
||
|
|
||
|
static sign32 infinity_norm(sign32 w[])
|
||
|
{
|
||
|
int m;
|
||
|
sign32 az,n=0;
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
{
|
||
|
az=w[m];
|
||
|
if (az>DL_PRIME/2) az=DL_PRIME-az;
|
||
|
if (az>n) n=az;
|
||
|
}
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
// Dilithium API
|
||
|
|
||
|
static void keypair(const int *params,byte *tau,octet *sk,octet *pk)
|
||
|
{
|
||
|
int i,row,j;
|
||
|
sha3 sh;
|
||
|
byte buff[128];
|
||
|
byte rho[32];
|
||
|
byte rhod[64];
|
||
|
byte bK[32];
|
||
|
byte tr[32]; // 320 bytes
|
||
|
sign32 Aij[DL_DEGREE]; // 1024 bytes
|
||
|
sign32 w[DL_DEGREE]; // work space 1024 bytes
|
||
|
sign32 r[DL_DEGREE]; // work space 1024 bytes total = 12352
|
||
|
|
||
|
int ck=params[3];
|
||
|
int el=params[4];
|
||
|
|
||
|
#ifdef USE_VLAS
|
||
|
sign8 s1[el*DL_DEGREE]; // 1280 bytes
|
||
|
sign8 s2[ck*DL_DEGREE]; // 1536 bytes
|
||
|
sign16 t0[ck*DL_DEGREE]; // 3072 bytes
|
||
|
sign16 t1[ck*DL_DEGREE]; // 3072 bytes
|
||
|
#else
|
||
|
sign8 s1[DL_MAXL*DL_DEGREE]; // 1280 bytes
|
||
|
sign8 s2[DL_MAXK*DL_DEGREE]; // 1536 bytes
|
||
|
sign16 t0[DL_MAXK*DL_DEGREE]; // 3072 bytes
|
||
|
sign16 t1[DL_MAXK*DL_DEGREE]; // 3072 bytes
|
||
|
#endif
|
||
|
SHA3_init(&sh, SHAKE256);
|
||
|
|
||
|
for (i=0;i<32;i++)
|
||
|
SHA3_process(&sh,tau[i]);
|
||
|
SHA3_shake(&sh,(char *)buff,128);
|
||
|
for (i=0;i<32;i++)
|
||
|
{
|
||
|
rho[i]=buff[i];
|
||
|
bK[i]=buff[i+96];
|
||
|
}
|
||
|
for (i=0;i<64;i++)
|
||
|
rhod[i]=buff[32+i];
|
||
|
|
||
|
for (i=0;i<el;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
sample_Sn(params,rhod,&s1[row],i);
|
||
|
}
|
||
|
|
||
|
for (i=0;i<ck;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
sample_Sn(params,rhod,&s2[row],el+i);
|
||
|
poly_zero(r);
|
||
|
for (j=0;j<el;j++)
|
||
|
{
|
||
|
poly_scopy(w,&s1[j*DL_DEGREE]);
|
||
|
ntt(w);
|
||
|
ExpandAij(rho,Aij,i,j); // This is bottleneck
|
||
|
poly_mul(w,w,Aij);
|
||
|
poly_add(r,r,w);
|
||
|
//poly_soft_reduce(r); // be lazy
|
||
|
}
|
||
|
poly_hard_reduce(r);
|
||
|
intt(r);
|
||
|
poly_scopy(w,&s2[row]);
|
||
|
poly_pos(w);
|
||
|
poly_add(r,r,w);
|
||
|
poly_soft_reduce(r);
|
||
|
Power2Round(r,&t0[row],&t1[row]);
|
||
|
}
|
||
|
|
||
|
CRH1(params,tr,rho,t1);
|
||
|
|
||
|
pk->len=pack_pk(params,(byte *)pk->val,rho,t1);
|
||
|
sk->len=pack_sk(params,(byte *)sk->val,rho,bK,tr,s1,s2,t0);
|
||
|
}
|
||
|
|
||
|
static int signature(const int *params,octet *sk,octet *M,octet *sig)
|
||
|
{
|
||
|
int i,k,nh,fk,row,j;
|
||
|
bool badone;
|
||
|
byte rho[32];
|
||
|
byte bK[32];
|
||
|
byte ct[32];
|
||
|
byte tr[32];
|
||
|
byte mu[64];
|
||
|
byte rhod[64]; // 288 bytes
|
||
|
byte hint[100]; // 61 bytes
|
||
|
|
||
|
sign32 c[DL_DEGREE]; // 1024 bytes
|
||
|
sign32 w[DL_DEGREE]; // work space 1024 bytes
|
||
|
sign32 r[DL_DEGREE]; // work space 1024 bytes total= 21673 bytes
|
||
|
//sign32 Aij[DL_DEGREE]; // 1024 bytes
|
||
|
|
||
|
int ck=params[3];
|
||
|
int el=params[4];
|
||
|
|
||
|
#ifdef USE_VLAS
|
||
|
sign8 s1[el*DL_DEGREE]; // 1280 bytes
|
||
|
sign8 s2[ck*DL_DEGREE]; // 1536 bytes
|
||
|
sign16 t0[ck*DL_DEGREE]; // 3072 bytes
|
||
|
sign32 y[el*DL_DEGREE]; // 5120 bytes
|
||
|
sign32 Ay[ck*DL_DEGREE]; // 6144 bytes
|
||
|
sign8 w1[ck*DL_DEGREE]; // 1280 bytes
|
||
|
#else
|
||
|
sign8 s1[DL_MAXL*DL_DEGREE]; // 1280 bytes
|
||
|
sign8 s2[DL_MAXK*DL_DEGREE]; // 1536 bytes
|
||
|
sign16 t0[DL_MAXK*DL_DEGREE]; // 3072 bytes
|
||
|
sign32 y[DL_MAXL*DL_DEGREE]; // 5120 bytes
|
||
|
sign32 Ay[DL_MAXK*DL_DEGREE]; // 6144 bytes
|
||
|
sign8 w1[DL_MAXK*DL_DEGREE]; // 1280 bytes
|
||
|
#endif
|
||
|
int tau=params[0];
|
||
|
int lg=params[1];
|
||
|
int gamma1=(sign32)(1<<lg);
|
||
|
int dv=(sign32)params[2];
|
||
|
int gamma2=(DL_PRIME-1)/dv;
|
||
|
int eta=params[5];
|
||
|
int beta=(sign32)(tau*eta);
|
||
|
int omega=params[7];
|
||
|
unpack_sk(params,rho,bK,tr,s1,s2,t0,(byte *)sk->val);
|
||
|
|
||
|
// signature
|
||
|
CRH2(mu,tr,(byte*)M->val,M->len);
|
||
|
CRH3(rhod,bK,mu);
|
||
|
|
||
|
for (k=0; ;k++ )
|
||
|
{
|
||
|
fk=k*el;
|
||
|
sample_Y(params,fk,rhod,y);
|
||
|
|
||
|
// NTT y
|
||
|
for (i=0;i<el;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
ntt(&y[row]);
|
||
|
}
|
||
|
|
||
|
// Calculate Ay
|
||
|
for (i=0;i<ck;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
poly_zero(r);
|
||
|
for (j=0;j<el;j++)
|
||
|
{ // Note: no NTTs in here
|
||
|
poly_copy(w,&y[j*DL_DEGREE]);
|
||
|
ExpandAij(rho,c,i,j); // This is bottleneck // re-use c for Aij
|
||
|
poly_mul(w,w,c);
|
||
|
poly_add(r,r,w);
|
||
|
//poly_soft_reduce(r); // be lazy
|
||
|
}
|
||
|
poly_hard_reduce(r);
|
||
|
intt(r);
|
||
|
poly_copy(&Ay[row],r);
|
||
|
// Calculate w1
|
||
|
hibits(params,&w1[row],&Ay[row]);
|
||
|
}
|
||
|
|
||
|
// Calculate c
|
||
|
H4(params,ct,mu,w1);
|
||
|
SampleInBall(params,ct,c);
|
||
|
|
||
|
badone=false;
|
||
|
// Calculate z=y+c.s1
|
||
|
ntt(c);
|
||
|
for (i=0;i<el;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
poly_scopy(w,&s1[row]);
|
||
|
ntt(w);
|
||
|
poly_mul(w,w,c);
|
||
|
nres_it(w);
|
||
|
poly_add(&y[row],&y[row],w); // re-use y for z
|
||
|
redc_it(&y[row]); // unNTT y
|
||
|
intt(&y[row]);
|
||
|
poly_soft_reduce(&y[row]);
|
||
|
if (infinity_norm(&y[row])>=gamma1-beta)
|
||
|
{
|
||
|
badone=true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (badone) continue;
|
||
|
|
||
|
// Calculate Ay=w-c.s2 and r0=lobits(w-c.s2)
|
||
|
nh=0;
|
||
|
for (i=0;i<omega+ck;i++)
|
||
|
hint[i]=0;
|
||
|
for (i=0;i<ck;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
poly_scopy(w,&s2[row]);
|
||
|
ntt(w);
|
||
|
poly_mul(w,w,c);
|
||
|
|
||
|
intt(w);
|
||
|
poly_sub(&Ay[row],&Ay[row],w);
|
||
|
poly_soft_reduce(&Ay[row]); //Ay=w-cs2
|
||
|
lobits(params,w,&Ay[row]); // r0
|
||
|
if (infinity_norm(w)>=gamma2-beta)
|
||
|
{
|
||
|
badone=true;
|
||
|
break;
|
||
|
}
|
||
|
poly_mcopy(w,&t0[row]);
|
||
|
ntt(w);
|
||
|
poly_mul(w,w,c);
|
||
|
|
||
|
intt(w);
|
||
|
poly_negate(r,w); // -ct0
|
||
|
if (infinity_norm(r)>=gamma2)
|
||
|
{
|
||
|
badone=true;
|
||
|
break;
|
||
|
}
|
||
|
poly_sub(&Ay[row],&Ay[row],r);
|
||
|
poly_soft_reduce(&Ay[row]);
|
||
|
|
||
|
nh=MakePartialHint(params,hint,nh,r,&Ay[row]);
|
||
|
if (nh>omega)
|
||
|
{
|
||
|
badone=true;
|
||
|
break;
|
||
|
}
|
||
|
hint[omega+i]=nh;
|
||
|
}
|
||
|
if (badone) continue;
|
||
|
break;
|
||
|
}
|
||
|
sig->len=pack_sig(params,(byte *)sig->val,y,ct,hint);
|
||
|
return k+1;
|
||
|
}
|
||
|
|
||
|
static bool verify(const int *params,octet *pk,octet *M,octet *sig)
|
||
|
{
|
||
|
int i,row,j,m,hints;
|
||
|
byte rho[32];
|
||
|
byte mu[64];
|
||
|
byte ct[32];
|
||
|
byte cct[32];
|
||
|
byte tr[32]; // 192 bytes
|
||
|
byte hint[100]; // 61 bytes
|
||
|
|
||
|
|
||
|
sign32 Aij[DL_DEGREE]; // 1024 bytes
|
||
|
sign32 c[DL_DEGREE]; // 1024 bytes
|
||
|
sign32 w[DL_DEGREE]; // work space // 1024 bytes
|
||
|
sign32 r[DL_DEGREE]; // work space // 1024 bytes total=14077 bytes
|
||
|
|
||
|
int ck=params[3];
|
||
|
int el=params[4];
|
||
|
|
||
|
#ifdef USE_VLAS
|
||
|
sign32 z[el*DL_DEGREE]; // 5120 bytes
|
||
|
sign16 t1[ck*DL_DEGREE]; // 3072 bytes
|
||
|
sign8 w1d[ck*DL_DEGREE]; // 1536 bytes
|
||
|
#else
|
||
|
sign32 z[DL_MAXL*DL_DEGREE]; // 5120 bytes
|
||
|
sign16 t1[DL_MAXK*DL_DEGREE]; // 3072 bytes
|
||
|
sign8 w1d[DL_MAXK*DL_DEGREE]; // 1536 bytes
|
||
|
#endif
|
||
|
int tau=params[0];
|
||
|
int lg=params[1];
|
||
|
int gamma1=(sign32)(1<<lg);
|
||
|
int eta=params[5];
|
||
|
int beta=(sign32)(tau*eta);
|
||
|
int omega=params[7];
|
||
|
|
||
|
// unpack public key and signature
|
||
|
unpack_pk(params,rho,t1,(byte *)pk->val);
|
||
|
unpack_sig(params,z,ct,hint,(byte *)sig->val);
|
||
|
|
||
|
for (i=0;i<el;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
if (infinity_norm(&z[row])>=gamma1-beta)
|
||
|
return false;
|
||
|
ntt(&z[row]); // convert to ntt form
|
||
|
}
|
||
|
|
||
|
CRH1(params,tr,rho,t1);
|
||
|
CRH2(mu,tr,(byte *)M->val,M->len);
|
||
|
SampleInBall(params,ct,c);
|
||
|
ntt(c);
|
||
|
|
||
|
// Calculate Az
|
||
|
hints=0;
|
||
|
for (i=0;i<ck;i++)
|
||
|
{
|
||
|
row=DL_DEGREE*i;
|
||
|
poly_zero(r);
|
||
|
for (j=0;j<el;j++)
|
||
|
{ // Note: no NTTs in here
|
||
|
poly_copy(w,&z[j*DL_DEGREE]);
|
||
|
ExpandAij(rho,Aij,i,j); // This is bottleneck
|
||
|
poly_mul(w,w,Aij);
|
||
|
poly_add(r,r,w);
|
||
|
//poly_soft_reduce(r); // be lazy
|
||
|
}
|
||
|
poly_hard_reduce(r);
|
||
|
|
||
|
// Calculate Az-ct1.2^d
|
||
|
for (m=0;m<DL_DEGREE;m++)
|
||
|
w[m]=(sign32)(t1[row+m])<<DL_D;
|
||
|
ntt(w);
|
||
|
poly_mul(w,w,c);
|
||
|
poly_sub(r,r,w);
|
||
|
intt(r);
|
||
|
|
||
|
hints=UsePartialHint(params,&w1d[row],hint,hints,i,r);
|
||
|
if (hints>omega) return false;
|
||
|
}
|
||
|
|
||
|
H4(params,cct,mu,w1d);
|
||
|
|
||
|
for (i=0;i<32;i++)
|
||
|
if (ct[i]!=cct[i])
|
||
|
return false;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
void core::DLTHM_keypair_2(byte *tau,octet *sk,octet *pk)
|
||
|
{
|
||
|
keypair(PARAMS_2,tau,sk,pk);
|
||
|
}
|
||
|
|
||
|
int core::DLTHM_signature_2(octet *sk,octet *M,octet *sig)
|
||
|
{
|
||
|
return signature(PARAMS_2,sk,M,sig);
|
||
|
}
|
||
|
|
||
|
bool core::DLTHM_verify_2(octet *pk,octet *M,octet *sig)
|
||
|
{
|
||
|
return verify(PARAMS_2,pk,M,sig);
|
||
|
}
|
||
|
|
||
|
|
||
|
void core::DLTHM_keypair_3(byte *tau,octet *sk,octet *pk)
|
||
|
{
|
||
|
keypair(PARAMS_3,tau,sk,pk);
|
||
|
}
|
||
|
|
||
|
int core::DLTHM_signature_3(octet *sk,octet *M,octet *sig)
|
||
|
{
|
||
|
return signature(PARAMS_3,sk,M,sig);
|
||
|
}
|
||
|
|
||
|
bool core::DLTHM_verify_3(octet *pk,octet *M,octet *sig)
|
||
|
{
|
||
|
return verify(PARAMS_3,pk,M,sig);
|
||
|
}
|
||
|
|
||
|
void core::DLTHM_keypair_5(byte *tau,octet *sk,octet *pk)
|
||
|
{
|
||
|
keypair(PARAMS_5,tau,sk,pk);
|
||
|
}
|
||
|
|
||
|
int core::DLTHM_signature_5(octet *sk,octet *M,octet *sig)
|
||
|
{
|
||
|
return signature(PARAMS_5,sk,M,sig);
|
||
|
}
|
||
|
|
||
|
bool core::DLTHM_verify_5(octet *pk,octet *M,octet *sig)
|
||
|
{
|
||
|
return verify(PARAMS_5,pk,M,sig);
|
||
|
}
|