[ring] : faster NTT and basis extension

This commit is contained in:
Jean-Philippe Bossuat
2020-10-21 01:09:03 +02:00
parent 47252cb2db
commit 647eec2f86
9 changed files with 922 additions and 859 deletions

View File

@@ -316,7 +316,6 @@ func (eval *evaluator) tensorAndRescale(ct0, ct1, ctOut *Element) {
for i := range ct0.value {
eval.baseconverterQ1Q2.ModUpSplitQP(levelQ, ct0.value[i], c0Q2[i])
eval.ringQ.NTT(ct0.value[i], c0Q1[i])
eval.ringQMul.NTT(c0Q2[i], c0Q2[i])
}
@@ -325,7 +324,6 @@ func (eval *evaluator) tensorAndRescale(ct0, ct1, ctOut *Element) {
for i := range ct1.value {
eval.baseconverterQ1Q2.ModUpSplitQP(levelQ, ct1.value[i], c1Q2[i])
eval.ringQ.NTT(ct1.value[i], c1Q1[i])
eval.ringQMul.NTT(c1Q2[i], c1Q2[i])
}

View File

@@ -17,7 +17,7 @@ func TestBootstrapp(t *testing.T) {
var err error
var testContext = new(testParams)
paramSet := uint64(0)
paramSet := uint64(1)
shemeParams := DefaultBootstrappSchemeParams[paramSet : paramSet+1]
bootstrappParams := DefaultBootstrappParams[paramSet : paramSet+1]

View File

@@ -1643,33 +1643,32 @@ func (eval *evaluator) switchKeysInPlaceNoModDown(level uint64, cx *ring.Poly, e
evakey1P.Coeffs = evakey.evakey[i][1].Coeffs[len(ringQ.Modulus):]
if i == 0 {
ringQ.MulCoeffsMontgomeryLvl(level, evakey0Q, c2QiQ, pool2Q)
ringQ.MulCoeffsMontgomeryLvl(level, evakey1Q, c2QiQ, pool3Q)
ringP.MulCoeffsMontgomery(evakey0P, c2QiP, pool2P)
ringP.MulCoeffsMontgomery(evakey1P, c2QiP, pool3P)
ringQ.MulCoeffsMontgomeryConstantLvl(level, evakey0Q, c2QiQ, pool2Q)
ringQ.MulCoeffsMontgomeryConstantLvl(level, evakey1Q, c2QiQ, pool3Q)
ringP.MulCoeffsMontgomeryConstant(evakey0P, c2QiP, pool2P)
ringP.MulCoeffsMontgomeryConstant(evakey1P, c2QiP, pool3P)
} else {
ringQ.MulCoeffsMontgomeryAndAddNoModLvl(level, evakey0Q, c2QiQ, pool2Q)
ringQ.MulCoeffsMontgomeryAndAddNoModLvl(level, evakey1Q, c2QiQ, pool3Q)
ringP.MulCoeffsMontgomeryAndAddNoMod(evakey0P, c2QiP, pool2P)
ringP.MulCoeffsMontgomeryAndAddNoMod(evakey1P, c2QiP, pool3P)
ringQ.MulCoeffsMontgomeryConstantAndAddNoModLvl(level, evakey0Q, c2QiQ, pool2Q)
ringQ.MulCoeffsMontgomeryConstantAndAddNoModLvl(level, evakey1Q, c2QiQ, pool3Q)
ringP.MulCoeffsMontgomeryConstantAndAddNoMod(evakey0P, c2QiP, pool2P)
ringP.MulCoeffsMontgomeryConstantAndAddNoMod(evakey1P, c2QiP, pool3P)
}
if reduce&7 == 1 {
ringQ.ReduceLvl(level, pool2Q, pool2Q)
ringQ.ReduceLvl(level, pool3Q, pool3Q)
ringP.Reduce(pool2P, pool2P)
ringP.Reduce(pool3P, pool3P)
//
if reduce&1 == 1 {
ringQ.ReduceConstantLvl(level, pool2Q, pool2Q)
ringQ.ReduceConstantLvl(level, pool3Q, pool3Q)
ringP.ReduceConstant(pool2P, pool2P)
ringP.ReduceConstant(pool3P, pool3P)
}
reduce++
}
if (reduce-1)&7 != 1 {
ringQ.ReduceLvl(level, pool2Q, pool2Q)
ringQ.ReduceLvl(level, pool3Q, pool3Q)
ringP.Reduce(pool2P, pool2P)
ringP.Reduce(pool3P, pool3P)
}
ringQ.ReduceLvl(level, pool2Q, pool2Q)
ringQ.ReduceLvl(level, pool3Q, pool3Q)
ringP.Reduce(pool2P, pool2P)
ringP.Reduce(pool3P, pool3P)
}
// switchKeysInPlace applies the general key-switching procedure of the form [c0 + cx*evakey[0], c1 + cx*evakey[1]]
@@ -1707,11 +1706,11 @@ func (eval *evaluator) decomposeAndSplitNTT(level, beta uint64, c2NTT, c2InvNTT,
p1tmp[j] = p0tmp[j]
}
} else {
ring.NTT(c2QiQ.Coeffs[x], c2QiQ.Coeffs[x], ringQ.N, nttPsi, qi, mredParams, bredParams)
ring.NTTLazy(c2QiQ.Coeffs[x], c2QiQ.Coeffs[x], ringQ.N, nttPsi, qi, mredParams, bredParams)
}
}
// c2QiP = c2 mod qi mod pj
ringP.NTT(c2QiP, c2QiP)
ringP.NTTLazy(c2QiP, c2QiP)
}
// RotateHoisted takes an input Ciphertext and a list of rotations and returns a map of Ciphertext, where each element of the map is the input Ciphertext

View File

@@ -3,6 +3,7 @@ package ring
import (
"math"
"math/big"
"math/bits"
"unsafe"
)
@@ -174,7 +175,7 @@ func (basisextender *FastBasisExtender) ModDownNTTPQ(level uint64, p1, p2 *Poly)
// First we get the P basis part of p1 out of the NTT domain
for j := 0; j < nPj; j++ {
InvNTT(p1.Coeffs[nQi+j], p1.Coeffs[nQi+j], ringP.N, ringP.GetNttPsiInv()[j], ringP.GetNttNInv()[j], ringP.Modulus[j], ringP.GetMredParams()[j])
InvNTTLazy(p1.Coeffs[nQi+j], p1.Coeffs[nQi+j], ringP.N, ringP.GetNttPsiInv()[j], ringP.GetNttNInv()[j], ringP.Modulus[j], ringP.GetMredParams()[j])
}
// Then we target this P basis of p1 and convert it to a Q basis (at the "level" of p1) and copy it on polypool
@@ -185,15 +186,16 @@ func (basisextender *FastBasisExtender) ModDownNTTPQ(level uint64, p1, p2 *Poly)
for i := uint64(0); i < level+1; i++ {
qi := ringQ.Modulus[i]
twoqi := qi << 1
p1tmp := p1.Coeffs[i]
p2tmp := p2.Coeffs[i]
p3tmp := polypool.Coeffs[i]
params := modDownParams[i]
params := qi - modDownParams[i]
mredParams := ringQ.MredParams[i]
bredParams := ringQ.BredParams[i]
// First we switch back the relevant polypool CRT array back to the NTT domain
NTT(p3tmp, p3tmp, ringQ.N, ringQ.GetNttPsi()[i], qi, mredParams, bredParams)
NTTLazy(p3tmp, p3tmp, ringQ.N, ringQ.GetNttPsi()[i], qi, mredParams, bredParams)
// Then for each coefficient we compute (P^-1) * (p1[i][j] - polypool[i][j]) mod qi
for j := uint64(0); j < ringQ.N; j = j + 8 {
@@ -202,14 +204,14 @@ func (basisextender *FastBasisExtender) ModDownNTTPQ(level uint64, p1, p2 *Poly)
y := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = MRed(x[0]+(qi-y[0]), params, qi, mredParams)
z[1] = MRed(x[1]+(qi-y[1]), params, qi, mredParams)
z[2] = MRed(x[2]+(qi-y[2]), params, qi, mredParams)
z[3] = MRed(x[3]+(qi-y[3]), params, qi, mredParams)
z[4] = MRed(x[4]+(qi-y[4]), params, qi, mredParams)
z[5] = MRed(x[5]+(qi-y[5]), params, qi, mredParams)
z[6] = MRed(x[6]+(qi-y[6]), params, qi, mredParams)
z[7] = MRed(x[7]+(qi-y[7]), params, qi, mredParams)
z[0] = MRed(y[0]+twoqi-x[0], params, qi, mredParams)
z[1] = MRed(y[1]+twoqi-x[1], params, qi, mredParams)
z[2] = MRed(y[2]+twoqi-x[2], params, qi, mredParams)
z[3] = MRed(y[3]+twoqi-x[3], params, qi, mredParams)
z[4] = MRed(y[4]+twoqi-x[4], params, qi, mredParams)
z[5] = MRed(y[5]+twoqi-x[5], params, qi, mredParams)
z[6] = MRed(y[6]+twoqi-x[6], params, qi, mredParams)
z[7] = MRed(y[7]+twoqi-x[7], params, qi, mredParams)
}
}
@@ -229,7 +231,7 @@ func (basisextender *FastBasisExtender) ModDownSplitNTTPQ(level uint64, p1Q, p1P
polypool := basisextender.polypoolQ
// First we get the P basis part of p1 out of the NTT domain
ringP.InvNTT(p1P, p1P)
ringP.InvNTTLazy(p1P, p1P)
// Then we target this P basis of p1 and convert it to a Q basis (at the "level" of p1) and copy it on polypool
// polypool is now the representation of the P basis of p1 but in basis Q (at the "level" of p1)
@@ -239,15 +241,16 @@ func (basisextender *FastBasisExtender) ModDownSplitNTTPQ(level uint64, p1Q, p1P
for i := uint64(0); i < level+1; i++ {
qi := ringQ.Modulus[i]
twoqi := qi << 1
p1tmp := p1Q.Coeffs[i]
p2tmp := p2.Coeffs[i]
p3tmp := polypool.Coeffs[i]
params := modDownParams[i]
params := qi - modDownParams[i]
mredParams := ringQ.MredParams[i]
bredParams := ringQ.BredParams[i]
// First we switch back the relevant polypool CRT array back to the NTT domain
NTT(p3tmp, p3tmp, ringQ.N, ringQ.GetNttPsi()[i], ringQ.Modulus[i], mredParams, bredParams)
NTTLazy(p3tmp, p3tmp, ringQ.N, ringQ.GetNttPsi()[i], ringQ.Modulus[i], mredParams, bredParams)
// Then for each coefficient we compute (P^-1) * (p1[i][j] - polypool[i][j]) mod qi
for j := uint64(0); j < ringQ.N; j = j + 8 {
@@ -256,14 +259,14 @@ func (basisextender *FastBasisExtender) ModDownSplitNTTPQ(level uint64, p1Q, p1P
y := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = MRed(x[0]+(qi-y[0]), params, qi, mredParams)
z[1] = MRed(x[1]+(qi-y[1]), params, qi, mredParams)
z[2] = MRed(x[2]+(qi-y[2]), params, qi, mredParams)
z[3] = MRed(x[3]+(qi-y[3]), params, qi, mredParams)
z[4] = MRed(x[4]+(qi-y[4]), params, qi, mredParams)
z[5] = MRed(x[5]+(qi-y[5]), params, qi, mredParams)
z[6] = MRed(x[6]+(qi-y[6]), params, qi, mredParams)
z[7] = MRed(x[7]+(qi-y[7]), params, qi, mredParams)
z[0] = MRed(y[0]+twoqi-x[0], params, qi, mredParams)
z[1] = MRed(y[1]+twoqi-x[1], params, qi, mredParams)
z[2] = MRed(y[2]+twoqi-x[2], params, qi, mredParams)
z[3] = MRed(y[3]+twoqi-x[3], params, qi, mredParams)
z[4] = MRed(y[4]+twoqi-x[4], params, qi, mredParams)
z[5] = MRed(y[5]+twoqi-x[5], params, qi, mredParams)
z[6] = MRed(y[6]+twoqi-x[6], params, qi, mredParams)
z[7] = MRed(y[7]+twoqi-x[7], params, qi, mredParams)
}
}
@@ -289,10 +292,11 @@ func (basisextender *FastBasisExtender) ModDownPQ(level uint64, p1, p2 *Poly) {
for i := uint64(0); i < level+1; i++ {
qi := ringQ.Modulus[i]
twoqi := qi << 1
p1tmp := p1.Coeffs[i]
p2tmp := p2.Coeffs[i]
p3tmp := polypool.Coeffs[i]
params := modDownParams[i]
params := qi - modDownParams[i]
mredParams := ringQ.MredParams[i]
// Then for each coefficient we compute (P^-1) * (p1[i][j] - polypool[i][j]) mod qi
@@ -302,14 +306,14 @@ func (basisextender *FastBasisExtender) ModDownPQ(level uint64, p1, p2 *Poly) {
y := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = MRed(x[0]+(qi-y[0]), params, qi, mredParams)
z[1] = MRed(x[1]+(qi-y[1]), params, qi, mredParams)
z[2] = MRed(x[2]+(qi-y[2]), params, qi, mredParams)
z[3] = MRed(x[3]+(qi-y[3]), params, qi, mredParams)
z[4] = MRed(x[4]+(qi-y[4]), params, qi, mredParams)
z[5] = MRed(x[5]+(qi-y[5]), params, qi, mredParams)
z[6] = MRed(x[6]+(qi-y[6]), params, qi, mredParams)
z[7] = MRed(x[7]+(qi-y[7]), params, qi, mredParams)
z[0] = MRed(y[0]+twoqi-x[0], params, qi, mredParams)
z[1] = MRed(y[1]+twoqi-x[1], params, qi, mredParams)
z[2] = MRed(y[2]+twoqi-x[2], params, qi, mredParams)
z[3] = MRed(y[3]+twoqi-x[3], params, qi, mredParams)
z[4] = MRed(y[4]+twoqi-x[4], params, qi, mredParams)
z[5] = MRed(y[5]+twoqi-x[5], params, qi, mredParams)
z[6] = MRed(y[6]+twoqi-x[6], params, qi, mredParams)
z[7] = MRed(y[7]+twoqi-x[7], params, qi, mredParams)
}
}
@@ -334,10 +338,11 @@ func (basisextender *FastBasisExtender) ModDownSplitPQ(level uint64, p1Q, p1P, p
for i := uint64(0); i < level+1; i++ {
qi := ringQ.Modulus[i]
twoqi := qi << 1
p1tmp := p1Q.Coeffs[i]
p2tmp := p2.Coeffs[i]
p3tmp := polypool.Coeffs[i]
params := modDownParams[i]
params := qi - modDownParams[i]
mredParams := ringQ.MredParams[i]
// Then for each coefficient we compute (P^-1) * (p1[i][j] - polypool[i][j]) mod qi
@@ -347,14 +352,14 @@ func (basisextender *FastBasisExtender) ModDownSplitPQ(level uint64, p1Q, p1P, p
y := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = MRed(x[0]+(qi-y[0]), params, qi, mredParams)
z[1] = MRed(x[1]+(qi-y[1]), params, qi, mredParams)
z[2] = MRed(x[2]+(qi-y[2]), params, qi, mredParams)
z[3] = MRed(x[3]+(qi-y[3]), params, qi, mredParams)
z[4] = MRed(x[4]+(qi-y[4]), params, qi, mredParams)
z[5] = MRed(x[5]+(qi-y[5]), params, qi, mredParams)
z[6] = MRed(x[6]+(qi-y[6]), params, qi, mredParams)
z[7] = MRed(x[7]+(qi-y[7]), params, qi, mredParams)
z[0] = MRed(y[0]+twoqi-x[0], params, qi, mredParams)
z[1] = MRed(y[1]+twoqi-x[1], params, qi, mredParams)
z[2] = MRed(y[2]+twoqi-x[2], params, qi, mredParams)
z[3] = MRed(y[3]+twoqi-x[3], params, qi, mredParams)
z[4] = MRed(y[4]+twoqi-x[4], params, qi, mredParams)
z[5] = MRed(y[5]+twoqi-x[5], params, qi, mredParams)
z[6] = MRed(y[6]+twoqi-x[6], params, qi, mredParams)
z[7] = MRed(y[7]+twoqi-x[7], params, qi, mredParams)
}
}
@@ -379,117 +384,55 @@ func (basisextender *FastBasisExtender) ModDownSplitQP(levelQ, levelP uint64, p1
for i := uint64(0); i < levelP+1; i++ {
qi := ringP.Modulus[i]
twoqi := qi << 1
p1tmp := p1P.Coeffs[i]
p2tmp := p2.Coeffs[i]
p3tmp := polypool.Coeffs[i]
params := modDownParams[i]
params := qi - modDownParams[i]
mredParams := ringP.MredParams[i]
// Then for each coefficient we compute (P^-1) * (p1[i][j] - polypool[i][j]) mod qi
for j := uint64(0); j < ringP.N; j++ {
p2tmp[j] = MRed(p1tmp[j]+(qi-p3tmp[j]), params, qi, mredParams)
for j := uint64(0); j < ringP.N; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
y := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = MRed(y[0]+twoqi-x[0], params, qi, mredParams)
z[1] = MRed(y[1]+twoqi-x[1], params, qi, mredParams)
z[2] = MRed(y[2]+twoqi-x[2], params, qi, mredParams)
z[3] = MRed(y[3]+twoqi-x[3], params, qi, mredParams)
z[4] = MRed(y[4]+twoqi-x[4], params, qi, mredParams)
z[5] = MRed(y[5]+twoqi-x[5], params, qi, mredParams)
z[6] = MRed(y[6]+twoqi-x[6], params, qi, mredParams)
z[7] = MRed(y[7]+twoqi-x[7], params, qi, mredParams)
}
}
// In total we do len(P) + len(Q) NTT, which is optimal (linear in the number of moduli of P and Q)
}
// Caution, returns the values in [0, 2q-1]
func modUpExact(p1, p2 [][]uint64, params *modupParams) {
var v0, v1, v2, v3, v4, v5, v6, v7 uint64
var vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7 float64
var xpj0, xpj1, xpj2, xpj3, xpj4, xpj5, xpj6, xpj7 uint64
y0 := make([]uint64, len(p1), len(p1))
y1 := make([]uint64, len(p1), len(p1))
y2 := make([]uint64, len(p1), len(p1))
y3 := make([]uint64, len(p1), len(p1))
y4 := make([]uint64, len(p1), len(p1))
y5 := make([]uint64, len(p1), len(p1))
y6 := make([]uint64, len(p1), len(p1))
y7 := make([]uint64, len(p1), len(p1))
var qibMont, qi, pj, mredParams uint64
var qif float64
var v [8]uint64
var y0, y1, y2, y3, y4, y5, y6, y7 [32]uint64
// We loop over each coefficient and apply the basis extension
for x := uint64(0); x < uint64(len(p1[0])); x = x + 8 {
vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7 = 0, 0, 0, 0, 0, 0, 0, 0
for i := 0; i < len(p1); i++ {
qibMont = params.qibMont[i]
qi = params.Q[i]
mredParams = params.mredParamsQ[i]
qif = float64(qi)
y0[i] = MRed(p1[i][x], qibMont, qi, mredParams)
y1[i] = MRed(p1[i][x+1], qibMont, qi, mredParams)
y2[i] = MRed(p1[i][x+2], qibMont, qi, mredParams)
y3[i] = MRed(p1[i][x+3], qibMont, qi, mredParams)
y4[i] = MRed(p1[i][x+4], qibMont, qi, mredParams)
y5[i] = MRed(p1[i][x+5], qibMont, qi, mredParams)
y6[i] = MRed(p1[i][x+6], qibMont, qi, mredParams)
y7[i] = MRed(p1[i][x+7], qibMont, qi, mredParams)
// Computation of the correction term v * Q%pi
vi0 += float64(y0[i]) / qif
vi1 += float64(y1[i]) / qif
vi2 += float64(y2[i]) / qif
vi3 += float64(y3[i]) / qif
vi4 += float64(y4[i]) / qif
vi5 += float64(y5[i]) / qif
vi6 += float64(y6[i]) / qif
vi7 += float64(y7[i]) / qif
}
// Index of the correction term
v0, v1, v2, v3, v4, v5, v6, v7 = uint64(vi0), uint64(vi1), uint64(vi2), uint64(vi3), uint64(vi4), uint64(vi5), uint64(vi6), uint64(vi7)
reconstructRNS(uint64(len(p1)), x, p1, &v, &y0, &y1, &y2, &y3, &y4, &y5, &y6, &y7, params.Q, params.mredParamsQ, params.qibMont)
for j := 0; j < len(p2); j++ {
xpj0, xpj1, xpj2, xpj3, xpj4, xpj5, xpj6, xpj7 = 0, 0, 0, 0, 0, 0, 0, 0
pj = params.P[j]
mredParams = params.mredParamsP[j]
bredParams := params.bredParamsP[j]
pj := params.P[j]
qInv := params.mredParamsP[j]
qpjInv := params.qpjInv[j]
qispjMont := params.qispjMont[j]
res := p2[j]
for i := 0; i < len(p1); i++ {
xpj0 += MRed(y0[i], qispjMont[i], pj, mredParams)
xpj1 += MRed(y1[i], qispjMont[i], pj, mredParams)
xpj2 += MRed(y2[i], qispjMont[i], pj, mredParams)
xpj3 += MRed(y3[i], qispjMont[i], pj, mredParams)
xpj4 += MRed(y4[i], qispjMont[i], pj, mredParams)
xpj5 += MRed(y5[i], qispjMont[i], pj, mredParams)
xpj6 += MRed(y6[i], qispjMont[i], pj, mredParams)
xpj7 += MRed(y7[i], qispjMont[i], pj, mredParams)
if i&7 == 6 { //Only every 7 additions, since we add one more 60 bit integer after the loop
xpj0 = BRedAdd(xpj0, pj, bredParams)
xpj1 = BRedAdd(xpj1, pj, bredParams)
xpj2 = BRedAdd(xpj2, pj, bredParams)
xpj3 = BRedAdd(xpj3, pj, bredParams)
xpj4 = BRedAdd(xpj4, pj, bredParams)
xpj5 = BRedAdd(xpj5, pj, bredParams)
xpj6 = BRedAdd(xpj6, pj, bredParams)
xpj7 = BRedAdd(xpj7, pj, bredParams)
}
}
res[x+0] = BRedAdd(xpj0+qpjInv[v0], pj, bredParams)
res[x+1] = BRedAdd(xpj1+qpjInv[v1], pj, bredParams)
res[x+2] = BRedAdd(xpj2+qpjInv[v2], pj, bredParams)
res[x+3] = BRedAdd(xpj3+qpjInv[v3], pj, bredParams)
res[x+4] = BRedAdd(xpj4+qpjInv[v4], pj, bredParams)
res[x+5] = BRedAdd(xpj5+qpjInv[v5], pj, bredParams)
res[x+6] = BRedAdd(xpj6+qpjInv[v6], pj, bredParams)
res[x+7] = BRedAdd(xpj7+qpjInv[v7], pj, bredParams)
res := (*[8]uint64)(unsafe.Pointer(&p2[j][x]))
multSum(res, &v, &y0, &y1, &y2, &y3, &y4, &y5, &y6, &y7, uint64(len(p1)), pj, qInv, qpjInv, qispjMont)
}
}
}
@@ -573,259 +516,6 @@ func NewDecomposer(Q, P []uint64) (decomposer *Decomposer) {
return
}
// Decompose decomposes a polynomial p(x) in basis Q, reduces it modulo qi, and returns
// the result in basis QP.
func (decomposer *Decomposer) Decompose(level, crtDecompLevel uint64, p0, p1 *Poly) {
alphai := decomposer.xalpha[crtDecompLevel]
p0idxst := crtDecompLevel * decomposer.alpha
p0idxed := p0idxst + alphai
// First we check if the vector can simply by coping and rearranging elements (the case where no reconstruction is needed)
if (p0idxed > level+1 && (level+1)%decomposer.nPprimes == 1) || alphai == 1 {
for x := uint64(0); x < uint64(len(p0.Coeffs[0])); x = x + 8 {
tmp := p0.Coeffs[p0idxst]
for j := uint64(0); j < level+decomposer.nPprimes+1; j++ {
p1.Coeffs[j][x+0] = tmp[x+0]
p1.Coeffs[j][x+1] = tmp[x+1]
p1.Coeffs[j][x+2] = tmp[x+2]
p1.Coeffs[j][x+3] = tmp[x+3]
p1.Coeffs[j][x+4] = tmp[x+4]
p1.Coeffs[j][x+5] = tmp[x+5]
p1.Coeffs[j][x+6] = tmp[x+6]
p1.Coeffs[j][x+7] = tmp[x+7]
}
}
// Otherwise, we apply a fast exact base conversion for the reconstruction
} else {
var index uint64
if level >= alphai+crtDecompLevel*decomposer.alpha {
index = decomposer.xalpha[crtDecompLevel] - 2
} else {
index = (level - 1) % decomposer.alpha
}
params := decomposer.modUpParams[crtDecompLevel][index]
v := make([]uint64, 8, 8)
vi := make([]float64, 8, 8)
xpj := make([]uint64, 8, 8)
y0 := make([]uint64, index+2, index+2)
y1 := make([]uint64, index+2, index+2)
y2 := make([]uint64, index+2, index+2)
y3 := make([]uint64, index+2, index+2)
y4 := make([]uint64, index+2, index+2)
y5 := make([]uint64, index+2, index+2)
y6 := make([]uint64, index+2, index+2)
y7 := make([]uint64, index+2, index+2)
var qibMont, qi, pj, mredParams uint64
var qif float64
// We loop over each coefficient and apply the basis extension
for x := uint64(0); x < uint64(len(p0.Coeffs[0])); x = x + 8 {
vi[0], vi[1], vi[2], vi[3], vi[4], vi[5], vi[6], vi[7] = 0, 0, 0, 0, 0, 0, 0, 0
// Coefficients to be decomposed
for i, j := uint64(0), p0idxst; i < index+2; i, j = i+1, j+1 {
qibMont = params.qibMont[i]
qi = params.Q[i]
mredParams = params.mredParamsQ[i]
qif = float64(qi)
px := p0.Coeffs[j]
py := p1.Coeffs[j]
// For the coefficients to be decomposed, we can simply copy them
py[x+0] = px[x+0]
py[x+1] = px[x+1]
py[x+2] = px[x+2]
py[x+3] = px[x+3]
py[x+4] = px[x+4]
py[x+5] = px[x+5]
py[x+6] = px[x+6]
py[x+7] = px[x+7]
y0[i] = MRed(px[x+0], qibMont, qi, mredParams)
y1[i] = MRed(px[x+1], qibMont, qi, mredParams)
y2[i] = MRed(px[x+2], qibMont, qi, mredParams)
y3[i] = MRed(px[x+3], qibMont, qi, mredParams)
y4[i] = MRed(px[x+4], qibMont, qi, mredParams)
y5[i] = MRed(px[x+5], qibMont, qi, mredParams)
y6[i] = MRed(px[x+6], qibMont, qi, mredParams)
y7[i] = MRed(px[x+7], qibMont, qi, mredParams)
// Computation of the correction term v * Q%pi
vi[0] += float64(y0[i]) / qif
vi[1] += float64(y1[i]) / qif
vi[2] += float64(y2[i]) / qif
vi[3] += float64(y3[i]) / qif
vi[4] += float64(y4[i]) / qif
vi[5] += float64(y5[i]) / qif
vi[6] += float64(y6[i]) / qif
vi[7] += float64(y7[i]) / qif
}
// Index of the correction term
v[0] = uint64(vi[0])
v[1] = uint64(vi[1])
v[2] = uint64(vi[2])
v[3] = uint64(vi[3])
v[4] = uint64(vi[4])
v[5] = uint64(vi[5])
v[6] = uint64(vi[6])
v[7] = uint64(vi[7])
// Coefficients of index smaller than the ones to be decomposed
for j := uint64(0); j < p0idxst; j++ {
xpj[0], xpj[1], xpj[2], xpj[3], xpj[4], xpj[5], xpj[6], xpj[7] = 0, 0, 0, 0, 0, 0, 0, 0
pj = params.P[j]
mredParams = params.mredParamsP[j]
bredParams := params.bredParamsP[j]
qpjInv := params.qpjInv[j]
qispjMont := params.qispjMont[j]
res := p1.Coeffs[j]
for i := uint64(0); i < index+2; i++ {
xpj[0] += MRed(y0[i], qispjMont[i], pj, mredParams)
xpj[1] += MRed(y1[i], qispjMont[i], pj, mredParams)
xpj[2] += MRed(y2[i], qispjMont[i], pj, mredParams)
xpj[3] += MRed(y3[i], qispjMont[i], pj, mredParams)
xpj[4] += MRed(y4[i], qispjMont[i], pj, mredParams)
xpj[5] += MRed(y5[i], qispjMont[i], pj, mredParams)
xpj[6] += MRed(y6[i], qispjMont[i], pj, mredParams)
xpj[7] += MRed(y7[i], qispjMont[i], pj, mredParams)
if i&7 == 6 { // Only every 7 additions, since we add one more 60 bit integer after the loop
xpj[0] = BRedAdd(xpj[0], pj, bredParams)
xpj[1] = BRedAdd(xpj[1], pj, bredParams)
xpj[2] = BRedAdd(xpj[2], pj, bredParams)
xpj[3] = BRedAdd(xpj[3], pj, bredParams)
xpj[4] = BRedAdd(xpj[4], pj, bredParams)
xpj[5] = BRedAdd(xpj[5], pj, bredParams)
xpj[6] = BRedAdd(xpj[6], pj, bredParams)
xpj[7] = BRedAdd(xpj[7], pj, bredParams)
}
}
res[x+0] = BRedAdd(xpj[0]+qpjInv[v[0]], pj, bredParams)
res[x+1] = BRedAdd(xpj[1]+qpjInv[v[1]], pj, bredParams)
res[x+2] = BRedAdd(xpj[2]+qpjInv[v[2]], pj, bredParams)
res[x+3] = BRedAdd(xpj[3]+qpjInv[v[3]], pj, bredParams)
res[x+4] = BRedAdd(xpj[4]+qpjInv[v[4]], pj, bredParams)
res[x+5] = BRedAdd(xpj[5]+qpjInv[v[5]], pj, bredParams)
res[x+6] = BRedAdd(xpj[6]+qpjInv[v[6]], pj, bredParams)
res[x+7] = BRedAdd(xpj[7]+qpjInv[v[7]], pj, bredParams)
}
// Coefficients of index greater than the ones to be decomposed
for j := decomposer.alpha * crtDecompLevel; j < level+1; j = j + 1 {
xpj[0], xpj[1], xpj[2], xpj[3], xpj[4], xpj[5], xpj[6], xpj[7] = 0, 0, 0, 0, 0, 0, 0, 0
pj = params.P[j]
mredParams = params.mredParamsP[j]
bredParams := params.bredParamsP[j]
qpjInv := params.qpjInv[j]
qispjMont := params.qispjMont[j]
res := p1.Coeffs[j]
for i := uint64(0); i < index+2; i++ {
xpj[0] += MRed(y0[i], qispjMont[i], pj, mredParams)
xpj[1] += MRed(y1[i], qispjMont[i], pj, mredParams)
xpj[2] += MRed(y2[i], qispjMont[i], pj, mredParams)
xpj[3] += MRed(y3[i], qispjMont[i], pj, mredParams)
xpj[4] += MRed(y4[i], qispjMont[i], pj, mredParams)
xpj[5] += MRed(y5[i], qispjMont[i], pj, mredParams)
xpj[6] += MRed(y6[i], qispjMont[i], pj, mredParams)
xpj[7] += MRed(y7[i], qispjMont[i], pj, mredParams)
if i&7 == 6 { // Only every 7 additions, since we add one more 60 bit integer after the loop
xpj[0] = BRedAdd(xpj[0], pj, bredParams)
xpj[1] = BRedAdd(xpj[1], pj, bredParams)
xpj[2] = BRedAdd(xpj[2], pj, bredParams)
xpj[3] = BRedAdd(xpj[3], pj, bredParams)
xpj[4] = BRedAdd(xpj[4], pj, bredParams)
xpj[5] = BRedAdd(xpj[5], pj, bredParams)
xpj[6] = BRedAdd(xpj[6], pj, bredParams)
xpj[7] = BRedAdd(xpj[7], pj, bredParams)
}
}
res[x+0] = BRedAdd(xpj[0]+qpjInv[v[0]], pj, bredParams)
res[x+1] = BRedAdd(xpj[1]+qpjInv[v[1]], pj, bredParams)
res[x+2] = BRedAdd(xpj[2]+qpjInv[v[2]], pj, bredParams)
res[x+3] = BRedAdd(xpj[3]+qpjInv[v[3]], pj, bredParams)
res[x+4] = BRedAdd(xpj[4]+qpjInv[v[4]], pj, bredParams)
res[x+5] = BRedAdd(xpj[5]+qpjInv[v[5]], pj, bredParams)
res[x+6] = BRedAdd(xpj[6]+qpjInv[v[6]], pj, bredParams)
res[x+7] = BRedAdd(xpj[7]+qpjInv[v[7]], pj, bredParams)
}
// Coefficients of the special primes
for j, u := level+1, decomposer.nQprimes; j < level+1+decomposer.nPprimes; j, u = u+1, j+1 {
xpj[0], xpj[1], xpj[2], xpj[3], xpj[4], xpj[5], xpj[6], xpj[7] = 0, 0, 0, 0, 0, 0, 0, 0
pj = params.P[j]
mredParams = params.mredParamsP[j]
bredParams := params.bredParamsP[j]
qpjInv := params.qpjInv[j]
qispjMont := params.qispjMont[j]
res := p1.Coeffs[u]
for i := uint64(0); i < index+2; i++ {
xpj[0] += MRed(y0[i], qispjMont[i], pj, mredParams)
xpj[1] += MRed(y1[i], qispjMont[i], pj, mredParams)
xpj[2] += MRed(y2[i], qispjMont[i], pj, mredParams)
xpj[3] += MRed(y3[i], qispjMont[i], pj, mredParams)
xpj[4] += MRed(y4[i], qispjMont[i], pj, mredParams)
xpj[5] += MRed(y5[i], qispjMont[i], pj, mredParams)
xpj[6] += MRed(y6[i], qispjMont[i], pj, mredParams)
xpj[7] += MRed(y7[i], qispjMont[i], pj, mredParams)
if i&7 == 6 { // Only every 7 additions, since we add one more 60 bit integer after the loop
xpj[0] = BRedAdd(xpj[0], pj, bredParams)
xpj[1] = BRedAdd(xpj[1], pj, bredParams)
xpj[2] = BRedAdd(xpj[2], pj, bredParams)
xpj[3] = BRedAdd(xpj[3], pj, bredParams)
xpj[4] = BRedAdd(xpj[4], pj, bredParams)
xpj[5] = BRedAdd(xpj[5], pj, bredParams)
xpj[6] = BRedAdd(xpj[6], pj, bredParams)
xpj[7] = BRedAdd(xpj[7], pj, bredParams)
}
}
res[x+0] = BRedAdd(xpj[0]+qpjInv[v[0]], pj, bredParams)
res[x+1] = BRedAdd(xpj[1]+qpjInv[v[1]], pj, bredParams)
res[x+2] = BRedAdd(xpj[2]+qpjInv[v[2]], pj, bredParams)
res[x+3] = BRedAdd(xpj[3]+qpjInv[v[3]], pj, bredParams)
res[x+4] = BRedAdd(xpj[4]+qpjInv[v[4]], pj, bredParams)
res[x+5] = BRedAdd(xpj[5]+qpjInv[v[5]], pj, bredParams)
res[x+6] = BRedAdd(xpj[6]+qpjInv[v[6]], pj, bredParams)
res[x+7] = BRedAdd(xpj[7]+qpjInv[v[7]], pj, bredParams)
}
}
}
}
// DecomposeAndSplit decomposes a polynomial p(x) in basis Q, reduces it modulo qi, and returns
// the result in basis QP separately.
func (decomposer *Decomposer) DecomposeAndSplit(level, crtDecompLevel uint64, p0, p1Q, p1P *Poly) {
@@ -838,33 +528,12 @@ func (decomposer *Decomposer) DecomposeAndSplit(level, crtDecompLevel uint64, p0
// First we check if the vector can simply by coping and rearranging elements (the case where no reconstruction is needed)
if (p0idxed > level+1 && (level+1)%decomposer.nPprimes == 1) || alphai == 1 {
for x := uint64(0); x < uint64(len(p0.Coeffs[0])); x = x + 8 {
for j := uint64(0); j < level+1; j++ {
copy(p1Q.Coeffs[j], p0.Coeffs[p0idxst])
}
tmp := p0.Coeffs[p0idxst]
for j := uint64(0); j < level+1; j++ {
p1Q.Coeffs[j][x+0] = tmp[x+0]
p1Q.Coeffs[j][x+1] = tmp[x+1]
p1Q.Coeffs[j][x+2] = tmp[x+2]
p1Q.Coeffs[j][x+3] = tmp[x+3]
p1Q.Coeffs[j][x+4] = tmp[x+4]
p1Q.Coeffs[j][x+5] = tmp[x+5]
p1Q.Coeffs[j][x+6] = tmp[x+6]
p1Q.Coeffs[j][x+7] = tmp[x+7]
}
for j := uint64(0); j < decomposer.nPprimes; j++ {
p1P.Coeffs[j][x+0] = tmp[x+0]
p1P.Coeffs[j][x+1] = tmp[x+1]
p1P.Coeffs[j][x+2] = tmp[x+2]
p1P.Coeffs[j][x+3] = tmp[x+3]
p1P.Coeffs[j][x+4] = tmp[x+4]
p1P.Coeffs[j][x+5] = tmp[x+5]
p1P.Coeffs[j][x+6] = tmp[x+6]
p1P.Coeffs[j][x+7] = tmp[x+7]
}
for j := uint64(0); j < decomposer.nPprimes; j++ {
copy(p1P.Coeffs[j], p0.Coeffs[p0idxst])
}
// Otherwise, we apply a fast exact base conversion for the reconstruction
@@ -879,19 +548,9 @@ func (decomposer *Decomposer) DecomposeAndSplit(level, crtDecompLevel uint64, p0
params := decomposer.modUpParams[crtDecompLevel][index]
v := make([]uint64, 8, 8)
vi := make([]float64, 8, 8)
xpj := make([]uint64, 8, 8)
y0 := make([]uint64, index+2, index+2)
y1 := make([]uint64, index+2, index+2)
y2 := make([]uint64, index+2, index+2)
y3 := make([]uint64, index+2, index+2)
y4 := make([]uint64, index+2, index+2)
y5 := make([]uint64, index+2, index+2)
y6 := make([]uint64, index+2, index+2)
y7 := make([]uint64, index+2, index+2)
var v [8]uint64
var vi [8]float64
var y0, y1, y2, y3, y4, y5, y6, y7 [32]uint64
var qibMont, qi, pj, mredParams uint64
var qif float64
@@ -908,27 +567,20 @@ func (decomposer *Decomposer) DecomposeAndSplit(level, crtDecompLevel uint64, p0
mredParams = params.mredParamsQ[i]
qif = float64(qi)
px := p0.Coeffs[j]
py := p1Q.Coeffs[j]
px := (*[8]uint64)(unsafe.Pointer(&p0.Coeffs[j][x]))
py := (*[8]uint64)(unsafe.Pointer(&p1Q.Coeffs[j][x]))
// For the coefficients to be decomposed, we can simply copy them
py[x+0] = px[x+0]
py[x+1] = px[x+1]
py[x+2] = px[x+2]
py[x+3] = px[x+3]
py[x+4] = px[x+4]
py[x+5] = px[x+5]
py[x+6] = px[x+6]
py[x+7] = px[x+7]
py[0], py[1], py[2], py[3], py[4], py[5], py[6], py[7] = px[0], px[1], px[2], px[3], px[4], px[5], px[6], px[7]
y0[i] = MRed(px[x+0], qibMont, qi, mredParams)
y1[i] = MRed(px[x+1], qibMont, qi, mredParams)
y2[i] = MRed(px[x+2], qibMont, qi, mredParams)
y3[i] = MRed(px[x+3], qibMont, qi, mredParams)
y4[i] = MRed(px[x+4], qibMont, qi, mredParams)
y5[i] = MRed(px[x+5], qibMont, qi, mredParams)
y6[i] = MRed(px[x+6], qibMont, qi, mredParams)
y7[i] = MRed(px[x+7], qibMont, qi, mredParams)
y0[i] = MRed(px[0], qibMont, qi, mredParams)
y1[i] = MRed(px[1], qibMont, qi, mredParams)
y2[i] = MRed(px[2], qibMont, qi, mredParams)
y3[i] = MRed(px[3], qibMont, qi, mredParams)
y4[i] = MRed(px[4], qibMont, qi, mredParams)
y5[i] = MRed(px[5], qibMont, qi, mredParams)
y6[i] = MRed(px[6], qibMont, qi, mredParams)
y7[i] = MRed(px[7], qibMont, qi, mredParams)
// Computation of the correction term v * Q%pi
vi[0] += float64(y0[i]) / qif
@@ -954,140 +606,151 @@ func (decomposer *Decomposer) DecomposeAndSplit(level, crtDecompLevel uint64, p0
// Coefficients of index smaller than the ones to be decomposed
for j := uint64(0); j < p0idxst; j++ {
xpj[0], xpj[1], xpj[2], xpj[3], xpj[4], xpj[5], xpj[6], xpj[7] = 0, 0, 0, 0, 0, 0, 0, 0
pj = params.P[j]
mredParams = params.mredParamsP[j]
bredParams := params.bredParamsP[j]
qInv := params.mredParamsP[j]
qpjInv := params.qpjInv[j]
qispjMont := params.qispjMont[j]
res := p1Q.Coeffs[j]
for i := uint64(0); i < index+2; i++ {
xpj[0] += MRed(y0[i], qispjMont[i], pj, mredParams)
xpj[1] += MRed(y1[i], qispjMont[i], pj, mredParams)
xpj[2] += MRed(y2[i], qispjMont[i], pj, mredParams)
xpj[3] += MRed(y3[i], qispjMont[i], pj, mredParams)
xpj[4] += MRed(y4[i], qispjMont[i], pj, mredParams)
xpj[5] += MRed(y5[i], qispjMont[i], pj, mredParams)
xpj[6] += MRed(y6[i], qispjMont[i], pj, mredParams)
xpj[7] += MRed(y7[i], qispjMont[i], pj, mredParams)
if i&7 == 6 { // Only every 7 additions, since we add one more 60 bit integer after the loop
xpj[0] = BRedAdd(xpj[0], pj, bredParams)
xpj[1] = BRedAdd(xpj[1], pj, bredParams)
xpj[2] = BRedAdd(xpj[2], pj, bredParams)
xpj[3] = BRedAdd(xpj[3], pj, bredParams)
xpj[4] = BRedAdd(xpj[4], pj, bredParams)
xpj[5] = BRedAdd(xpj[5], pj, bredParams)
xpj[6] = BRedAdd(xpj[6], pj, bredParams)
xpj[7] = BRedAdd(xpj[7], pj, bredParams)
}
}
res[x+0] = BRedAdd(xpj[0]+qpjInv[v[0]], pj, bredParams)
res[x+1] = BRedAdd(xpj[1]+qpjInv[v[1]], pj, bredParams)
res[x+2] = BRedAdd(xpj[2]+qpjInv[v[2]], pj, bredParams)
res[x+3] = BRedAdd(xpj[3]+qpjInv[v[3]], pj, bredParams)
res[x+4] = BRedAdd(xpj[4]+qpjInv[v[4]], pj, bredParams)
res[x+5] = BRedAdd(xpj[5]+qpjInv[v[5]], pj, bredParams)
res[x+6] = BRedAdd(xpj[6]+qpjInv[v[6]], pj, bredParams)
res[x+7] = BRedAdd(xpj[7]+qpjInv[v[7]], pj, bredParams)
res := (*[8]uint64)(unsafe.Pointer(&p1Q.Coeffs[j][x]))
multSum(res, &v, &y0, &y1, &y2, &y3, &y4, &y5, &y6, &y7, index+2, pj, qInv, qpjInv, qispjMont)
}
// Coefficients of index greater than the ones to be decomposed
for j := decomposer.alpha * crtDecompLevel; j < level+1; j = j + 1 {
xpj[0], xpj[1], xpj[2], xpj[3], xpj[4], xpj[5], xpj[6], xpj[7] = 0, 0, 0, 0, 0, 0, 0, 0
pj = params.P[j]
mredParams = params.mredParamsP[j]
bredParams := params.bredParamsP[j]
qInv := params.mredParamsP[j]
qpjInv := params.qpjInv[j]
qispjMont := params.qispjMont[j]
res := p1Q.Coeffs[j]
for i := uint64(0); i < index+2; i++ {
xpj[0] += MRed(y0[i], qispjMont[i], pj, mredParams)
xpj[1] += MRed(y1[i], qispjMont[i], pj, mredParams)
xpj[2] += MRed(y2[i], qispjMont[i], pj, mredParams)
xpj[3] += MRed(y3[i], qispjMont[i], pj, mredParams)
xpj[4] += MRed(y4[i], qispjMont[i], pj, mredParams)
xpj[5] += MRed(y5[i], qispjMont[i], pj, mredParams)
xpj[6] += MRed(y6[i], qispjMont[i], pj, mredParams)
xpj[7] += MRed(y7[i], qispjMont[i], pj, mredParams)
if i&7 == 6 { // Only every 7 additions, since we add one more 60 bit integer after the loop
xpj[0] = BRedAdd(xpj[0], pj, bredParams)
xpj[1] = BRedAdd(xpj[1], pj, bredParams)
xpj[2] = BRedAdd(xpj[2], pj, bredParams)
xpj[3] = BRedAdd(xpj[3], pj, bredParams)
xpj[4] = BRedAdd(xpj[4], pj, bredParams)
xpj[5] = BRedAdd(xpj[5], pj, bredParams)
xpj[6] = BRedAdd(xpj[6], pj, bredParams)
xpj[7] = BRedAdd(xpj[7], pj, bredParams)
}
}
res[x+0] = BRedAdd(xpj[0]+qpjInv[v[0]], pj, bredParams)
res[x+1] = BRedAdd(xpj[1]+qpjInv[v[1]], pj, bredParams)
res[x+2] = BRedAdd(xpj[2]+qpjInv[v[2]], pj, bredParams)
res[x+3] = BRedAdd(xpj[3]+qpjInv[v[3]], pj, bredParams)
res[x+4] = BRedAdd(xpj[4]+qpjInv[v[4]], pj, bredParams)
res[x+5] = BRedAdd(xpj[5]+qpjInv[v[5]], pj, bredParams)
res[x+6] = BRedAdd(xpj[6]+qpjInv[v[6]], pj, bredParams)
res[x+7] = BRedAdd(xpj[7]+qpjInv[v[7]], pj, bredParams)
res := (*[8]uint64)(unsafe.Pointer(&p1Q.Coeffs[j][x]))
multSum(res, &v, &y0, &y1, &y2, &y3, &y4, &y5, &y6, &y7, index+2, pj, qInv, qpjInv, qispjMont)
}
// Coefficients of the special primes
// Coefficients of the special primes Pi
for j, u := uint64(0), decomposer.nQprimes; j < decomposer.nPprimes; j, u = j+1, u+1 {
xpj[0], xpj[1], xpj[2], xpj[3], xpj[4], xpj[5], xpj[6], xpj[7] = 0, 0, 0, 0, 0, 0, 0, 0
pj = params.P[u]
mredParams = params.mredParamsP[u]
bredParams := params.bredParamsP[u]
qInv := params.mredParamsP[u]
qpjInv := params.qpjInv[u]
qispjMont := params.qispjMont[u]
res := p1P.Coeffs[j]
for i := uint64(0); i < index+2; i++ {
xpj[0] += MRed(y0[i], qispjMont[i], pj, mredParams)
xpj[1] += MRed(y1[i], qispjMont[i], pj, mredParams)
xpj[2] += MRed(y2[i], qispjMont[i], pj, mredParams)
xpj[3] += MRed(y3[i], qispjMont[i], pj, mredParams)
xpj[4] += MRed(y4[i], qispjMont[i], pj, mredParams)
xpj[5] += MRed(y5[i], qispjMont[i], pj, mredParams)
xpj[6] += MRed(y6[i], qispjMont[i], pj, mredParams)
xpj[7] += MRed(y7[i], qispjMont[i], pj, mredParams)
if i&7 == 6 { // Only every 7 additions, since we add one more 60 bit integer after the loop
xpj[0] = BRedAdd(xpj[0], pj, bredParams)
xpj[1] = BRedAdd(xpj[1], pj, bredParams)
xpj[2] = BRedAdd(xpj[2], pj, bredParams)
xpj[3] = BRedAdd(xpj[3], pj, bredParams)
xpj[4] = BRedAdd(xpj[4], pj, bredParams)
xpj[5] = BRedAdd(xpj[5], pj, bredParams)
xpj[6] = BRedAdd(xpj[6], pj, bredParams)
xpj[7] = BRedAdd(xpj[7], pj, bredParams)
}
}
res[x+0] = BRedAdd(xpj[0]+qpjInv[v[0]], pj, bredParams)
res[x+1] = BRedAdd(xpj[1]+qpjInv[v[1]], pj, bredParams)
res[x+2] = BRedAdd(xpj[2]+qpjInv[v[2]], pj, bredParams)
res[x+3] = BRedAdd(xpj[3]+qpjInv[v[3]], pj, bredParams)
res[x+4] = BRedAdd(xpj[4]+qpjInv[v[4]], pj, bredParams)
res[x+5] = BRedAdd(xpj[5]+qpjInv[v[5]], pj, bredParams)
res[x+6] = BRedAdd(xpj[6]+qpjInv[v[6]], pj, bredParams)
res[x+7] = BRedAdd(xpj[7]+qpjInv[v[7]], pj, bredParams)
res := (*[8]uint64)(unsafe.Pointer(&p1P.Coeffs[j][x]))
multSum(res, &v, &y0, &y1, &y2, &y3, &y4, &y5, &y6, &y7, index+2, pj, qInv, qpjInv, qispjMont)
}
}
}
}
func reconstructRNS(index, x uint64, p [][]uint64, v *[8]uint64, y0, y1, y2, y3, y4, y5, y6, y7 *[32]uint64, Q, QInv, QbMont []uint64) {
var vi [8]float64
var qi, qiInv, qibMont uint64
var qif float64
for i := uint64(0); i < index; i++ {
qibMont = QbMont[i]
qi = Q[i]
qiInv = QInv[i]
qif = float64(qi)
y0[i] = MRed(p[i][x+0], qibMont, qi, qiInv)
y1[i] = MRed(p[i][x+1], qibMont, qi, qiInv)
y2[i] = MRed(p[i][x+2], qibMont, qi, qiInv)
y3[i] = MRed(p[i][x+3], qibMont, qi, qiInv)
y4[i] = MRed(p[i][x+4], qibMont, qi, qiInv)
y5[i] = MRed(p[i][x+5], qibMont, qi, qiInv)
y6[i] = MRed(p[i][x+6], qibMont, qi, qiInv)
y7[i] = MRed(p[i][x+7], qibMont, qi, qiInv)
// Computation of the correction term v * Q%pi
vi[0] += float64(y0[i]) / qif
vi[1] += float64(y1[i]) / qif
vi[2] += float64(y2[i]) / qif
vi[3] += float64(y3[i]) / qif
vi[4] += float64(y4[i]) / qif
vi[5] += float64(y5[i]) / qif
vi[6] += float64(y6[i]) / qif
vi[7] += float64(y7[i]) / qif
}
v[0] = uint64(vi[0])
v[1] = uint64(vi[1])
v[2] = uint64(vi[2])
v[3] = uint64(vi[3])
v[4] = uint64(vi[4])
v[5] = uint64(vi[5])
v[6] = uint64(vi[6])
v[7] = uint64(vi[7])
}
// Caution, returns the values in [0, 2q-1]
func multSum(res, v *[8]uint64, y0, y1, y2, y3, y4, y5, y6, y7 *[32]uint64, index, pj, qInv uint64, qpjInv, qispjMont []uint64) {
var rlo, rhi [8]uint64
var mhi, mlo, c, hhi uint64
// Accumulates the sum on uint128 and does a lazy montgomery reduction at the end
for i := uint64(0); i < index; i++ {
mhi, mlo = bits.Mul64(y0[i], qispjMont[i])
rlo[0], c = bits.Add64(rlo[0], mlo, 0)
rhi[0] += mhi + c
mhi, mlo = bits.Mul64(y1[i], qispjMont[i])
rlo[1], c = bits.Add64(rlo[1], mlo, 0)
rhi[1] += mhi + c
mhi, mlo = bits.Mul64(y2[i], qispjMont[i])
rlo[2], c = bits.Add64(rlo[2], mlo, 0)
rhi[2] += mhi + c
mhi, mlo = bits.Mul64(y3[i], qispjMont[i])
rlo[3], c = bits.Add64(rlo[3], mlo, 0)
rhi[3] += mhi + c
mhi, mlo = bits.Mul64(y4[i], qispjMont[i])
rlo[4], c = bits.Add64(rlo[4], mlo, 0)
rhi[4] += mhi + c
mhi, mlo = bits.Mul64(y5[i], qispjMont[i])
rlo[5], c = bits.Add64(rlo[5], mlo, 0)
rhi[5] += mhi + c
mhi, mlo = bits.Mul64(y6[i], qispjMont[i])
rlo[6], c = bits.Add64(rlo[6], mlo, 0)
rhi[6] += mhi + c
mhi, mlo = bits.Mul64(y7[i], qispjMont[i])
rlo[7], c = bits.Add64(rlo[7], mlo, 0)
rhi[7] += mhi + c
}
hhi, _ = bits.Mul64(rlo[0]*qInv, pj)
res[0] = rhi[0] - hhi + pj + qpjInv[v[0]]
hhi, _ = bits.Mul64(rlo[1]*qInv, pj)
res[1] = rhi[1] - hhi + pj + qpjInv[v[1]]
hhi, _ = bits.Mul64(rlo[2]*qInv, pj)
res[2] = rhi[2] - hhi + pj + qpjInv[v[2]]
hhi, _ = bits.Mul64(rlo[3]*qInv, pj)
res[3] = rhi[3] - hhi + pj + qpjInv[v[3]]
hhi, _ = bits.Mul64(rlo[4]*qInv, pj)
res[4] = rhi[4] - hhi + pj + qpjInv[v[4]]
hhi, _ = bits.Mul64(rlo[5]*qInv, pj)
res[5] = rhi[5] - hhi + pj + qpjInv[v[5]]
hhi, _ = bits.Mul64(rlo[6]*qInv, pj)
res[6] = rhi[6] - hhi + pj + qpjInv[v[6]]
hhi, _ = bits.Mul64(rlo[7]*qInv, pj)
res[7] = rhi[7] - hhi + pj + qpjInv[v[7]]
}

View File

@@ -1,6 +1,7 @@
package ring
import (
"math/bits"
"unsafe"
)
@@ -34,142 +35,51 @@ func (r *Ring) InvNTTLvl(level uint64, p1, p2 *Poly) {
}
}
// butterfly computes X, Y = U + V*Psi, U - V*Psi mod Q.
func butterfly(U, V, Psi, Q, Qinv uint64) (X, Y uint64) {
if U > 2*Q {
U -= 2 * Q
// NTTLazy computes the NTT of p1 and returns the result on p2.
// Output values are in the range [0, 2q-1]
func (r *Ring) NTTLazy(p1, p2 *Poly) {
for x := range r.Modulus {
NTTLazy(p1.Coeffs[x], p2.Coeffs[x], r.N, r.NttPsi[x], r.Modulus[x], r.MredParams[x], r.BredParams[x])
}
V = MRedConstant(V, Psi, Q, Qinv)
X = U + V
Y = U + 2*Q - V
return
}
// invbutterfly computes X, Y = U + V, (U - V) * Psi mod Q.
func invbutterfly(U, V, Psi, Q, Qinv uint64) (X, Y uint64) {
X = U + V
if X > 2*Q {
X -= 2 * Q
// NTTLazyLvl computes the NTT of p1 and returns the result on p2.
// The value level defines the number of moduli of the input polynomials.
// Output values are in the range [0, 2q-1]
func (r *Ring) NTTLazyLvl(level uint64, p1, p2 *Poly) {
for x := uint64(0); x < level+1; x++ {
NTTLazy(p1.Coeffs[x], p2.Coeffs[x], r.N, r.NttPsi[x], r.Modulus[x], r.MredParams[x], r.BredParams[x])
}
Y = MRedConstant(U+2*Q-V, Psi, Q, Qinv) // At the moment it is not possible to use MRedConstant if Q > 61 bits
return
}
// InvNTTLazy computes the inverse-NTT of p1 and returns the result on p2.
// Output values are in the range [0, 2q-1]
func (r *Ring) InvNTTLazy(p1, p2 *Poly) {
for x := range r.Modulus {
InvNTTLazy(p1.Coeffs[x], p2.Coeffs[x], r.N, r.NttPsiInv[x], r.NttNInv[x], r.Modulus[x], r.MredParams[x])
}
}
// InvNTTLazyLvl computes the inverse-NTT of p1 and returns the result on p2.
// The value level defines the number of moduli of the input polynomials.
// Output values are in the range [0, 2q-1]
func (r *Ring) InvNTTLazyLvl(level uint64, p1, p2 *Poly) {
for x := uint64(0); x < level+1; x++ {
InvNTTLazy(p1.Coeffs[x], p2.Coeffs[x], r.N, r.NttPsiInv[x], r.NttNInv[x], r.Modulus[x], r.MredParams[x])
}
}
// butterfly computes X, Y = U + V*Psi, U - V*Psi mod Q.
func butterfly(U, V, Psi, twoQ, QBitLen, Q, Qinv uint64) (uint64, uint64) {
U -= (U >> QBitLen) * Q
V = MRedConstant(V, Psi, Q, Qinv)
return U + V, U + twoQ - V
}
// NTT computes the NTT on the input coefficients using the input parameters.
func NTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsi []uint64, Q, mredParams uint64, bredParams []uint64) {
var j1, j2, t uint64
var F uint64
// Copy the result of the first round of butterflies on p2 with approximate reduction
t = N >> 1
F = nttPsi[1]
for j := uint64(0); j <= t-1; j = j + 8 {
xin := (*[8]uint64)(unsafe.Pointer(&coeffsIn[j]))
yin := (*[8]uint64)(unsafe.Pointer(&coeffsIn[j+t]))
xout := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j]))
yout := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j+t]))
xout[0], yout[0] = butterfly(xin[0], yin[0], F, Q, mredParams)
xout[1], yout[1] = butterfly(xin[1], yin[1], F, Q, mredParams)
xout[2], yout[2] = butterfly(xin[2], yin[2], F, Q, mredParams)
xout[3], yout[3] = butterfly(xin[3], yin[3], F, Q, mredParams)
xout[4], yout[4] = butterfly(xin[4], yin[4], F, Q, mredParams)
xout[5], yout[5] = butterfly(xin[5], yin[5], F, Q, mredParams)
xout[6], yout[6] = butterfly(xin[6], yin[6], F, Q, mredParams)
xout[7], yout[7] = butterfly(xin[7], yin[7], F, Q, mredParams)
}
// Continue the rest of the second to the n-1 butterflies on p2 with approximate reduction
for m := uint64(2); m < N; m <<= 1 {
t >>= 1
if t >= 8 {
for i := uint64(0); i < m; i++ {
j1 = (i * t) << 1
j2 = j1 + t - 1
F = nttPsi[m+i]
for j := j1; j <= j2; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j]))
y := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j+t]))
x[0], y[0] = butterfly(x[0], y[0], F, Q, mredParams)
x[1], y[1] = butterfly(x[1], y[1], F, Q, mredParams)
x[2], y[2] = butterfly(x[2], y[2], F, Q, mredParams)
x[3], y[3] = butterfly(x[3], y[3], F, Q, mredParams)
x[4], y[4] = butterfly(x[4], y[4], F, Q, mredParams)
x[5], y[5] = butterfly(x[5], y[5], F, Q, mredParams)
x[6], y[6] = butterfly(x[6], y[6], F, Q, mredParams)
x[7], y[7] = butterfly(x[7], y[7], F, Q, mredParams)
}
}
} else if t == 4 {
for i := uint64(0); i < m; i = i + 2 {
j1 = (i * t) << 1
psi := (*[2]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
x[0], x[4] = butterfly(x[0], x[4], psi[0], Q, mredParams)
x[1], x[5] = butterfly(x[1], x[5], psi[0], Q, mredParams)
x[2], x[6] = butterfly(x[2], x[6], psi[0], Q, mredParams)
x[3], x[7] = butterfly(x[3], x[7], psi[0], Q, mredParams)
x[8], x[12] = butterfly(x[8], x[12], psi[1], Q, mredParams)
x[9], x[13] = butterfly(x[9], x[13], psi[1], Q, mredParams)
x[10], x[14] = butterfly(x[10], x[14], psi[1], Q, mredParams)
x[11], x[15] = butterfly(x[11], x[15], psi[1], Q, mredParams)
}
} else if t == 2 {
for i := uint64(0); i < m; i = i + 4 {
j1 = (i * t) << 1
psi := (*[4]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
x[0], x[2] = butterfly(x[0], x[2], psi[0], Q, mredParams)
x[1], x[3] = butterfly(x[1], x[3], psi[0], Q, mredParams)
x[4], x[6] = butterfly(x[4], x[6], psi[1], Q, mredParams)
x[5], x[7] = butterfly(x[5], x[7], psi[1], Q, mredParams)
x[8], x[10] = butterfly(x[8], x[10], psi[2], Q, mredParams)
x[9], x[11] = butterfly(x[9], x[11], psi[2], Q, mredParams)
x[12], x[14] = butterfly(x[12], x[14], psi[3], Q, mredParams)
x[13], x[15] = butterfly(x[13], x[15], psi[3], Q, mredParams)
}
} else {
for i := uint64(0); i < m; i = i + 8 {
psi := (*[8]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[2*i]))
x[0], x[1] = butterfly(x[0], x[1], psi[0], Q, mredParams)
x[2], x[3] = butterfly(x[2], x[3], psi[1], Q, mredParams)
x[4], x[5] = butterfly(x[4], x[5], psi[2], Q, mredParams)
x[6], x[7] = butterfly(x[6], x[7], psi[3], Q, mredParams)
x[8], x[9] = butterfly(x[8], x[9], psi[4], Q, mredParams)
x[10], x[11] = butterfly(x[10], x[11], psi[5], Q, mredParams)
x[12], x[13] = butterfly(x[12], x[13], psi[6], Q, mredParams)
x[14], x[15] = butterfly(x[14], x[15], psi[7], Q, mredParams)
}
}
}
NTTLazy(coeffsIn, coeffsOut, N, nttPsi, Q, mredParams, bredParams)
// Finish with an exact reduction
for i := uint64(0); i < N; i = i + 8 {
@@ -184,11 +94,319 @@ func NTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsi []uint64, Q, mredParams
x[6] = BRedAdd(x[6], Q, bredParams)
x[7] = BRedAdd(x[7], Q, bredParams)
}
}
// NTTLazy computes the NTT on the input coefficients using the input parameters with output values in the range [0, 2q-1].
func NTTLazy(coeffsIn, coeffsOut []uint64, N uint64, nttPsi []uint64, Q, QInv uint64, bredParams []uint64) {
var j1, j2, t uint64
var F uint64
QBitLen := uint64(bits.Len64(Q))
twoQ := 2 * Q
// Copy the result of the first round of butterflies on p2 with approximate reduction
t = N >> 1
F = nttPsi[1]
var V uint64
for j := uint64(0); j <= t-1; j = j + 8 {
xin := (*[8]uint64)(unsafe.Pointer(&coeffsIn[j]))
yin := (*[8]uint64)(unsafe.Pointer(&coeffsIn[j+t]))
xout := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j]))
yout := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j+t]))
V = MRedConstant(yin[0], F, Q, QInv)
xout[0], yout[0] = xin[0]+V, xin[0]+twoQ-V
V = MRedConstant(yin[1], F, Q, QInv)
xout[1], yout[1] = xin[1]+V, xin[1]+twoQ-V
V = MRedConstant(yin[2], F, Q, QInv)
xout[2], yout[2] = xin[2]+V, xin[2]+twoQ-V
V = MRedConstant(yin[3], F, Q, QInv)
xout[3], yout[3] = xin[3]+V, xin[3]+twoQ-V
V = MRedConstant(yin[4], F, Q, QInv)
xout[4], yout[4] = xin[4]+V, xin[4]+twoQ-V
V = MRedConstant(yin[5], F, Q, QInv)
xout[5], yout[5] = xin[5]+V, xin[5]+twoQ-V
V = MRedConstant(yin[6], F, Q, QInv)
xout[6], yout[6] = xin[6]+V, xin[6]+twoQ-V
V = MRedConstant(yin[7], F, Q, QInv)
xout[7], yout[7] = xin[7]+V, xin[7]+twoQ-V
}
// Continue the rest of the second to the n-1 butterflies on p2 with approximate reduction
var reduce bool
for m := uint64(2); m < N; m <<= 1 {
reduce = (bits.Len64(m)&1 == 1)
t >>= 1
if t >= 8 {
for i := uint64(0); i < m; i++ {
j1 = (i * t) << 1
j2 = j1 + t - 1
F = nttPsi[m+i]
if reduce {
for j := j1; j <= j2; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j]))
y := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j+t]))
// input := (x[i] < 8q, y[i] < 8q)
// output := (x[i] < 2q, y[i] < 2q)
x[0], y[0] = butterfly(x[0], y[0], F, twoQ, QBitLen, Q, QInv)
x[1], y[1] = butterfly(x[1], y[1], F, twoQ, QBitLen, Q, QInv)
x[2], y[2] = butterfly(x[2], y[2], F, twoQ, QBitLen, Q, QInv)
x[3], y[3] = butterfly(x[3], y[3], F, twoQ, QBitLen, Q, QInv)
x[4], y[4] = butterfly(x[4], y[4], F, twoQ, QBitLen, Q, QInv)
x[5], y[5] = butterfly(x[5], y[5], F, twoQ, QBitLen, Q, QInv)
x[6], y[6] = butterfly(x[6], y[6], F, twoQ, QBitLen, Q, QInv)
x[7], y[7] = butterfly(x[7], y[7], F, twoQ, QBitLen, Q, QInv)
}
} else {
for j := j1; j <= j2; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j]))
y := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j+t]))
// input := (x[i] < 2q, y[i] < 2q)
// output := (x[i] < 4q, y[i] < 4q)
V = MRedConstant(y[0], F, Q, QInv)
x[0], y[0] = x[0]+V, x[0]+twoQ-V
V = MRedConstant(y[1], F, Q, QInv)
x[1], y[1] = x[1]+V, x[1]+twoQ-V
V = MRedConstant(y[2], F, Q, QInv)
x[2], y[2] = x[2]+V, x[2]+twoQ-V
V = MRedConstant(y[3], F, Q, QInv)
x[3], y[3] = x[3]+V, x[3]+twoQ-V
V = MRedConstant(y[4], F, Q, QInv)
x[4], y[4] = x[4]+V, x[4]+twoQ-V
V = MRedConstant(y[5], F, Q, QInv)
x[5], y[5] = x[5]+V, x[5]+twoQ-V
V = MRedConstant(y[6], F, Q, QInv)
x[6], y[6] = x[6]+V, x[6]+twoQ-V
V = MRedConstant(y[7], F, Q, QInv)
x[7], y[7] = x[7]+V, x[7]+twoQ-V
}
}
}
} else if t == 4 {
if reduce {
for i := uint64(0); i < m; i = i + 2 {
j1 = (i * t) << 1
psi := (*[2]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
// input := (x[i] < 4q, y[i] < 4q)
// output := (x[i] < 2q, y[i] < 2q)
x[0], x[4] = butterfly(x[0], x[4], psi[0], twoQ, QBitLen, Q, QInv)
x[1], x[5] = butterfly(x[1], x[5], psi[0], twoQ, QBitLen, Q, QInv)
x[2], x[6] = butterfly(x[2], x[6], psi[0], twoQ, QBitLen, Q, QInv)
x[3], x[7] = butterfly(x[3], x[7], psi[0], twoQ, QBitLen, Q, QInv)
x[8], x[12] = butterfly(x[8], x[12], psi[1], twoQ, QBitLen, Q, QInv)
x[9], x[13] = butterfly(x[9], x[13], psi[1], twoQ, QBitLen, Q, QInv)
x[10], x[14] = butterfly(x[10], x[14], psi[1], twoQ, QBitLen, Q, QInv)
x[11], x[15] = butterfly(x[11], x[15], psi[1], twoQ, QBitLen, Q, QInv)
}
} else {
for i := uint64(0); i < m; i = i + 2 {
j1 = (i * t) << 1
psi := (*[2]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
// input := (x[i] < 2q, y[i] < 2q)
// output := (x[i] < 8q, y[i] < 8q)
V = MRedConstant(x[4], psi[0], Q, QInv)
x[0], x[4] = x[0]+V, x[0]+twoQ-V
V = MRedConstant(x[5], psi[0], Q, QInv)
x[1], x[5] = x[1]+V, x[1]+twoQ-V
V = MRedConstant(x[6], psi[0], Q, QInv)
x[2], x[6] = x[2]+V, x[2]+twoQ-V
V = MRedConstant(x[7], psi[0], Q, QInv)
x[3], x[7] = x[3]+V, x[3]+twoQ-V
V = MRedConstant(x[12], psi[1], Q, QInv)
x[8], x[12] = x[8]+V, x[8]+twoQ-V
V = MRedConstant(x[13], psi[1], Q, QInv)
x[9], x[13] = x[9]+V, x[9]+twoQ-V
V = MRedConstant(x[14], psi[1], Q, QInv)
x[10], x[14] = x[10]+V, x[10]+twoQ-V
V = MRedConstant(x[15], psi[1], Q, QInv)
x[11], x[15] = x[11]+V, x[11]+twoQ-V
}
}
} else if t == 2 {
if reduce {
for i := uint64(0); i < m; i = i + 4 {
j1 = (i * t) << 1
psi := (*[4]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
// input := (x[i] < 8q, y[i] < 8q)
// output := (x[i] < 2q, y[i] < 2q)
x[0], x[2] = butterfly(x[0], x[2], psi[0], twoQ, QBitLen, Q, QInv)
x[1], x[3] = butterfly(x[1], x[3], psi[0], twoQ, QBitLen, Q, QInv)
x[4], x[6] = butterfly(x[4], x[6], psi[1], twoQ, QBitLen, Q, QInv)
x[5], x[7] = butterfly(x[5], x[7], psi[1], twoQ, QBitLen, Q, QInv)
x[8], x[10] = butterfly(x[8], x[10], psi[2], twoQ, QBitLen, Q, QInv)
x[9], x[11] = butterfly(x[9], x[11], psi[2], twoQ, QBitLen, Q, QInv)
x[12], x[14] = butterfly(x[12], x[14], psi[3], twoQ, QBitLen, Q, QInv)
x[13], x[15] = butterfly(x[13], x[15], psi[3], twoQ, QBitLen, Q, QInv)
}
} else {
for i := uint64(0); i < m; i = i + 4 {
j1 = (i * t) << 1
psi := (*[4]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
// input := (x[i] < 2q, y[i] < 2q)
// output := (x[i] < 8q, y[i] < 8q)
V = MRedConstant(x[2], psi[0], Q, QInv)
x[0], x[2] = x[0]+V, x[0]+twoQ-V
V = MRedConstant(x[3], psi[0], Q, QInv)
x[1], x[3] = x[1]+V, x[1]+twoQ-V
V = MRedConstant(x[6], psi[1], Q, QInv)
x[4], x[6] = x[4]+V, x[4]+twoQ-V
V = MRedConstant(x[7], psi[1], Q, QInv)
x[5], x[7] = x[5]+V, x[5]+twoQ-V
V = MRedConstant(x[10], psi[2], Q, QInv)
x[8], x[10] = x[8]+V, x[8]+twoQ-V
V = MRedConstant(x[11], psi[2], Q, QInv)
x[9], x[11] = x[9]+V, x[9]+twoQ-V
V = MRedConstant(x[14], psi[3], Q, QInv)
x[12], x[14] = x[12]+V, x[12]+twoQ-V
V = MRedConstant(x[15], psi[3], Q, QInv)
x[13], x[15] = x[13]+V, x[13]+twoQ-V
}
}
} else {
if reduce {
for i := uint64(0); i < m; i = i + 8 {
psi := (*[8]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[2*i]))
// input := (x[i] < 8q, y[i] < 8q)
// output := (x[i] < 2q, y[i] < 2q)
x[0], x[1] = butterfly(x[0], x[1], psi[0], twoQ, QBitLen, Q, QInv)
x[2], x[3] = butterfly(x[2], x[3], psi[1], twoQ, QBitLen, Q, QInv)
x[4], x[5] = butterfly(x[4], x[5], psi[2], twoQ, QBitLen, Q, QInv)
x[6], x[7] = butterfly(x[6], x[7], psi[3], twoQ, QBitLen, Q, QInv)
x[8], x[9] = butterfly(x[8], x[9], psi[4], twoQ, QBitLen, Q, QInv)
x[10], x[11] = butterfly(x[10], x[11], psi[5], twoQ, QBitLen, Q, QInv)
x[12], x[13] = butterfly(x[12], x[13], psi[6], twoQ, QBitLen, Q, QInv)
x[14], x[15] = butterfly(x[14], x[15], psi[7], twoQ, QBitLen, Q, QInv)
}
} else {
for i := uint64(0); i < m; i = i + 8 {
psi := (*[8]uint64)(unsafe.Pointer(&nttPsi[m+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[2*i]))
// input := (x[i] < 2q, y[i] < 2q)
// output := (x[i] < 4q, y[i] < 4q)
V = MRedConstant(x[1], psi[0], Q, QInv)
x[0], x[1] = x[0]+V, x[0]+twoQ-V
V = MRedConstant(x[3], psi[1], Q, QInv)
x[2], x[3] = x[2]+V, x[2]+twoQ-V
V = MRedConstant(x[5], psi[2], Q, QInv)
x[4], x[5] = x[4]+V, x[4]+twoQ-V
V = MRedConstant(x[7], psi[3], Q, QInv)
x[6], x[7] = x[6]+V, x[6]+twoQ-V
V = MRedConstant(x[9], psi[4], Q, QInv)
x[8], x[9] = x[8]+V, x[8]+twoQ-V
V = MRedConstant(x[11], psi[5], Q, QInv)
x[10], x[11] = x[10]+V, x[10]+twoQ-V
V = MRedConstant(x[13], psi[6], Q, QInv)
x[12], x[13] = x[12]+V, x[12]+twoQ-V
V = MRedConstant(x[15], psi[7], Q, QInv)
x[14], x[15] = x[14]+V, x[14]+twoQ-V
}
}
}
}
}
// invbutterfly computes X, Y = U + V, (U - V) * Psi mod Q.
func invbutterfly(U, V, Psi, twoQ, Q, Qinv uint64) (X, Y uint64) {
X = U + V
if X >= twoQ {
X -= twoQ
}
Y = MRedConstant(U+twoQ-V, Psi, Q, Qinv) // At the moment it is not possible to use MRedConstant if Q > 61 bits
return
}
// InvNTT computes the InvNTT transformation on the input coefficients using the input parameters.
func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv, Q, mredParams uint64) {
func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv, Q, QInv uint64) {
var j1, j2, h, t uint64
var F uint64
@@ -196,6 +414,7 @@ func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv,
// Copy the result of the first round of butterflies on p2 with approximate reduction
t = 1
h = N >> 1
twoQ := Q << 1
for i := uint64(0); i < h; i = i + 8 {
@@ -203,14 +422,14 @@ func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv,
xin := (*[16]uint64)(unsafe.Pointer(&coeffsIn[2*i]))
xout := (*[16]uint64)(unsafe.Pointer(&coeffsOut[2*i]))
xout[0], xout[1] = invbutterfly(xin[0], xin[1], psi[0], Q, mredParams)
xout[2], xout[3] = invbutterfly(xin[2], xin[3], psi[1], Q, mredParams)
xout[4], xout[5] = invbutterfly(xin[4], xin[5], psi[2], Q, mredParams)
xout[6], xout[7] = invbutterfly(xin[6], xin[7], psi[3], Q, mredParams)
xout[8], xout[9] = invbutterfly(xin[8], xin[9], psi[4], Q, mredParams)
xout[10], xout[11] = invbutterfly(xin[10], xin[11], psi[5], Q, mredParams)
xout[12], xout[13] = invbutterfly(xin[12], xin[13], psi[6], Q, mredParams)
xout[14], xout[15] = invbutterfly(xin[14], xin[15], psi[7], Q, mredParams)
xout[0], xout[1] = invbutterfly(xin[0], xin[1], psi[0], twoQ, Q, QInv)
xout[2], xout[3] = invbutterfly(xin[2], xin[3], psi[1], twoQ, Q, QInv)
xout[4], xout[5] = invbutterfly(xin[4], xin[5], psi[2], twoQ, Q, QInv)
xout[6], xout[7] = invbutterfly(xin[6], xin[7], psi[3], twoQ, Q, QInv)
xout[8], xout[9] = invbutterfly(xin[8], xin[9], psi[4], twoQ, Q, QInv)
xout[10], xout[11] = invbutterfly(xin[10], xin[11], psi[5], twoQ, Q, QInv)
xout[12], xout[13] = invbutterfly(xin[12], xin[13], psi[6], twoQ, Q, QInv)
xout[14], xout[15] = invbutterfly(xin[14], xin[15], psi[7], twoQ, Q, QInv)
}
// Continue the rest of the second to the n-1 butterflies on p2 with approximate reduction
@@ -233,14 +452,14 @@ func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv,
x := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j]))
y := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j+t]))
x[0], y[0] = invbutterfly(x[0], y[0], F, Q, mredParams)
x[1], y[1] = invbutterfly(x[1], y[1], F, Q, mredParams)
x[2], y[2] = invbutterfly(x[2], y[2], F, Q, mredParams)
x[3], y[3] = invbutterfly(x[3], y[3], F, Q, mredParams)
x[4], y[4] = invbutterfly(x[4], y[4], F, Q, mredParams)
x[5], y[5] = invbutterfly(x[5], y[5], F, Q, mredParams)
x[6], y[6] = invbutterfly(x[6], y[6], F, Q, mredParams)
x[7], y[7] = invbutterfly(x[7], y[7], F, Q, mredParams)
x[0], y[0] = invbutterfly(x[0], y[0], F, twoQ, Q, QInv)
x[1], y[1] = invbutterfly(x[1], y[1], F, twoQ, Q, QInv)
x[2], y[2] = invbutterfly(x[2], y[2], F, twoQ, Q, QInv)
x[3], y[3] = invbutterfly(x[3], y[3], F, twoQ, Q, QInv)
x[4], y[4] = invbutterfly(x[4], y[4], F, twoQ, Q, QInv)
x[5], y[5] = invbutterfly(x[5], y[5], F, twoQ, Q, QInv)
x[6], y[6] = invbutterfly(x[6], y[6], F, twoQ, Q, QInv)
x[7], y[7] = invbutterfly(x[7], y[7], F, twoQ, Q, QInv)
}
j1 = j1 + (t << 1)
@@ -253,14 +472,14 @@ func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv,
psi := (*[2]uint64)(unsafe.Pointer(&nttPsiInv[h+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
x[0], x[4] = invbutterfly(x[0], x[4], psi[0], Q, mredParams)
x[1], x[5] = invbutterfly(x[1], x[5], psi[0], Q, mredParams)
x[2], x[6] = invbutterfly(x[2], x[6], psi[0], Q, mredParams)
x[3], x[7] = invbutterfly(x[3], x[7], psi[0], Q, mredParams)
x[8], x[12] = invbutterfly(x[8], x[12], psi[1], Q, mredParams)
x[9], x[13] = invbutterfly(x[9], x[13], psi[1], Q, mredParams)
x[10], x[14] = invbutterfly(x[10], x[14], psi[1], Q, mredParams)
x[11], x[15] = invbutterfly(x[11], x[15], psi[1], Q, mredParams)
x[0], x[4] = invbutterfly(x[0], x[4], psi[0], twoQ, Q, QInv)
x[1], x[5] = invbutterfly(x[1], x[5], psi[0], twoQ, Q, QInv)
x[2], x[6] = invbutterfly(x[2], x[6], psi[0], twoQ, Q, QInv)
x[3], x[7] = invbutterfly(x[3], x[7], psi[0], twoQ, Q, QInv)
x[8], x[12] = invbutterfly(x[8], x[12], psi[1], twoQ, Q, QInv)
x[9], x[13] = invbutterfly(x[9], x[13], psi[1], twoQ, Q, QInv)
x[10], x[14] = invbutterfly(x[10], x[14], psi[1], twoQ, Q, QInv)
x[11], x[15] = invbutterfly(x[11], x[15], psi[1], twoQ, Q, QInv)
j1 = j1 + (t << 2)
}
@@ -272,14 +491,14 @@ func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv,
psi := (*[4]uint64)(unsafe.Pointer(&nttPsiInv[h+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
x[0], x[2] = invbutterfly(x[0], x[2], psi[0], Q, mredParams)
x[1], x[3] = invbutterfly(x[1], x[3], psi[0], Q, mredParams)
x[4], x[6] = invbutterfly(x[4], x[6], psi[1], Q, mredParams)
x[5], x[7] = invbutterfly(x[5], x[7], psi[1], Q, mredParams)
x[8], x[10] = invbutterfly(x[8], x[10], psi[2], Q, mredParams)
x[9], x[11] = invbutterfly(x[9], x[11], psi[2], Q, mredParams)
x[12], x[14] = invbutterfly(x[12], x[14], psi[3], Q, mredParams)
x[13], x[15] = invbutterfly(x[13], x[15], psi[3], Q, mredParams)
x[0], x[2] = invbutterfly(x[0], x[2], psi[0], twoQ, Q, QInv)
x[1], x[3] = invbutterfly(x[1], x[3], psi[0], twoQ, Q, QInv)
x[4], x[6] = invbutterfly(x[4], x[6], psi[1], twoQ, Q, QInv)
x[5], x[7] = invbutterfly(x[5], x[7], psi[1], twoQ, Q, QInv)
x[8], x[10] = invbutterfly(x[8], x[10], psi[2], twoQ, Q, QInv)
x[9], x[11] = invbutterfly(x[9], x[11], psi[2], twoQ, Q, QInv)
x[12], x[14] = invbutterfly(x[12], x[14], psi[3], twoQ, Q, QInv)
x[13], x[15] = invbutterfly(x[13], x[15], psi[3], twoQ, Q, QInv)
j1 = j1 + (t << 3)
}
@@ -293,14 +512,133 @@ func InvNTT(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv,
x := (*[8]uint64)(unsafe.Pointer(&coeffsOut[i]))
x[0] = MRed(x[0], nttNInv, Q, mredParams)
x[1] = MRed(x[1], nttNInv, Q, mredParams)
x[2] = MRed(x[2], nttNInv, Q, mredParams)
x[3] = MRed(x[3], nttNInv, Q, mredParams)
x[4] = MRed(x[4], nttNInv, Q, mredParams)
x[5] = MRed(x[5], nttNInv, Q, mredParams)
x[6] = MRed(x[6], nttNInv, Q, mredParams)
x[7] = MRed(x[7], nttNInv, Q, mredParams)
x[0] = MRed(x[0], nttNInv, Q, QInv)
x[1] = MRed(x[1], nttNInv, Q, QInv)
x[2] = MRed(x[2], nttNInv, Q, QInv)
x[3] = MRed(x[3], nttNInv, Q, QInv)
x[4] = MRed(x[4], nttNInv, Q, QInv)
x[5] = MRed(x[5], nttNInv, Q, QInv)
x[6] = MRed(x[6], nttNInv, Q, QInv)
x[7] = MRed(x[7], nttNInv, Q, QInv)
}
}
// InvNTTLazy computes the InvNTT transformation on the input coefficients using the input parameters with output values in the range [0, 2q-1].
func InvNTTLazy(coeffsIn, coeffsOut []uint64, N uint64, nttPsiInv []uint64, nttNInv, Q, mredParams uint64) {
var j1, j2, h, t uint64
var F uint64
// Copy the result of the first round of butterflies on p2 with approximate reduction
t = 1
h = N >> 1
twoQ := Q << 1
for i := uint64(0); i < h; i = i + 8 {
psi := (*[8]uint64)(unsafe.Pointer(&nttPsiInv[h+i]))
xin := (*[16]uint64)(unsafe.Pointer(&coeffsIn[2*i]))
xout := (*[16]uint64)(unsafe.Pointer(&coeffsOut[2*i]))
xout[0], xout[1] = invbutterfly(xin[0], xin[1], psi[0], twoQ, Q, mredParams)
xout[2], xout[3] = invbutterfly(xin[2], xin[3], psi[1], twoQ, Q, mredParams)
xout[4], xout[5] = invbutterfly(xin[4], xin[5], psi[2], twoQ, Q, mredParams)
xout[6], xout[7] = invbutterfly(xin[6], xin[7], psi[3], twoQ, Q, mredParams)
xout[8], xout[9] = invbutterfly(xin[8], xin[9], psi[4], twoQ, Q, mredParams)
xout[10], xout[11] = invbutterfly(xin[10], xin[11], psi[5], twoQ, Q, mredParams)
xout[12], xout[13] = invbutterfly(xin[12], xin[13], psi[6], twoQ, Q, mredParams)
xout[14], xout[15] = invbutterfly(xin[14], xin[15], psi[7], twoQ, Q, mredParams)
}
// Continue the rest of the second to the n-1 butterflies on p2 with approximate reduction
t <<= 1
for m := N >> 1; m > 1; m >>= 1 {
j1 = 0
h = m >> 1
if t >= 8 {
for i := uint64(0); i < h; i++ {
j2 = j1 + t - 1
F = nttPsiInv[h+i]
for j := j1; j <= j2; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j]))
y := (*[8]uint64)(unsafe.Pointer(&coeffsOut[j+t]))
x[0], y[0] = invbutterfly(x[0], y[0], F, twoQ, Q, mredParams)
x[1], y[1] = invbutterfly(x[1], y[1], F, twoQ, Q, mredParams)
x[2], y[2] = invbutterfly(x[2], y[2], F, twoQ, Q, mredParams)
x[3], y[3] = invbutterfly(x[3], y[3], F, twoQ, Q, mredParams)
x[4], y[4] = invbutterfly(x[4], y[4], F, twoQ, Q, mredParams)
x[5], y[5] = invbutterfly(x[5], y[5], F, twoQ, Q, mredParams)
x[6], y[6] = invbutterfly(x[6], y[6], F, twoQ, Q, mredParams)
x[7], y[7] = invbutterfly(x[7], y[7], F, twoQ, Q, mredParams)
}
j1 = j1 + (t << 1)
}
} else if t == 4 {
for i := uint64(0); i < h; i = i + 2 {
psi := (*[2]uint64)(unsafe.Pointer(&nttPsiInv[h+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
x[0], x[4] = invbutterfly(x[0], x[4], psi[0], twoQ, Q, mredParams)
x[1], x[5] = invbutterfly(x[1], x[5], psi[0], twoQ, Q, mredParams)
x[2], x[6] = invbutterfly(x[2], x[6], psi[0], twoQ, Q, mredParams)
x[3], x[7] = invbutterfly(x[3], x[7], psi[0], twoQ, Q, mredParams)
x[8], x[12] = invbutterfly(x[8], x[12], psi[1], twoQ, Q, mredParams)
x[9], x[13] = invbutterfly(x[9], x[13], psi[1], twoQ, Q, mredParams)
x[10], x[14] = invbutterfly(x[10], x[14], psi[1], twoQ, Q, mredParams)
x[11], x[15] = invbutterfly(x[11], x[15], psi[1], twoQ, Q, mredParams)
j1 = j1 + (t << 2)
}
} else {
for i := uint64(0); i < h; i = i + 4 {
psi := (*[4]uint64)(unsafe.Pointer(&nttPsiInv[h+i]))
x := (*[16]uint64)(unsafe.Pointer(&coeffsOut[j1]))
x[0], x[2] = invbutterfly(x[0], x[2], psi[0], twoQ, Q, mredParams)
x[1], x[3] = invbutterfly(x[1], x[3], psi[0], twoQ, Q, mredParams)
x[4], x[6] = invbutterfly(x[4], x[6], psi[1], twoQ, Q, mredParams)
x[5], x[7] = invbutterfly(x[5], x[7], psi[1], twoQ, Q, mredParams)
x[8], x[10] = invbutterfly(x[8], x[10], psi[2], twoQ, Q, mredParams)
x[9], x[11] = invbutterfly(x[9], x[11], psi[2], twoQ, Q, mredParams)
x[12], x[14] = invbutterfly(x[12], x[14], psi[3], twoQ, Q, mredParams)
x[13], x[15] = invbutterfly(x[13], x[15], psi[3], twoQ, Q, mredParams)
j1 = j1 + (t << 3)
}
}
t <<= 1
}
// Finish with an exact reduction
for i := uint64(0); i < N; i = i + 8 {
x := (*[8]uint64)(unsafe.Pointer(&coeffsOut[i]))
x[0] = MRedConstant(x[0], nttNInv, Q, mredParams)
x[1] = MRedConstant(x[1], nttNInv, Q, mredParams)
x[2] = MRedConstant(x[2], nttNInv, Q, mredParams)
x[3] = MRedConstant(x[3], nttNInv, Q, mredParams)
x[4] = MRedConstant(x[4], nttNInv, Q, mredParams)
x[5] = MRedConstant(x[5], nttNInv, Q, mredParams)
x[6] = MRedConstant(x[6], nttNInv, Q, mredParams)
x[7] = MRedConstant(x[7], nttNInv, Q, mredParams)
}
}

View File

@@ -258,6 +258,29 @@ func (r *Ring) Reduce(p1, p2 *Poly) {
}
}
// ReduceConstant applies a modular reduction on the coefficients of p1 and writes the result on p2.
// Return values in [0, 2q-1]
func (r *Ring) ReduceConstant(p1, p2 *Poly) {
for i, qi := range r.Modulus {
p1tmp, p2tmp := p1.Coeffs[i], p2.Coeffs[i]
bredParams := r.BredParams[i]
for j := uint64(0); j < r.N; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = BRedAddConstant(x[0], qi, bredParams)
z[1] = BRedAddConstant(x[1], qi, bredParams)
z[2] = BRedAddConstant(x[2], qi, bredParams)
z[3] = BRedAddConstant(x[3], qi, bredParams)
z[4] = BRedAddConstant(x[4], qi, bredParams)
z[5] = BRedAddConstant(x[5], qi, bredParams)
z[6] = BRedAddConstant(x[6], qi, bredParams)
z[7] = BRedAddConstant(x[7], qi, bredParams)
}
}
}
// ReduceLvl applies a modular reduction on the coefficients of p1
// for the moduli from q_0 up to q_level and writes the result on p2.
func (r *Ring) ReduceLvl(level uint64, p1, p2 *Poly) {
@@ -282,6 +305,31 @@ func (r *Ring) ReduceLvl(level uint64, p1, p2 *Poly) {
}
}
// ReduceConstantLvl applies a modular reduction on the coefficients of p1
// for the moduli from q_0 up to q_level and writes the result on p2.
// Return values in [0, 2q-1]
func (r *Ring) ReduceConstantLvl(level uint64, p1, p2 *Poly) {
for i := uint64(0); i < level+1; i++ {
qi := r.Modulus[i]
p1tmp, p2tmp := p1.Coeffs[i], p2.Coeffs[i]
bredParams := r.BredParams[i]
for j := uint64(0); j < r.N; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = BRedAddConstant(x[0], qi, bredParams)
z[1] = BRedAddConstant(x[1], qi, bredParams)
z[2] = BRedAddConstant(x[2], qi, bredParams)
z[3] = BRedAddConstant(x[3], qi, bredParams)
z[4] = BRedAddConstant(x[4], qi, bredParams)
z[5] = BRedAddConstant(x[5], qi, bredParams)
z[6] = BRedAddConstant(x[6], qi, bredParams)
z[7] = BRedAddConstant(x[7], qi, bredParams)
}
}
}
// Mod applies a modular reduction by m on the coefficients of p1 and writes the result on p2.
func (r *Ring) Mod(p1 *Poly, m uint64, p2 *Poly) {
bredParams := BRedParams(m)
@@ -456,6 +504,31 @@ func (r *Ring) MulCoeffsMontgomeryLvl(level uint64, p1, p2, p3 *Poly) {
}
}
// MulCoeffsMontgomeryConstantLvl multiplies p1 by p2 coefficient-wise with a Montgomery
// modular reduction for the moduli from q_0 up to q_level and returns the result on p3.
func (r *Ring) MulCoeffsMontgomeryConstantLvl(level uint64, p1, p2, p3 *Poly) {
for i := uint64(0); i < level+1; i++ {
qi := r.Modulus[i]
p1tmp, p2tmp, p3tmp := p1.Coeffs[i], p2.Coeffs[i], p3.Coeffs[i]
mredParams := r.MredParams[i]
for j := uint64(0); j < r.N; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
y := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z[0] = MRedConstant(x[0], y[0], qi, mredParams)
z[1] = MRedConstant(x[1], y[1], qi, mredParams)
z[2] = MRedConstant(x[2], y[2], qi, mredParams)
z[3] = MRedConstant(x[3], y[3], qi, mredParams)
z[4] = MRedConstant(x[4], y[4], qi, mredParams)
z[5] = MRedConstant(x[5], y[5], qi, mredParams)
z[6] = MRedConstant(x[6], y[6], qi, mredParams)
z[7] = MRedConstant(x[7], y[7], qi, mredParams)
}
}
}
// MulCoeffsMontgomeryAndAdd multiplies p1 by p2 coefficient-wise with a
// Montgomery modular reduction and adds the result to p3.
func (r *Ring) MulCoeffsMontgomeryAndAdd(p1, p2, p3 *Poly) {
@@ -529,6 +602,31 @@ func (r *Ring) MulCoeffsMontgomeryAndAddNoMod(p1, p2, p3 *Poly) {
}
}
// MulCoeffsMontgomeryConstantAndAddNoMod multiplies p1 by p2 coefficient-wise with a
// Montgomery modular reduction and adds the result to p3 without modular reduction.
// Return values in [0, 3q-1]
func (r *Ring) MulCoeffsMontgomeryConstantAndAddNoMod(p1, p2, p3 *Poly) {
for i, qi := range r.Modulus {
p1tmp, p2tmp, p3tmp := p1.Coeffs[i], p2.Coeffs[i], p3.Coeffs[i]
mredParams := r.MredParams[i]
for j := uint64(0); j < r.N; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
y := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z[0] += MRedConstant(x[0], y[0], qi, mredParams)
z[1] += MRedConstant(x[1], y[1], qi, mredParams)
z[2] += MRedConstant(x[2], y[2], qi, mredParams)
z[3] += MRedConstant(x[3], y[3], qi, mredParams)
z[4] += MRedConstant(x[4], y[4], qi, mredParams)
z[5] += MRedConstant(x[5], y[5], qi, mredParams)
z[6] += MRedConstant(x[6], y[6], qi, mredParams)
z[7] += MRedConstant(x[7], y[7], qi, mredParams)
}
}
}
// MulCoeffsMontgomeryAndAddNoModLvl multiplies p1 by p2 coefficient-wise with a Montgomery modular
// reduction for the moduli from q_0 up to q_level and adds the result to p3 without modular reduction.
func (r *Ring) MulCoeffsMontgomeryAndAddNoModLvl(level uint64, p1, p2, p3 *Poly) {
@@ -556,6 +654,7 @@ func (r *Ring) MulCoeffsMontgomeryAndAddNoModLvl(level uint64, p1, p2, p3 *Poly)
// MulCoeffsMontgomeryConstantAndAddNoModLvl multiplies p1 by p2 coefficient-wise with a constant-time Montgomery
// modular reduction for the moduli from q_0 up to q_level and adds the result to p3 without modular reduction.
// Return values in [0, 3q-1]
func (r *Ring) MulCoeffsMontgomeryConstantAndAddNoModLvl(level uint64, p1, p2, p3 *Poly) {
for i := uint64(0); i < level+1; i++ {
qi := r.Modulus[i]

View File

@@ -70,8 +70,8 @@ func (rnss *RNSScaler) DivByQOverTRounded(p1Q, p2T *Poly) {
p2tmp := p2T.Coeffs[0]
p3tmp := rnss.polypoolT.Coeffs[0]
mredParams := rnss.mredParamsT
qInv := rnss.qInv
qHalfModT := rnss.qHalfModT
qInv := T - rnss.qInv
qHalfModT := T - rnss.qHalfModT
// Multiply P_{Q} by t and extend the basis from P_{Q} to t*(P_{Q}||P_{t})
// Since the coefficients of P_{t} are multiplied by t, they are all zero,
@@ -90,14 +90,14 @@ func (rnss *RNSScaler) DivByQOverTRounded(p1Q, p2T *Poly) {
x := (*[8]uint64)(unsafe.Pointer(&p3tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p2tmp[j]))
z[0] = MRed(qHalfModT+T-x[0], qInv, T, mredParams)
z[1] = MRed(qHalfModT+T-x[1], qInv, T, mredParams)
z[2] = MRed(qHalfModT+T-x[2], qInv, T, mredParams)
z[3] = MRed(qHalfModT+T-x[3], qInv, T, mredParams)
z[4] = MRed(qHalfModT+T-x[4], qInv, T, mredParams)
z[5] = MRed(qHalfModT+T-x[5], qInv, T, mredParams)
z[6] = MRed(qHalfModT+T-x[6], qInv, T, mredParams)
z[7] = MRed(qHalfModT+T-x[7], qInv, T, mredParams)
z[0] = MRed(qHalfModT+x[0], qInv, T, mredParams)
z[1] = MRed(qHalfModT+x[1], qInv, T, mredParams)
z[2] = MRed(qHalfModT+x[2], qInv, T, mredParams)
z[3] = MRed(qHalfModT+x[3], qInv, T, mredParams)
z[4] = MRed(qHalfModT+x[4], qInv, T, mredParams)
z[5] = MRed(qHalfModT+x[5], qInv, T, mredParams)
z[6] = MRed(qHalfModT+x[6], qInv, T, mredParams)
z[7] = MRed(qHalfModT+x[7], qInv, T, mredParams)
}
}
@@ -298,17 +298,18 @@ func (r *Ring) DivFloorByLastModulusNTT(p0 *Poly) {
pTmp := make([]uint64, r.N)
InvNTT(p0.Coeffs[level], p0.Coeffs[level], r.N, r.NttPsiInv[level], r.NttNInv[level], r.Modulus[level], r.MredParams[level])
InvNTTLazy(p0.Coeffs[level], p0.Coeffs[level], r.N, r.NttPsiInv[level], r.NttNInv[level], r.Modulus[level], r.MredParams[level])
for i := 0; i < level; i++ {
NTT(p0.Coeffs[level], pTmp, r.N, r.NttPsi[i], r.Modulus[i], r.MredParams[i], r.BredParams[i])
NTTLazy(p0.Coeffs[level], pTmp, r.N, r.NttPsi[i], r.Modulus[i], r.MredParams[i], r.BredParams[i])
p0tmp := p0.Coeffs[i]
qi := r.Modulus[i]
twoqi := qi << 1
mredParams := r.MredParams[i]
rescalParams := r.RescaleParams[level-1][i]
rescalParams := qi - r.RescaleParams[level-1][i]
// (x[i] - x[-1]) * InvQ
for j := uint64(0); j < r.N; j = j + 8 {
@@ -316,14 +317,14 @@ func (r *Ring) DivFloorByLastModulusNTT(p0 *Poly) {
x := (*[8]uint64)(unsafe.Pointer(&pTmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p0tmp[j]))
z[0] = MRed(z[0]+(qi-x[0]), rescalParams, qi, mredParams)
z[1] = MRed(z[1]+(qi-x[1]), rescalParams, qi, mredParams)
z[2] = MRed(z[2]+(qi-x[2]), rescalParams, qi, mredParams)
z[3] = MRed(z[3]+(qi-x[3]), rescalParams, qi, mredParams)
z[4] = MRed(z[4]+(qi-x[4]), rescalParams, qi, mredParams)
z[5] = MRed(z[5]+(qi-x[5]), rescalParams, qi, mredParams)
z[6] = MRed(z[6]+(qi-x[6]), rescalParams, qi, mredParams)
z[7] = MRed(z[7]+(qi-x[7]), rescalParams, qi, mredParams)
z[0] = MRed(twoqi-z[0]+x[0], rescalParams, qi, mredParams)
z[1] = MRed(twoqi-z[1]+x[1], rescalParams, qi, mredParams)
z[2] = MRed(twoqi-z[2]+x[2], rescalParams, qi, mredParams)
z[3] = MRed(twoqi-z[3]+x[3], rescalParams, qi, mredParams)
z[4] = MRed(twoqi-z[4]+x[4], rescalParams, qi, mredParams)
z[5] = MRed(twoqi-z[5]+x[5], rescalParams, qi, mredParams)
z[6] = MRed(twoqi-z[6]+x[6], rescalParams, qi, mredParams)
z[7] = MRed(twoqi-z[7]+x[7], rescalParams, qi, mredParams)
}
}
@@ -340,23 +341,24 @@ func (r *Ring) DivFloorByLastModulus(p0 *Poly) {
p0tmp := p0.Coeffs[level]
p1tmp := p0.Coeffs[i]
qi := r.Modulus[i]
twoqi := qi << 1
bredParams := r.BredParams[i]
mredParams := r.MredParams[i]
rescaleParams := r.RescaleParams[level-1][i]
rescaleParams := qi - r.RescaleParams[level-1][i]
// (x[i] - x[-1]) * InvQ
for j := uint64(0); j < r.N; j = j + 8 {
x := (*[8]uint64)(unsafe.Pointer(&p0tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
z[0] = MRed(z[0]+(qi-BRedAdd(x[0], qi, bredParams)), rescaleParams, qi, mredParams)
z[1] = MRed(z[1]+(qi-BRedAdd(x[1], qi, bredParams)), rescaleParams, qi, mredParams)
z[2] = MRed(z[2]+(qi-BRedAdd(x[2], qi, bredParams)), rescaleParams, qi, mredParams)
z[3] = MRed(z[3]+(qi-BRedAdd(x[3], qi, bredParams)), rescaleParams, qi, mredParams)
z[4] = MRed(z[4]+(qi-BRedAdd(x[4], qi, bredParams)), rescaleParams, qi, mredParams)
z[5] = MRed(z[5]+(qi-BRedAdd(x[5], qi, bredParams)), rescaleParams, qi, mredParams)
z[6] = MRed(z[6]+(qi-BRedAdd(x[6], qi, bredParams)), rescaleParams, qi, mredParams)
z[7] = MRed(z[7]+(qi-BRedAdd(x[7], qi, bredParams)), rescaleParams, qi, mredParams)
z[0] = MRed(twoqi-z[0]+BRedAdd(x[0], qi, bredParams), rescaleParams, qi, mredParams)
z[1] = MRed(twoqi-z[1]+BRedAdd(x[1], qi, bredParams), rescaleParams, qi, mredParams)
z[2] = MRed(twoqi-z[2]+BRedAdd(x[2], qi, bredParams), rescaleParams, qi, mredParams)
z[3] = MRed(twoqi-z[3]+BRedAdd(x[3], qi, bredParams), rescaleParams, qi, mredParams)
z[4] = MRed(twoqi-z[4]+BRedAdd(x[4], qi, bredParams), rescaleParams, qi, mredParams)
z[5] = MRed(twoqi-z[5]+BRedAdd(x[5], qi, bredParams), rescaleParams, qi, mredParams)
z[6] = MRed(twoqi-z[6]+BRedAdd(x[6], qi, bredParams), rescaleParams, qi, mredParams)
z[7] = MRed(twoqi-z[7]+BRedAdd(x[7], qi, bredParams), rescaleParams, qi, mredParams)
}
}
@@ -386,37 +388,22 @@ func (r *Ring) DivRoundByLastModulusNTT(p0 *Poly) {
pTmp := make([]uint64, r.N)
InvNTT(p0.Coeffs[level], p0.Coeffs[level], r.N, r.NttPsiInv[level], r.NttNInv[level], r.Modulus[level], r.MredParams[level])
InvNTTLazy(p0.Coeffs[level], p0.Coeffs[level], r.N, r.NttPsiInv[level], r.NttNInv[level], r.Modulus[level], r.MredParams[level])
// Center by (p-1)/2
pHalf = (r.Modulus[level] - 1) >> 1
p0tmp := p0.Coeffs[level]
pj := r.Modulus[level]
for i := uint64(0); i < r.N; i = i + 8 {
z := (*[8]uint64)(unsafe.Pointer(&p0tmp[i]))
z[0] = CRed(z[0]+pHalf, pj)
z[1] = CRed(z[1]+pHalf, pj)
z[2] = CRed(z[2]+pHalf, pj)
z[3] = CRed(z[3]+pHalf, pj)
z[4] = CRed(z[4]+pHalf, pj)
z[5] = CRed(z[5]+pHalf, pj)
z[6] = CRed(z[6]+pHalf, pj)
z[7] = CRed(z[7]+pHalf, pj)
}
for i := 0; i < level; i++ {
p1tmp := p0.Coeffs[i]
qi := r.Modulus[i]
twoqi := qi << 1
bredParams := r.BredParams[i]
mredParams := r.MredParams[i]
rescaleParams := r.RescaleParams[level-1][i]
rescaleParams := qi - r.RescaleParams[level-1][i]
pHalfNegQi = r.Modulus[i] - BRedAdd(pHalf, qi, bredParams)
pHalfNegQi = pHalf + r.Modulus[i] - BRedAdd(pHalf, qi, bredParams)
for j := uint64(0); j < r.N; j = j + 8 {
@@ -433,7 +420,7 @@ func (r *Ring) DivRoundByLastModulusNTT(p0 *Poly) {
z[7] = x[7] + pHalfNegQi
}
NTT(pTmp, pTmp, r.N, r.NttPsi[i], qi, mredParams, bredParams)
NTTLazy(pTmp, pTmp, r.N, r.NttPsi[i], qi, mredParams, bredParams)
// (x[i] - x[-1]) * InvQ
for j := uint64(0); j < r.N; j = j + 8 {
@@ -441,14 +428,14 @@ func (r *Ring) DivRoundByLastModulusNTT(p0 *Poly) {
x := (*[8]uint64)(unsafe.Pointer(&pTmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
z[0] = MRed(z[0]+(qi-x[0]), rescaleParams, qi, mredParams)
z[1] = MRed(z[1]+(qi-x[1]), rescaleParams, qi, mredParams)
z[2] = MRed(z[2]+(qi-x[2]), rescaleParams, qi, mredParams)
z[3] = MRed(z[3]+(qi-x[3]), rescaleParams, qi, mredParams)
z[4] = MRed(z[4]+(qi-x[4]), rescaleParams, qi, mredParams)
z[5] = MRed(z[5]+(qi-x[5]), rescaleParams, qi, mredParams)
z[6] = MRed(z[6]+(qi-x[6]), rescaleParams, qi, mredParams)
z[7] = MRed(z[7]+(qi-x[7]), rescaleParams, qi, mredParams)
z[0] = MRed(twoqi+x[0]-z[0], rescaleParams, qi, mredParams)
z[1] = MRed(twoqi+x[1]-z[1], rescaleParams, qi, mredParams)
z[2] = MRed(twoqi+x[2]-z[2], rescaleParams, qi, mredParams)
z[3] = MRed(twoqi+x[3]-z[3], rescaleParams, qi, mredParams)
z[4] = MRed(twoqi+x[4]-z[4], rescaleParams, qi, mredParams)
z[5] = MRed(twoqi+x[5]-z[5], rescaleParams, qi, mredParams)
z[6] = MRed(twoqi+x[6]-z[6], rescaleParams, qi, mredParams)
z[7] = MRed(twoqi+x[7]-z[7], rescaleParams, qi, mredParams)
}
}
@@ -470,16 +457,16 @@ func (r *Ring) DivRoundByLastModulus(p0 *Poly) {
for i := uint64(0); i < r.N; i = i + 8 {
z := (*[8]uint64)(unsafe.Pointer(&p0tmp[i]))
x := (*[8]uint64)(unsafe.Pointer(&p0tmp[i]))
z[0] = CRed(z[0]+pHalf, pj)
z[1] = CRed(z[1]+pHalf, pj)
z[2] = CRed(z[2]+pHalf, pj)
z[3] = CRed(z[3]+pHalf, pj)
z[4] = CRed(z[4]+pHalf, pj)
z[5] = CRed(z[5]+pHalf, pj)
z[6] = CRed(z[6]+pHalf, pj)
z[7] = CRed(z[7]+pHalf, pj)
x[0] = CRed(x[0]+pHalf, pj)
x[1] = CRed(x[1]+pHalf, pj)
x[2] = CRed(x[2]+pHalf, pj)
x[3] = CRed(x[3]+pHalf, pj)
x[4] = CRed(x[4]+pHalf, pj)
x[5] = CRed(x[5]+pHalf, pj)
x[6] = CRed(x[6]+pHalf, pj)
x[7] = CRed(x[7]+pHalf, pj)
}
for i := 0; i < level; i++ {
@@ -487,9 +474,10 @@ func (r *Ring) DivRoundByLastModulus(p0 *Poly) {
p1tmp := p0.Coeffs[i]
qi := r.Modulus[i]
twoqi := qi << 1
bredParams := r.BredParams[i]
mredParams := r.MredParams[i]
rescaleParams := r.RescaleParams[level-1][i]
rescaleParams := qi - r.RescaleParams[level-1][i]
pHalfNegQi = r.Modulus[i] - BRedAdd(pHalf, qi, bredParams)
@@ -499,14 +487,14 @@ func (r *Ring) DivRoundByLastModulus(p0 *Poly) {
x := (*[8]uint64)(unsafe.Pointer(&p0tmp[j]))
z := (*[8]uint64)(unsafe.Pointer(&p1tmp[j]))
z[0] = MRed(z[0]+(qi-BRedAdd(x[0]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[1] = MRed(z[1]+(qi-BRedAdd(x[1]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[2] = MRed(z[2]+(qi-BRedAdd(x[2]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[3] = MRed(z[3]+(qi-BRedAdd(x[3]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[4] = MRed(z[4]+(qi-BRedAdd(x[4]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[5] = MRed(z[5]+(qi-BRedAdd(x[5]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[6] = MRed(z[6]+(qi-BRedAdd(x[6]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[7] = MRed(z[7]+(qi-BRedAdd(x[7]+pHalfNegQi, qi, bredParams)), rescaleParams, qi, mredParams)
z[0] = MRed(x[0]+pHalfNegQi+twoqi-z[0], rescaleParams, qi, mredParams)
z[1] = MRed(x[1]+pHalfNegQi+twoqi-z[1], rescaleParams, qi, mredParams)
z[2] = MRed(x[2]+pHalfNegQi+twoqi-z[2], rescaleParams, qi, mredParams)
z[3] = MRed(x[3]+pHalfNegQi+twoqi-z[3], rescaleParams, qi, mredParams)
z[4] = MRed(x[4]+pHalfNegQi+twoqi-z[4], rescaleParams, qi, mredParams)
z[5] = MRed(x[5]+pHalfNegQi+twoqi-z[5], rescaleParams, qi, mredParams)
z[6] = MRed(x[6]+pHalfNegQi+twoqi-z[6], rescaleParams, qi, mredParams)
z[7] = MRed(x[7]+pHalfNegQi+twoqi-z[7], rescaleParams, qi, mredParams)
}
}

View File

@@ -55,7 +55,7 @@ func TestRing(t *testing.T) {
var defaultParams []*Parameters
if testing.Short() {
defaultParams = DefaultParams[:3]
defaultParams = DefaultParams[0:3]
} else {
defaultParams = DefaultParams
}
@@ -473,6 +473,8 @@ func testExtendBasis(testContext *testParams, t *testing.T) {
basisextender.ModUpSplitQP(uint64(len(testContext.ringQ.Modulus)-1), Pol, PolTest)
testContext.ringP.Reduce(PolTest, PolTest)
for i := range testContext.ringP.Modulus {
require.Equal(t, PolTest.Coeffs[i][:testContext.ringQ.N], PolWant.Coeffs[i][:testContext.ringQ.N])
}

View File

@@ -11,51 +11,27 @@ type Parameters struct {
var DefaultParams = []*Parameters{
{12, Qi60[len(Qi60)-2:], Pi60[len(Pi60)-2:]},
{13, Qi60[len(Qi60)-4:], Pi60[len(Pi60)-4:]},
{14, Qi60[len(Qi60)-8:], Pi60[len(Pi60)-8:]},
{15, Qi60[len(Qi60)-16:], Pi60[len(Pi60)-16:]},
{16, Qi60[len(Qi60)-32:], Pi60[len(Pi60)-32:]},
{14, Qi60[len(Qi60)-7:], Pi60[len(Pi60)-7:]},
{15, Qi60[len(Qi60)-14:], Pi60[len(Pi60)-14:]},
{16, Qi60[len(Qi60)-29:], Pi60[len(Pi60)-29:]},
}
// Pi60 are the first one hundred (from 0x800000000000000 and upward) 60-bit NTT-friendly primes for N up to 65536
var Pi60 = []uint64{576460752308273153, 576460752315482113, 576460752319021057, 576460752319414273, 576460752321642497,
576460752325705729, 576460752328327169, 576460752329113601, 576460752329506817, 576460752329900033,
576460752331210753, 576460752337502209, 576460752340123649, 576460752342876161, 576460752347201537,
576460752347332609, 576460752352837633, 576460752354017281, 576460752355065857, 576460752355459073,
576460752358604801, 576460752364240897, 576460752368435201, 576460752371187713, 576460752373547009,
576460752374333441, 576460752376692737, 576460752378003457, 576460752378396673, 576460752380755969,
576460752381411329, 576460752386129921, 576460752395173889, 576460752395960321, 576460752396091393,
576460752396484609, 576460752399106049, 576460752405135361, 576460752405921793, 576460752409722881,
576460752410116097, 576460752411033601, 576460752412082177, 576460752416145409, 576460752416931841,
576460752421257217, 576460752427548673, 576460752429514753, 576460752435281921, 576460752437248001,
576460752438558721, 576460752441966593, 576460752449044481, 576460752451141633, 576460752451534849,
576460752462938113, 576460752465952769, 576460752468705281, 576460752469491713, 576460752472375297,
576460752473948161, 576460752475389953, 576460752480894977, 576460752483254273, 576460752484827137,
576460752486793217, 576460752486924289, 576460752492691457, 576460752498589697, 576460752498720769,
576460752499507201, 576460752504225793, 576460752505405441, 576460752507240449, 576460752507764737,
576460752509206529, 576460752510124033, 576460752510779393, 576460752511959041, 576460752514449409,
576460752516284417, 576460752519168001, 576460752520347649, 576460752520609793, 576460752522969089,
576460752523100161, 576460752524279809, 576460752525852673, 576460752526245889, 576460752526508033,
576460752532013057, 576460752545120257, 576460752550100993, 576460752551804929, 576460752567402497,
576460752568975361, 576460752573431809, 576460752580902913, 576460752585490433, 576460752586407937}
// Qi60 are the first [0:32] 61-bit close to 2^{62} NTT-friendly primes for N up to 2^{17}
var Qi60 = []uint64{0x1fffffffffe00001, 0x1fffffffffc80001, 0x1fffffffffb40001, 0x1fffffffff500001,
0x1fffffffff380001, 0x1fffffffff000001, 0x1ffffffffef00001, 0x1ffffffffee80001,
0x1ffffffffeb40001, 0x1ffffffffe780001, 0x1ffffffffe600001, 0x1ffffffffe4c0001,
0x1ffffffffdf40001, 0x1ffffffffdac0001, 0x1ffffffffda40001, 0x1ffffffffc680001,
0x1ffffffffc000001, 0x1ffffffffb880001, 0x1ffffffffb7c0001, 0x1ffffffffb300001,
0x1ffffffffb1c0001, 0x1ffffffffadc0001, 0x1ffffffffa400001, 0x1ffffffffa140001,
0x1ffffffff9d80001, 0x1ffffffff9140001, 0x1ffffffff8ac0001, 0x1ffffffff8a80001,
0x1ffffffff81c0001, 0x1ffffffff7800001, 0x1ffffffff7680001, 0x1ffffffff7080001}
// Qi60 are the last one hundred (from 0xfffffffffffffff and downward) 60-bit NTT-friendly primes for N up to 65536
var Qi60 = []uint64{1152921504606584833, 1152921504598720513, 1152921504592429057, 1152921504581419009, 1152921504580894721,
1152921504578273281, 1152921504577748993, 1152921504577486849, 1152921504568836097, 1152921504565166081,
1152921504563331073, 1152921504556515329, 1152921504555466753, 1152921504554156033, 1152921504552583169,
1152921504542883841, 1152921504538951681, 1152921504537378817, 1152921504531873793, 1152921504521650177,
1152921504509853697, 1152921504508280833, 1152921504506970113, 1152921504495697921, 1152921504491241473,
1152921504488620033, 1152921504479444993, 1152921504470794241, 1152921504468172801, 1152921504462929921,
1152921504462667777, 1152921504455589889, 1152921504447987713, 1152921504442482689, 1152921504436191233,
1152921504427278337, 1152921504419414017, 1152921504409190401, 1152921504403947521, 1152921504396869633,
1152921504395821057, 1152921504373014529, 1152921504369344513, 1152921504368558081, 1152921504364625921,
1152921504362790913, 1152921504361218049, 1152921504353615873, 1152921504337887233, 1152921504337625089,
1152921504321372161, 1152921504314032129, 1152921504303022081, 1152921504301449217, 1152921504288342017,
1152921504287293441, 1152921504286769153, 1152921504282836993, 1152921504274972673, 1152921504266321921,
1152921504256622593, 1152921504253739009, 1152921504245088257, 1152921504241942529, 1152921504240107521,
1152921504239583233, 1152921504238010369, 1152921504234078209, 1152921504231718913, 1152921504230670337,
1152921504227524609, 1152921504214417409, 1152921504207339521, 1152921504205504513, 1152921504204193793,
1152921504190824449, 1152921504179552257, 1152921504177192961, 1152921504176668673, 1152921504174309377,
1152921504172474369, 1152921504164872193, 1152921504162512897, 1152921504139706369, 1152921504134987777,
1152921504132628481, 1152921504122142721, 1152921504120832001, 1152921504116899841, 1152921504105627649,
1152921504101957633, 1152921504100384769, 1152921504096452609, 1152921504093306881, 1152921504078364673,
1152921504067092481, 1152921504066306049, 1152921504057917441, 1152921504053723137, 1152921504050839553}
// Pi60 are the next [32:64] 61-bit close to 2^{62} NTT-friendly primes for N up to 2^{17}
var Pi60 = []uint64{0x1ffffffff6c80001, 0x1ffffffff6140001, 0x1ffffffff5f40001, 0x1ffffffff5700001,
0x1ffffffff4bc0001, 0x1ffffffff4380001, 0x1ffffffff3240001, 0x1ffffffff2dc0001,
0x1ffffffff1a40001, 0x1ffffffff11c0001, 0x1ffffffff0fc0001, 0x1ffffffff0d80001,
0x1ffffffff0c80001, 0x1ffffffff08c0001, 0x1fffffffefd00001, 0x1fffffffef9c0001,
0x1fffffffef600001, 0x1fffffffeef40001, 0x1fffffffeed40001, 0x1fffffffeed00001,
0x1fffffffeebc0001, 0x1fffffffed540001, 0x1fffffffed440001, 0x1fffffffed2c0001,
0x1fffffffed200001, 0x1fffffffec940001, 0x1fffffffec6c0001, 0x1fffffffebe80001,
0x1fffffffebac0001, 0x1fffffffeba40001, 0x1fffffffeb4c0001, 0x1fffffffeb280001}