wasmcurves
Version:
elliptic curves implementations in wasm
1,309 lines (1,073 loc) • 54.1 kB
JavaScript
const utils = require("../utils");
const buildF1m =require("../build_f1m.js");
const buildF1 =require("../build_f1.js");
const buildF2m =require("../build_f2m.js");
const buildF3m =require("../build_f3m.js");
const buildCurve =require("../build_curve_jacobian_a0.js");
const buildFFT = require("../build_fft");
const buildPol = require("../build_pol");
const buildQAP = require("../build_qap");
const buildApplyKey = require("../build_applykey");
const { bitLength, modInv, isOdd, isNegative } = require("../bigint.js");
module.exports = function buildBN128(module, _prefix) {
const prefix = _prefix || "bn128";
if (module.modules[prefix]) return prefix; // already builded
const q = 21888242871839275222246405745257275088696311157297823662689037894645226208583n;
const r = 21888242871839275222246405745257275088548364400416034343698204186575808495617n;
const n64 = Math.floor((bitLength(q - 1n) - 1)/64) +1;
const n8 = n64*8;
const frsize = n8;
const f1size = n8;
const f2size = f1size * 2;
const ftsize = f1size * 12;
const pr = module.alloc(utils.bigInt2BytesLE( r, frsize ));
const f1mPrefix = buildF1m(module, q, "f1m");
buildF1(module, r, "fr", "frm");
const pG1b = module.alloc(utils.bigInt2BytesLE( toMontgomery(3n), f1size ));
const g1mPrefix = buildCurve(module, "g1m", "f1m", pG1b);
buildFFT(module, "frm", "frm", "frm", "frm_mul");
buildPol(module, "pol", "frm");
buildQAP(module, "qap", "frm");
const f2mPrefix = buildF2m(module, "f1m_neg", "f2m", "f1m");
const pG2b = module.alloc([
...utils.bigInt2BytesLE( toMontgomery(19485874751759354771024239261021720505790618469301721065564631296452457478373n), f1size ),
...utils.bigInt2BytesLE( toMontgomery(266929791119991161246907387137283842545076965332900288569378510910307636690n), f1size )
]);
const g2mPrefix = buildCurve(module, "g2m", "f2m", pG2b);
function buildGTimesFr(fnName, opMul) {
const f = module.addFunction(fnName);
f.addParam("pG", "i32");
f.addParam("pFr", "i32");
f.addParam("pr", "i32");
const c = f.getCodeBuilder();
const AUX = c.i32_const(module.alloc(n8));
f.addCode(
c.call("frm_fromMontgomery", c.getLocal("pFr"), AUX),
c.call(
opMul,
c.getLocal("pG"),
AUX,
c.i32_const(n8),
c.getLocal("pr")
)
);
module.exportFunction(fnName);
}
buildGTimesFr("g1m_timesFr", "g1m_timesScalar");
buildFFT(module, "g1m", "g1m", "frm", "g1m_timesFr");
buildGTimesFr("g2m_timesFr", "g2m_timesScalar");
buildFFT(module, "g2m", "g2m", "frm", "g2m_timesFr");
buildGTimesFr("g1m_timesFrAffine", "g1m_timesScalarAffine");
buildGTimesFr("g2m_timesFrAffine", "g2m_timesScalarAffine");
buildApplyKey(module, "frm_batchApplyKey", "fmr", "frm", n8, n8, n8, "frm_mul");
buildApplyKey(module, "g1m_batchApplyKey", "g1m", "frm", n8*3, n8*3, n8, "g1m_timesFr");
buildApplyKey(module, "g1m_batchApplyKeyMixed", "g1m", "frm", n8*2, n8*3, n8, "g1m_timesFrAffine");
buildApplyKey(module, "g2m_batchApplyKey", "g2m", "frm", n8*2*3, n8*3*2, n8, "g2m_timesFr");
buildApplyKey(module, "g2m_batchApplyKeyMixed", "g2m", "frm", n8*2*2, n8*3*2, n8, "g2m_timesFrAffine");
function toMontgomery(a) {
return BigInt(a) * ( 1n << BigInt(f1size*8)) % q;
}
const G1gen = [
1n,
2n,
1n
];
const pG1gen = module.alloc(
[
...utils.bigInt2BytesLE( toMontgomery(G1gen[0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G1gen[1]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G1gen[2]), f1size ),
]
);
const G1zero = [
0n,
1n,
0n
];
const pG1zero = module.alloc(
[
...utils.bigInt2BytesLE( toMontgomery(G1zero[0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G1zero[1]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G1zero[2]), f1size )
]
);
const G2gen = [
[
10857046999023057135944570762232829481370756359578518086990519993285655852781n,
11559732032986387107991004021392285783925812861821192530917403151452391805634n,
],[
8495653923123431417604973247489272438418190587263600148770280649306958101930n,
4082367875863433681332203403145435568316851327593401208105741076214120093531n,
],[
1n,
0n,
]
];
const pG2gen = module.alloc(
[
...utils.bigInt2BytesLE( toMontgomery(G2gen[0][0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[0][1]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[1][0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[1][1]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[2][0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[2][1]), f1size ),
]
);
const G2zero = [
[
0n,
0n,
],[
1n,
0n,
],[
0n,
0n,
]
];
const pG2zero = module.alloc(
[
...utils.bigInt2BytesLE( toMontgomery(G2zero[0][0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2zero[0][1]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2zero[1][0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2zero[1][1]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2zero[2][0]), f1size ),
...utils.bigInt2BytesLE( toMontgomery(G2zero[2][1]), f1size ),
]
);
const pOneT = module.alloc([
...utils.bigInt2BytesLE( toMontgomery(1), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
...utils.bigInt2BytesLE( toMontgomery(0), f1size ),
]);
const pNonResidueF6 = module.alloc([
...utils.bigInt2BytesLE( toMontgomery(9), f1size ),
...utils.bigInt2BytesLE( toMontgomery(1), f1size ),
]);
const pTwoInv = module.alloc([
...utils.bigInt2BytesLE( toMontgomery( modInv(2n, q)), f1size ),
...utils.bigInt2BytesLE( 0n, f1size )
]);
const pAltBn128Twist = pNonResidueF6;
const pTwistCoefB = module.alloc([
...utils.bigInt2BytesLE( toMontgomery(19485874751759354771024239261021720505790618469301721065564631296452457478373n), f1size ),
...utils.bigInt2BytesLE( toMontgomery(266929791119991161246907387137283842545076965332900288569378510910307636690n), f1size ),
]);
function build_mulNR6() {
const f = module.addFunction(prefix + "_mulNR6");
f.addParam("x", "i32");
f.addParam("pr", "i32");
const c = f.getCodeBuilder();
f.addCode(
c.call(
f2mPrefix + "_mul",
c.i32_const(pNonResidueF6),
c.getLocal("x"),
c.getLocal("pr")
)
);
}
build_mulNR6();
const f6mPrefix = buildF3m(module, prefix+"_mulNR6", "f6m", "f2m");
function build_mulNR12() {
const f = module.addFunction(prefix + "_mulNR12");
f.addParam("x", "i32");
f.addParam("pr", "i32");
const c = f.getCodeBuilder();
f.addCode(
c.call(
f2mPrefix + "_mul",
c.i32_const(pNonResidueF6),
c.i32_add(c.getLocal("x"), c.i32_const(n8*4)),
c.getLocal("pr")
),
c.call(
f2mPrefix + "_copy",
c.getLocal("x"),
c.i32_add(c.getLocal("pr"), c.i32_const(n8*2)),
),
c.call(
f2mPrefix + "_copy",
c.i32_add(c.getLocal("x"), c.i32_const(n8*2)),
c.i32_add(c.getLocal("pr"), c.i32_const(n8*4)),
)
);
}
build_mulNR12();
const ftmPrefix = buildF2m(module, prefix+"_mulNR12", "ftm", f6mPrefix);
const ateLoopCount = 29793968203157093288n;
const ateLoopBitBytes = bits(ateLoopCount);
const pAteLoopBitBytes = module.alloc(ateLoopBitBytes);
const isLoopNegative = false;
const ateCoefSize = 3 * f2size;
const ateNDblCoefs = ateLoopBitBytes.length-1;
const ateNAddCoefs = ateLoopBitBytes.reduce((acc, b) => acc + ( b!=0 ? 1 : 0) ,0);
const ateNCoefs = ateNAddCoefs + ateNDblCoefs + 1;
const prePSize = 3*2*n8;
const preQSize = 3*n8*2 + ateNCoefs*ateCoefSize;
const finalExpIsNegative = false;
module.modules[prefix] = {
n64: n64,
pG1gen: pG1gen,
pG1zero: pG1zero,
pG1b: pG1b,
pG2gen: pG2gen,
pG2zero: pG2zero,
pG2b: pG2b,
pq: module.modules["f1m"].pq,
pr: pr,
pOneT: pOneT,
prePSize: prePSize,
preQSize: preQSize,
r: r.toString(),
q: q.toString()
};
// console.log("PrePSize: " +prePSize);
// console.log("PreQSize: " +preQSize);
const finalExpZ = 4965661367192848881n;
function naf(n) {
let E = n;
const res = [];
while (E > 0n) {
if (isOdd(E)) {
const z = 2 - Number(E % 4n);
res.push( z );
E = E - BigInt(z);
} else {
res.push( 0 );
}
E = E >> 1n;
}
return res;
}
function bits(n) {
let E = n;
const res = [];
while (E > 0n) {
if (isOdd(E)) {
res.push( 1 );
} else {
res.push( 0 );
}
E = E >> 1n;
}
return res;
}
function buildPrepareG1() {
const f = module.addFunction(prefix+ "_prepareG1");
f.addParam("pP", "i32");
f.addParam("ppreP", "i32");
const c = f.getCodeBuilder();
f.addCode(
c.call(g1mPrefix + "_normalize", c.getLocal("pP"), c.getLocal("ppreP")), // TODO Remove if already in affine
);
}
function buildPrepAddStep() {
const f = module.addFunction(prefix+ "_prepAddStep");
f.addParam("pQ", "i32");
f.addParam("pR", "i32");
f.addParam("pCoef", "i32");
const c = f.getCodeBuilder();
const X2 = c.getLocal("pQ");
const Y2 = c.i32_add(c.getLocal("pQ"), c.i32_const(f2size));
const X1 = c.getLocal("pR");
const Y1 = c.i32_add(c.getLocal("pR"), c.i32_const(f2size));
const Z1 = c.i32_add(c.getLocal("pR"), c.i32_const(2*f2size));
const ELL_0 = c.getLocal("pCoef");
const ELL_VW = c.i32_add(c.getLocal("pCoef"), c.i32_const(f2size));
const ELL_VV = c.i32_add(c.getLocal("pCoef"), c.i32_const(2*f2size));
const D = ELL_VW;
const E = c.i32_const(module.alloc(f2size));
const F = c.i32_const(module.alloc(f2size));
const G = c.i32_const(module.alloc(f2size));
const H = c.i32_const(module.alloc(f2size));
const I = c.i32_const(module.alloc(f2size));
const J = c.i32_const(module.alloc(f2size));
const AUX = c.i32_const(module.alloc(f2size));
f.addCode(
// D = X1 - X2*Z1
c.call(f2mPrefix + "_mul", X2, Z1, D),
c.call(f2mPrefix + "_sub", X1, D, D),
// E = Y1 - Y2*Z1
c.call(f2mPrefix + "_mul", Y2, Z1, E),
c.call(f2mPrefix + "_sub", Y1, E, E),
// F = D^2
c.call(f2mPrefix + "_square", D, F),
// G = E^2
c.call(f2mPrefix + "_square", E, G),
// H = D*F
c.call(f2mPrefix + "_mul", D, F, H),
// I = X1 * F
c.call(f2mPrefix + "_mul", X1, F, I),
// J = H + Z1*G - (I+I)
c.call(f2mPrefix + "_add", I, I, AUX),
c.call(f2mPrefix + "_mul", Z1, G, J),
c.call(f2mPrefix + "_add", H, J, J),
c.call(f2mPrefix + "_sub", J, AUX, J),
// X3 (X1) = D*J
c.call(f2mPrefix + "_mul", D, J, X1),
// Y3 (Y1) = E*(I-J)-(H*Y1)
c.call(f2mPrefix + "_mul", H, Y1, Y1),
c.call(f2mPrefix + "_sub", I, J, AUX),
c.call(f2mPrefix + "_mul", E, AUX, AUX),
c.call(f2mPrefix + "_sub", AUX, Y1, Y1),
// Z3 (Z1) = Z1*H
c.call(f2mPrefix + "_mul", Z1, H, Z1),
// ell_0 = xi * (E * X2 - D * Y2)
c.call(f2mPrefix + "_mul", D, Y2, AUX),
c.call(f2mPrefix + "_mul", E, X2, ELL_0),
c.call(f2mPrefix + "_sub", ELL_0, AUX, ELL_0),
c.call(f2mPrefix + "_mul", ELL_0, c.i32_const(pAltBn128Twist), ELL_0),
// ell_VV = - E (later: * xP)
c.call(f2mPrefix + "_neg", E, ELL_VV),
// ell_VW = D (later: * yP )
// Already assigned
);
}
function buildPrepDoubleStep() {
const f = module.addFunction(prefix+ "_prepDblStep");
f.addParam("pR", "i32");
f.addParam("pCoef", "i32");
const c = f.getCodeBuilder();
const X1 = c.getLocal("pR");
const Y1 = c.i32_add(c.getLocal("pR"), c.i32_const(f2size));
const Z1 = c.i32_add(c.getLocal("pR"), c.i32_const(2*f2size));
const ELL_0 = c.getLocal("pCoef");
const ELL_VW = c.i32_add(c.getLocal("pCoef"), c.i32_const(f2size));
const ELL_VV = c.i32_add(c.getLocal("pCoef"), c.i32_const(2*f2size));
const A = c.i32_const(module.alloc(f2size));
const B = c.i32_const(module.alloc(f2size));
const C = c.i32_const(module.alloc(f2size));
const D = c.i32_const(module.alloc(f2size));
const E = c.i32_const(module.alloc(f2size));
const F = c.i32_const(module.alloc(f2size));
const G = c.i32_const(module.alloc(f2size));
const H = c.i32_const(module.alloc(f2size));
const I = c.i32_const(module.alloc(f2size));
const J = c.i32_const(module.alloc(f2size));
const E2 = c.i32_const(module.alloc(f2size));
const AUX = c.i32_const(module.alloc(f2size));
f.addCode(
// A = X1 * Y1 / 2
c.call(f2mPrefix + "_mul", Y1, c.i32_const(pTwoInv), A),
c.call(f2mPrefix + "_mul", X1, A, A),
// B = Y1^2
c.call(f2mPrefix + "_square", Y1, B),
// C = Z1^2
c.call(f2mPrefix + "_square", Z1, C),
// D = 3 * C
c.call(f2mPrefix + "_add", C, C, D),
c.call(f2mPrefix + "_add", D, C, D),
// E = twist_b * D
c.call(f2mPrefix + "_mul", c.i32_const(pTwistCoefB), D, E),
// F = 3 * E
c.call(f2mPrefix + "_add", E, E, F),
c.call(f2mPrefix + "_add", E, F, F),
// G = (B+F)/2
c.call(f2mPrefix + "_add", B, F, G),
c.call(f2mPrefix + "_mul", G, c.i32_const(pTwoInv), G),
// H = (Y1+Z1)^2-(B+C)
c.call(f2mPrefix + "_add", B, C, AUX),
c.call(f2mPrefix + "_add", Y1, Z1, H),
c.call(f2mPrefix + "_square", H, H),
c.call(f2mPrefix + "_sub", H, AUX, H),
// I = E-B
c.call(f2mPrefix + "_sub", E, B, I),
// J = X1^2
c.call(f2mPrefix + "_square", X1, J),
// E_squared = E^2
c.call(f2mPrefix + "_square", E, E2),
// X3 (X1) = A * (B-F)
c.call(f2mPrefix + "_sub", B, F, AUX),
c.call(f2mPrefix + "_mul", A, AUX, X1),
// Y3 (Y1) = G^2 - 3*E^2
c.call(f2mPrefix + "_add", E2, E2, AUX),
c.call(f2mPrefix + "_add", E2, AUX, AUX),
c.call(f2mPrefix + "_square", G, Y1),
c.call(f2mPrefix + "_sub", Y1, AUX, Y1),
// Z3 (Z1) = B * H
c.call(f2mPrefix + "_mul", B, H, Z1),
// ell_0 = xi * I
c.call(f2mPrefix + "_mul", c.i32_const(pAltBn128Twist), I, ELL_0),
// ell_VW = - H (later: * yP)
c.call(f2mPrefix + "_neg", H, ELL_VW),
// ell_VV = 3*J (later: * xP)
c.call(f2mPrefix + "_add", J, J, ELL_VV),
c.call(f2mPrefix + "_add", J, ELL_VV, ELL_VV),
);
}
function buildMulByQ() {
const f = module.addFunction(prefix + "_mulByQ");
f.addParam("p1", "i32");
f.addParam("pr", "i32");
const c = f.getCodeBuilder();
const x = c.getLocal("p1");
const y = c.i32_add(c.getLocal("p1"), c.i32_const(f2size));
const z = c.i32_add(c.getLocal("p1"), c.i32_const(f2size*2));
const x3 = c.getLocal("pr");
const y3 = c.i32_add(c.getLocal("pr"), c.i32_const(f2size));
const z3 = c.i32_add(c.getLocal("pr"), c.i32_const(f2size*2));
const MulByQX = c.i32_const(module.alloc([
...utils.bigInt2BytesLE( toMontgomery("21575463638280843010398324269430826099269044274347216827212613867836435027261"), f1size ),
...utils.bigInt2BytesLE( toMontgomery("10307601595873709700152284273816112264069230130616436755625194854815875713954"), f1size ),
]));
const MulByQY = c.i32_const(module.alloc([
...utils.bigInt2BytesLE( toMontgomery("2821565182194536844548159561693502659359617185244120367078079554186484126554"), f1size ),
...utils.bigInt2BytesLE( toMontgomery("3505843767911556378687030309984248845540243509899259641013678093033130930403"), f1size ),
]));
f.addCode(
// The frobeniusMap(1) in this field, is the conjugate
c.call(f2mPrefix + "_conjugate", x, x3),
c.call(f2mPrefix + "_mul", MulByQX, x3, x3),
c.call(f2mPrefix + "_conjugate", y, y3),
c.call(f2mPrefix + "_mul", MulByQY, y3, y3),
c.call(f2mPrefix + "_conjugate", z, z3),
);
}
function buildPrepareG2() {
buildMulByQ();
const f = module.addFunction(prefix+ "_prepareG2");
f.addParam("pQ", "i32");
f.addParam("ppreQ", "i32");
f.addLocal("pCoef", "i32");
f.addLocal("i", "i32");
const c = f.getCodeBuilder();
const QX = c.getLocal("pQ");
const pR = module.alloc(f2size*3);
const R = c.i32_const(pR);
const RX = c.i32_const(pR);
const RY = c.i32_const(pR+f2size);
const RZ = c.i32_const(pR+2*f2size);
const cQX = c.i32_add( c.getLocal("ppreQ"), c.i32_const(0));
const cQY = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f2size));
const pQ1 = module.alloc(f2size*3);
const Q1 = c.i32_const(pQ1);
const pQ2 = module.alloc(f2size*3);
const Q2 = c.i32_const(pQ2);
const Q2Y = c.i32_const(pQ2 + f2size);
f.addCode(
c.call(g2mPrefix + "_normalize", QX, cQX), // TODO Remove if already in affine
c.call(f2mPrefix + "_copy", cQX, RX),
c.call(f2mPrefix + "_copy", cQY, RY),
c.call(f2mPrefix + "_one", RZ),
);
f.addCode(
c.setLocal("pCoef", c.i32_add( c.getLocal("ppreQ"), c.i32_const(f2size*3))),
c.setLocal("i", c.i32_const(ateLoopBitBytes.length-2)),
c.block(c.loop(
c.call(prefix + "_prepDblStep", R, c.getLocal("pCoef")),
c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
c.if(
c.i32_load8_s(c.getLocal("i"), pAteLoopBitBytes),
[
...c.call(prefix + "_prepAddStep", cQX, R, c.getLocal("pCoef")),
...c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
]
),
c.br_if(1, c.i32_eqz ( c.getLocal("i") )),
c.setLocal("i", c.i32_sub(c.getLocal("i"), c.i32_const(1))),
c.br(0)
))
);
f.addCode(
c.call(prefix + "_mulByQ", cQX, Q1),
c.call(prefix + "_mulByQ", Q1, Q2)
);
if (isLoopNegative) {
f.addCode(
c.call(f2mPrefix + "_neg", RY, RY),
);
}
f.addCode(
c.call(f2mPrefix + "_neg", Q2Y, Q2Y),
c.call(prefix + "_prepAddStep", Q1, R, c.getLocal("pCoef")),
c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
c.call(prefix + "_prepAddStep", Q2, R, c.getLocal("pCoef")),
c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
);
}
function buildMulBy024Old() {
const f = module.addFunction(prefix+ "__mulBy024Old");
f.addParam("pEll0", "i32");
f.addParam("pEllVW", "i32");
f.addParam("pEllVV", "i32");
f.addParam("pR", "i32"); // Result in F12
const c = f.getCodeBuilder();
const x0 = c.getLocal("pEll0");
const x2 = c.getLocal("pEllVV");
const x4 = c.getLocal("pEllVW");
const z0 = c.getLocal("pR");
const pAUX12 = module.alloc(ftsize);
const AUX12 = c.i32_const(pAUX12);
const AUX12_0 = c.i32_const(pAUX12);
const AUX12_2 = c.i32_const(pAUX12+f2size);
const AUX12_4 = c.i32_const(pAUX12+f2size*2);
const AUX12_6 = c.i32_const(pAUX12+f2size*3);
const AUX12_8 = c.i32_const(pAUX12+f2size*4);
const AUX12_10 = c.i32_const(pAUX12+f2size*5);
f.addCode(
c.call(f2mPrefix + "_copy", x0, AUX12_0),
c.call(f2mPrefix + "_zero", AUX12_2),
c.call(f2mPrefix + "_copy", x2, AUX12_4),
c.call(f2mPrefix + "_zero", AUX12_6),
c.call(f2mPrefix + "_copy", x4, AUX12_8),
c.call(f2mPrefix + "_zero", AUX12_10),
c.call(ftmPrefix + "_mul", AUX12, z0, z0),
);
}
function buildMulBy024() {
const f = module.addFunction(prefix+ "__mulBy024");
f.addParam("pEll0", "i32");
f.addParam("pEllVW", "i32");
f.addParam("pEllVV", "i32");
f.addParam("pR", "i32"); // Result in F12
const c = f.getCodeBuilder();
const x0 = c.getLocal("pEll0");
const x2 = c.getLocal("pEllVV");
const x4 = c.getLocal("pEllVW");
const z0 = c.getLocal("pR");
const z1 = c.i32_add(c.getLocal("pR"), c.i32_const(2*n8));
const z2 = c.i32_add(c.getLocal("pR"), c.i32_const(4*n8));
const z3 = c.i32_add(c.getLocal("pR"), c.i32_const(6*n8));
const z4 = c.i32_add(c.getLocal("pR"), c.i32_const(8*n8));
const z5 = c.i32_add(c.getLocal("pR"), c.i32_const(10*n8));
const t0 = c.i32_const(module.alloc(f2size));
const t1 = c.i32_const(module.alloc(f2size));
const t2 = c.i32_const(module.alloc(f2size));
const s0 = c.i32_const(module.alloc(f2size));
const T3 = c.i32_const(module.alloc(f2size));
const T4 = c.i32_const(module.alloc(f2size));
const D0 = c.i32_const(module.alloc(f2size));
const D2 = c.i32_const(module.alloc(f2size));
const D4 = c.i32_const(module.alloc(f2size));
const S1 = c.i32_const(module.alloc(f2size));
const AUX = c.i32_const(module.alloc(f2size));
f.addCode(
// D0 = z0 * x0;
c.call(f2mPrefix + "_mul", z0, x0, D0),
// D2 = z2 * x2;
c.call(f2mPrefix + "_mul", z2, x2, D2),
// D4 = z4 * x4;
c.call(f2mPrefix + "_mul", z4, x4, D4),
// t2 = z0 + z4;
c.call(f2mPrefix + "_add", z0, z4, t2),
// t1 = z0 + z2;
c.call(f2mPrefix + "_add", z0, z2, t1),
// s0 = z1 + z3 + z5;
c.call(f2mPrefix + "_add", z1, z3, s0),
c.call(f2mPrefix + "_add", s0, z5, s0),
// For z.a_.a_ = z0.
// S1 = z1 * x2;
c.call(f2mPrefix + "_mul", z1, x2, S1),
// T3 = S1 + D4;
c.call(f2mPrefix + "_add", S1, D4, T3),
// T4 = my_Fp6::non_residue * T3 + D0;
c.call(f2mPrefix + "_mul", c.i32_const(pNonResidueF6), T3, T4),
c.call(f2mPrefix + "_add", T4, D0, z0),
// z0 = T4;
// For z.a_.b_ = z1
// T3 = z5 * x4;
c.call(f2mPrefix + "_mul", z5, x4, T3),
// S1 = S1 + T3;
c.call(f2mPrefix + "_add", S1, T3, S1),
// T3 = T3 + D2;
c.call(f2mPrefix + "_add", T3, D2, T3),
// T4 = my_Fp6::non_residue * T3;
c.call(f2mPrefix + "_mul", c.i32_const(pNonResidueF6), T3, T4),
// T3 = z1 * x0;
c.call(f2mPrefix + "_mul", z1, x0, T3),
// S1 = S1 + T3;
c.call(f2mPrefix + "_add", S1, T3, S1),
// T4 = T4 + T3;
c.call(f2mPrefix + "_add", T4, T3, z1),
// z1 = T4;
// For z.a_.c_ = z2
// t0 = x0 + x2;
c.call(f2mPrefix + "_add", x0, x2, t0),
// T3 = t1 * t0 - D0 - D2;
c.call(f2mPrefix + "_mul", t1, t0, T3),
c.call(f2mPrefix + "_add", D0, D2, AUX),
c.call(f2mPrefix + "_sub", T3, AUX, T3),
// T4 = z3 * x4;
c.call(f2mPrefix + "_mul", z3, x4, T4),
// S1 = S1 + T4;
c.call(f2mPrefix + "_add", S1, T4, S1),
// For z.b_.a_ = z3 (z3 needs z2)
// t0 = z2 + z4;
c.call(f2mPrefix + "_add", z2, z4, t0),
// T3 = T3 + T4;
// z2 = T3;
c.call(f2mPrefix + "_add", T3, T4, z2),
// t1 = x2 + x4;
c.call(f2mPrefix + "_add", x2, x4, t1),
// T3 = t0 * t1 - D2 - D4;
c.call(f2mPrefix + "_mul", t1, t0, T3),
c.call(f2mPrefix + "_add", D2, D4, AUX),
c.call(f2mPrefix + "_sub", T3, AUX, T3),
// T4 = my_Fp6::non_residue * T3;
c.call(f2mPrefix + "_mul", c.i32_const(pNonResidueF6), T3, T4),
// T3 = z3 * x0;
c.call(f2mPrefix + "_mul", z3, x0, T3),
// S1 = S1 + T3;
c.call(f2mPrefix + "_add", S1, T3, S1),
// T4 = T4 + T3;
c.call(f2mPrefix + "_add", T4, T3, z3),
// z3 = T4;
// For z.b_.b_ = z4
// T3 = z5 * x2;
c.call(f2mPrefix + "_mul", z5, x2, T3),
// S1 = S1 + T3;
c.call(f2mPrefix + "_add", S1, T3, S1),
// T4 = my_Fp6::non_residue * T3;
c.call(f2mPrefix + "_mul", c.i32_const(pNonResidueF6), T3, T4),
// t0 = x0 + x4;
c.call(f2mPrefix + "_add", x0, x4, t0),
// T3 = t2 * t0 - D0 - D4;
c.call(f2mPrefix + "_mul", t2, t0, T3),
c.call(f2mPrefix + "_add", D0, D4, AUX),
c.call(f2mPrefix + "_sub", T3, AUX, T3),
// T4 = T4 + T3;
c.call(f2mPrefix + "_add", T4, T3, z4),
// z4 = T4;
// For z.b_.c_ = z5.
// t0 = x0 + x2 + x4;
c.call(f2mPrefix + "_add", x0, x2, t0),
c.call(f2mPrefix + "_add", t0, x4, t0),
// T3 = s0 * t0 - S1;
c.call(f2mPrefix + "_mul", s0, t0, T3),
c.call(f2mPrefix + "_sub", T3, S1, z5),
// z5 = T3;
);
}
function buildMillerLoop() {
const f = module.addFunction(prefix+ "_millerLoop");
f.addParam("ppreP", "i32");
f.addParam("ppreQ", "i32");
f.addParam("r", "i32");
f.addLocal("pCoef", "i32");
f.addLocal("i", "i32");
const c = f.getCodeBuilder();
const preP_PX = c.getLocal("ppreP");
const preP_PY = c.i32_add(c.getLocal("ppreP"), c.i32_const(f1size));
const ELL_0 = c.getLocal("pCoef");
const ELL_VW = c.i32_add(c.getLocal("pCoef"), c.i32_const(f2size));
const ELL_VV = c.i32_add(c.getLocal("pCoef"), c.i32_const(2*f2size));
const pVW = module.alloc(f2size);
const VW = c.i32_const(pVW);
const pVV = module.alloc(f2size);
const VV = c.i32_const(pVV);
const F = c.getLocal("r");
f.addCode(
c.call(ftmPrefix + "_one", F),
c.setLocal("pCoef", c.i32_add( c.getLocal("ppreQ"), c.i32_const(f2size*3))),
c.setLocal("i", c.i32_const(ateLoopBitBytes.length-2)),
c.block(c.loop(
c.call(ftmPrefix + "_square", F, F),
c.call(f2mPrefix + "_mul1", ELL_VW,preP_PY, VW),
c.call(f2mPrefix + "_mul1", ELL_VV, preP_PX, VV),
c.call(prefix + "__mulBy024", ELL_0, VW, VV, F),
c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
c.if(
c.i32_load8_s(c.getLocal("i"), pAteLoopBitBytes),
[
...c.call(f2mPrefix + "_mul1", ELL_VW, preP_PY, VW),
...c.call(f2mPrefix + "_mul1", ELL_VV, preP_PX, VV),
...c.call(prefix + "__mulBy024", ELL_0, VW, VV, F),
...c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
]
),
c.br_if(1, c.i32_eqz ( c.getLocal("i") )),
c.setLocal("i", c.i32_sub(c.getLocal("i"), c.i32_const(1))),
c.br(0)
))
);
if (isLoopNegative) {
f.addCode(
c.call(ftmPrefix + "_inverse", F, F),
);
}
f.addCode(
c.call(f2mPrefix + "_mul1", ELL_VW, preP_PY, VW),
c.call(f2mPrefix + "_mul1", ELL_VV, preP_PX, VV),
c.call(prefix + "__mulBy024", ELL_0, VW, VV, F),
c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
c.call(f2mPrefix + "_mul1", ELL_VW, preP_PY, VW),
c.call(f2mPrefix + "_mul1", ELL_VV, preP_PX, VV),
c.call(prefix + "__mulBy024", ELL_0, VW, VV, F),
c.setLocal("pCoef", c.i32_add(c.getLocal("pCoef"), c.i32_const(ateCoefSize))),
);
}
function buildFrobeniusMap(n) {
const F12 = [
[
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
],
[
[1n, 0n],
[8376118865763821496583973867626364092589906065868298776909617916018768340080n, 16469823323077808223889137241176536799009286646108169935659301613961712198316n],
[21888242871839275220042445260109153167277707414472061641714758635765020556617n, 0n],
[11697423496358154304825782922584725312912383441159505038794027105778954184319n, 303847389135065887422783454877609941456349188919719272345083954437860409601n],
[21888242871839275220042445260109153167277707414472061641714758635765020556616n, 0n],
[3321304630594332808241809054958361220322477375291206261884409189760185844239n, 5722266937896532885780051958958348231143373700109372999374820235121374419868n],
[21888242871839275222246405745257275088696311157297823662689037894645226208582n, 0n],
[13512124006075453725662431877630910996106405091429524885779419978626457868503n, 5418419548761466998357268504080738289687024511189653727029736280683514010267n],
[2203960485148121921418603742825762020974279258880205651966n, 0n],
[10190819375481120917420622822672549775783927716138318623895010788866272024264n, 21584395482704209334823622290379665147239961968378104390343953940207365798982n],
[2203960485148121921418603742825762020974279258880205651967n, 0n],
[18566938241244942414004596690298913868373833782006617400804628704885040364344n, 16165975933942742336466353786298926857552937457188450663314217659523851788715n],
]
];
const F6 = [
[
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
[1n, 0n],
],
[
[1n, 0n],
[21575463638280843010398324269430826099269044274347216827212613867836435027261n, 10307601595873709700152284273816112264069230130616436755625194854815875713954n],
[21888242871839275220042445260109153167277707414472061641714758635765020556616n, 0n],
[3772000881919853776433695186713858239009073593817195771773381919316419345261n, 2236595495967245188281701248203181795121068902605861227855261137820944008926n],
[2203960485148121921418603742825762020974279258880205651966n, 0n],
[18429021223477853657660792034369865839114504446431234726392080002137598044644n, 9344045779998320333812420223237981029506012124075525679208581902008406485703n],
],
[
[1n, 0n],
[2581911344467009335267311115468803099551665605076196740867805258568234346338n, 19937756971775647987995932169929341994314640652964949448313374472400716661030n],
[2203960485148121921418603742825762020974279258880205651966n, 0n],
[5324479202449903542726783395506214481928257762400643279780343368557297135718n, 16208900380737693084919495127334387981393726419856888799917914180988844123039n],
[21888242871839275220042445260109153167277707414472061641714758635765020556616n, 0n],
[13981852324922362344252311234282257507216387789820983642040889267519694726527n, 7629828391165209371577384193250820201684255241773809077146787135900891633097n],
]
];
const f = module.addFunction(prefix+ "__frobeniusMap"+n);
f.addParam("x", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
for (let i=0; i<6; i++) {
const X = (i==0) ? c.getLocal("x") : c.i32_add(c.getLocal("x"), c.i32_const(i*f2size));
const Xc0 = X;
const Xc1 = c.i32_add(c.getLocal("x"), c.i32_const(i*f2size + f1size));
const R = (i==0) ? c.getLocal("r") : c.i32_add(c.getLocal("r"), c.i32_const(i*f2size));
const Rc0 = R;
const Rc1 = c.i32_add(c.getLocal("r"), c.i32_const(i*f2size + f1size));
const coef = mul2(F12[Math.floor(i/3)][n%12] , F6[i%3][n%6]);
const pCoef = module.alloc([
...utils.bigInt2BytesLE(toMontgomery(coef[0]), 32),
...utils.bigInt2BytesLE(toMontgomery(coef[1]), 32),
]);
if (n%2 == 1) {
f.addCode(
c.call(f1mPrefix + "_copy", Xc0, Rc0),
c.call(f1mPrefix + "_neg", Xc1, Rc1),
c.call(f2mPrefix + "_mul", R, c.i32_const(pCoef), R),
);
} else {
f.addCode(c.call(f2mPrefix + "_mul", X, c.i32_const(pCoef), R));
}
}
function mul2(a, b) {
const ac0 = BigInt(a[0]);
const ac1 = BigInt(a[1]);
const bc0 = BigInt(b[0]);
const bc1 = BigInt(b[1]);
const res = [
(ac0 * bc0 - ( ac1 * bc1) ) % q,
(ac0 * bc1 + ( ac1 * bc0) ) % q,
];
if (isNegative(res[0])) res[0] = res[0] + q;
return res;
}
}
function buildFinalExponentiationFirstChunk() {
const f = module.addFunction(prefix+ "__finalExponentiationFirstChunk");
f.addParam("x", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
const elt = c.getLocal("x");
const eltC0 = elt;
const eltC1 = c.i32_add(elt, c.i32_const(n8*6));
const r = c.getLocal("r");
const pA = module.alloc(ftsize);
const A = c.i32_const(pA);
const Ac0 = A;
const Ac1 = c.i32_const(pA + n8*6);
const B = c.i32_const(module.alloc(ftsize));
const C = c.i32_const(module.alloc(ftsize));
const D = c.i32_const(module.alloc(ftsize));
f.addCode(
// const alt_bn128_Fq12 A = alt_bn128_Fq12(elt.c0,-elt.c1);
c.call(f6mPrefix + "_copy", eltC0, Ac0),
c.call(f6mPrefix + "_neg", eltC1, Ac1),
// const alt_bn128_Fq12 B = elt.inverse();
c.call(ftmPrefix + "_inverse", elt, B),
// const alt_bn128_Fq12 C = A * B;
c.call(ftmPrefix + "_mul", A, B, C),
// const alt_bn128_Fq12 D = C.Frobenius_map(2);
c.call(prefix + "__frobeniusMap2", C, D),
// const alt_bn128_Fq12 result = D * C;
c.call(ftmPrefix + "_mul", C, D, r),
);
}
function buildCyclotomicSquare() {
const f = module.addFunction(prefix+ "__cyclotomicSquare");
f.addParam("x", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
const x0 = c.getLocal("x");
const x4 = c.i32_add(c.getLocal("x"), c.i32_const(f2size));
const x3 = c.i32_add(c.getLocal("x"), c.i32_const(2*f2size));
const x2 = c.i32_add(c.getLocal("x"), c.i32_const(3*f2size));
const x1 = c.i32_add(c.getLocal("x"), c.i32_const(4*f2size));
const x5 = c.i32_add(c.getLocal("x"), c.i32_const(5*f2size));
const r0 = c.getLocal("r");
const r4 = c.i32_add(c.getLocal("r"), c.i32_const(f2size));
const r3 = c.i32_add(c.getLocal("r"), c.i32_const(2*f2size));
const r2 = c.i32_add(c.getLocal("r"), c.i32_const(3*f2size));
const r1 = c.i32_add(c.getLocal("r"), c.i32_const(4*f2size));
const r5 = c.i32_add(c.getLocal("r"), c.i32_const(5*f2size));
const t0 = c.i32_const(module.alloc(f2size));
const t1 = c.i32_const(module.alloc(f2size));
const t2 = c.i32_const(module.alloc(f2size));
const t3 = c.i32_const(module.alloc(f2size));
const t4 = c.i32_const(module.alloc(f2size));
const t5 = c.i32_const(module.alloc(f2size));
const tmp = c.i32_const(module.alloc(f2size));
const AUX = c.i32_const(module.alloc(f2size));
f.addCode(
// // t0 + t1*y = (z0 + z1*y)^2 = a^2
// tmp = z0 * z1;
// t0 = (z0 + z1) * (z0 + my_Fp6::non_residue * z1) - tmp - my_Fp6::non_residue * tmp;
// t1 = tmp + tmp;
c.call(f2mPrefix + "_mul", x0, x1, tmp),
c.call(f2mPrefix + "_mul", x1, c.i32_const(pNonResidueF6), t0),
c.call(f2mPrefix + "_add", x0, t0, t0),
c.call(f2mPrefix + "_add", x0, x1, AUX),
c.call(f2mPrefix + "_mul", AUX, t0, t0),
c.call(f2mPrefix + "_mul", c.i32_const(pNonResidueF6), tmp, AUX),
c.call(f2mPrefix + "_add", tmp, AUX, AUX),
c.call(f2mPrefix + "_sub", t0, AUX, t0),
c.call(f2mPrefix + "_add", tmp, tmp, t1),
// // t2 + t3*y = (z2 + z3*y)^2 = b^2
// tmp = z2 * z3;
// t2 = (z2 + z3) * (z2 + my_Fp6::non_residue * z3) - tmp - my_Fp6::non_residue * tmp;
// t3 = tmp + tmp;
c.call(f2mPrefix + "_mul", x2, x3, tmp),
c.call(f2mPrefix + "_mul", x3, c.i32_const(pNonResidueF6), t2),
c.call(f2mPrefix + "_add", x2, t2, t2),
c.call(f2mPrefix + "_add", x2, x3, AUX),
c.call(f2mPrefix + "_mul", AUX, t2, t2),
c.call(f2mPrefix + "_mul", c.i32_const(pNonResidueF6), tmp, AUX),
c.call(f2mPrefix + "_add", tmp, AUX, AUX),
c.call(f2mPrefix + "_sub", t2, AUX, t2),
c.call(f2mPrefix + "_add", tmp, tmp, t3),
// // t4 + t5*y = (z4 + z5*y)^2 = c^2
// tmp = z4 * z5;
// t4 = (z4 + z5) * (z4 + my_Fp6::non_residue * z5) - tmp - my_Fp6::non_residue * tmp;
// t5 = tmp + tmp;
c.call(f2mPrefix + "_mul", x4, x5, tmp),
c.call(f2mPrefix + "_mul", x5, c.i32_const(pNonResidueF6), t4),
c.call(f2mPrefix + "_add", x4, t4, t4),
c.call(f2mPrefix + "_add", x4, x5, AUX),
c.call(f2mPrefix + "_mul", AUX, t4, t4),
c.call(f2mPrefix + "_mul", c.i32_const(pNonResidueF6), tmp, AUX),
c.call(f2mPrefix + "_add", tmp, AUX, AUX),
c.call(f2mPrefix + "_sub", t4, AUX, t4),
c.call(f2mPrefix + "_add", tmp, tmp, t5),
// For A
// z0 = 3 * t0 - 2 * z0
c.call(f2mPrefix + "_sub", t0, x0, r0),
c.call(f2mPrefix + "_add", r0, r0, r0),
c.call(f2mPrefix + "_add", t0, r0, r0),
// z1 = 3 * t1 + 2 * z1
c.call(f2mPrefix + "_add", t1, x1, r1),
c.call(f2mPrefix + "_add", r1, r1, r1),
c.call(f2mPrefix + "_add", t1, r1, r1),
// For B
// z2 = 3 * (xi * t5) + 2 * z2
c.call(f2mPrefix + "_mul", t5, c.i32_const(pAltBn128Twist), AUX),
c.call(f2mPrefix + "_add", AUX, x2, r2),
c.call(f2mPrefix + "_add", r2, r2, r2),
c.call(f2mPrefix + "_add", AUX, r2, r2),
// z3 = 3 * t4 - 2 * z3
c.call(f2mPrefix + "_sub", t4, x3, r3),
c.call(f2mPrefix + "_add", r3, r3, r3),
c.call(f2mPrefix + "_add", t4, r3, r3),
// For C
// z4 = 3 * t2 - 2 * z4
c.call(f2mPrefix + "_sub", t2, x4, r4),
c.call(f2mPrefix + "_add", r4, r4, r4),
c.call(f2mPrefix + "_add", t2, r4, r4),
// z5 = 3 * t3 + 2 * z5
c.call(f2mPrefix + "_add", t3, x5, r5),
c.call(f2mPrefix + "_add", r5, r5, r5),
c.call(f2mPrefix + "_add", t3, r5, r5),
);
}
function buildCyclotomicExp(exponent, fnName) {
const exponentNafBytes = naf(exponent).map( (b) => (b==-1 ? 0xFF: b) );
const pExponentNafBytes = module.alloc(exponentNafBytes);
const f = module.addFunction(prefix+ "__cyclotomicExp_"+fnName);
f.addParam("x", "i32");
f.addParam("r", "i32");
f.addLocal("bit", "i32");
f.addLocal("i", "i32");
const c = f.getCodeBuilder();
const x = c.getLocal("x");
const res = c.getLocal("r");
const inverse = c.i32_const(module.alloc(ftsize));
f.addCode(
c.call(ftmPrefix + "_conjugate", x, inverse),
c.call(ftmPrefix + "_one", res),
c.if(
c.teeLocal("bit", c.i32_load8_s(c.i32_const(exponentNafBytes.length-1), pExponentNafBytes)),
c.if(
c.i32_eq(
c.getLocal("bit"),
c.i32_const(1)
),
c.call(ftmPrefix + "_mul", res, x, res),
c.call(ftmPrefix + "_mul", res, inverse, res),
)
),
c.setLocal("i", c.i32_const(exponentNafBytes.length-2)),
c.block(c.loop(
c.call(prefix + "__cyclotomicSquare", res, res),
c.if(
c.teeLocal("bit", c.i32_load8_s(c.getLocal("i"), pExponentNafBytes)),
c.if(
c.i32_eq(
c.getLocal("bit"),
c.i32_const(1)
),
c.call(ftmPrefix + "_mul", res, x, res),
c.call(ftmPrefix + "_mul", res, inverse, res),
)
),
c.br_if(1, c.i32_eqz ( c.getLocal("i") )),
c.setLocal("i", c.i32_sub(c.getLocal("i"), c.i32_const(1))),
c.br(0)
))
);
}
function buildFinalExponentiationLastChunk() {
buildCyclotomicSquare();
buildCyclotomicExp(finalExpZ, "w0");
const f = module.addFunction(prefix+ "__finalExponentiationLastChunk");
f.addParam("x", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
const elt = c.getLocal("x");
const result = c.getLocal("r");
const A = c.i32_const(module.alloc(ftsize));
const B = c.i32_const(module.alloc(ftsize));
const C = c.i32_const(module.alloc(ftsize));
const D = c.i32_const(module.alloc(ftsize));
const E = c.i32_const(module.alloc(ftsize));
const F = c.i32_const(module.alloc(ftsize));
const G = c.i32_const(module.alloc(ftsize));
const H = c.i32_const(module.alloc(ftsize));
const I = c.i32_const(module.alloc(ftsize));
const J = c.i32_const(module.alloc(ftsize));
const K = c.i32_const(module.alloc(ftsize));
const L = c.i32_const(module.alloc(ftsize));
const M = c.i32_const(module.alloc(ftsize));
const N = c.i32_const(module.alloc(ftsize));
const O = c.i32_const(module.alloc(ftsize));
const P = c.i32_const(module.alloc(ftsize));
const Q = c.i32_const(module.alloc(ftsize));
const R = c.i32_const(module.alloc(ftsize));
const S = c.i32_const(module.alloc(ftsize));
const T = c.i32_const(module.alloc(ftsize));
const U = c.i32_const(module.alloc(ftsize));
f.addCode(
// A = exp_by_neg_z(elt) // = elt^(-z)
c.call(prefix + "__cyclotomicExp_w0", elt, A),
finalExpIsNegative ? [] : c.call(ftmPrefix + "_conjugate", A, A),
// B = A^2 // = elt^(-2*z)
c.call(prefix + "__cyclotomicSquare", A, B),
// C = B^2 // = elt^(-4*z)
c.call(prefix + "__cyclotomicSquare", B, C),
// D = C * B // = elt^(-6*z)
c.call(ftmPrefix + "_mul", C, B, D),
// E = exp_by_neg_z(D) // = elt^(6*z^2)
c.call(prefix + "__cyclotomicExp_w0", D, E),
finalExpIsNegative ? [] : c.call(ftmPrefix + "_conjugate", E, E),
// F = E^2 // = elt^(12*z^2)
c.call(prefix + "__cyclotomicSquare", E, F),
// G = epx_by_neg_z(F) // = elt^(-12*z^3)
c.call(prefix + "__cyclotomicExp_w0", F, G),
finalExpIsNegative ? [] : c.call(ftmPrefix + "_conjugate", G, G),
// H = conj(D) // = elt^(6*z)
c.call(ftmPrefix + "_conjugate", D, H),
// I = conj(G) // = elt^(12*z^3)
c.call(ftmPrefix + "_conjugate", G, I),
// J = I * E // = elt^(12*z^3 + 6*z^2)
c.call(ftmPrefix + "_mul", I, E, J),
// K = J * H // = elt^(12*z^3 + 6*z^2 + 6*z)
c.call(ftmPrefix + "_mul", J, H, K),
// L = K * B // = elt^(12*z^3 + 6*z^2 + 4*z)
c.call(ftmPrefix + "_mul", K, B, L),
// M = K * E // = elt^(12*z^3 + 12*z^2 + 6*z)
c.call(ftmPrefix + "_mul", K, E, M),
// N = M * elt // = elt^(12*z^3 + 12*z^2 + 6*z + 1)
c.call(ftmPrefix + "_mul", M, elt, N),
// O = L.Frobenius_map(1) // = elt^(q*(12*z^3 + 6*z^2 + 4*z))
c.call(prefix + "__frobeniusMap1", L, O),
// P = O * N // = elt^(q*(12*z^3 + 6*z^2 + 4*z) * (12*z^3 + 12*z^2 + 6*z + 1))
c.call(ftmPrefix + "_mul", O, N, P),
// Q = K.Frobenius_map(2) // = elt^(q^2 * (12*z^3 + 6*z^2 + 6*z))
c.call(prefix + "__frobeniusMap2", K, Q),
// R = Q * P // = elt^(q^2 * (12*z^3 + 6*z^2 + 6*z) + q*(12*z^3 + 6*z^2 + 4*z) * (12*z^3 + 12*z^2 + 6*z + 1))
c.call(ftmPrefix + "_mul", Q, P, R),
// S = conj(elt) // = elt^(-1)
c.call(ftmPrefix + "_conjugate", elt, S),
// T = S * L // = elt^(12*z^3 + 6*z^2 + 4*z - 1)
c.call(ftmPrefix + "_mul", S, L, T),
// U = T.Frobenius_map(3) // = elt^(q^3(12*z^3 + 6*z^2 + 4*z - 1))
c.call(prefix + "__frobeniusMap3", T, U),
// V = U * R // = elt^(q^3(12*z^3 + 6*z^2 + 4*z - 1) + q^2 * (12*z^3 + 6*z^2 + 6*z) + q*(12*z^3 + 6*z^2 + 4*z) * (12*z^3 + 12*z^2 + 6*z + 1))
c.call(ftmPrefix + "_mul", U, R, result),
// result = V
);
}
function buildFinalExponentiation() {
buildFinalExponentiationFirstChunk();
buildFinalExponentiationLastChunk();
const f = module.addFunction(prefix+ "_finalExponentiation");
f.addParam("x", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
const elt = c.getLocal("x");
const result = c.getLocal("r");
const eltToFirstChunk = c.i32_const(module.alloc(ftsize));
f.addCode(
c.call(prefix + "__finalExponentiationFirstChunk", elt, eltToFirstChunk ),
c.call(prefix + "__finalExponentiationLastChunk", eltToFirstChunk, result )
);
}
function buildFinalExponentiationOld() {
const f = module.addFunction(prefix+ "_finalExponentiationOld");