wasmcurves
Version:
elliptic curves implementations in wasm
1,265 lines (1,026 loc) • 61.4 kB
JavaScript
const utils = require("../utils");
const buildF1m =require("../build_f1m.js");
const buildF1 =require("../build_f1.js");
const buildF2m =require("../build_f2m.js");
const buildF3m =require("../build_f3m.js");
const buildCurve =require("../build_curve_projective.js");
const { bitLength, isOdd, modInv } = require("../bigint.js");
//
// Curve definition: https://coinlist.co/build/coda/pages/mnt6753
//
module.exports = function buildMNT6753(module, _prefix) {
// Name under which this curve is registered in module.modules; defaults to "mnt6753".
const prefix = _prefix || "mnt6753";
if (module.modules[prefix]) return prefix; // already built
// Modulus handed to the base-field builders below.
const q = 41898490967918953402344214791240637128170709919953949071783502921025352812571106773058893763790338921418070971888458477323173057491593855069696241854796396165721416325350064441470418137846398469611935719059908164220784476160001n;
// Number of 64-bit words needed for q - 1, and the same size expressed in bytes.
const n64 = Math.floor((bitLength(q - 1n) - 1)/64) +1;
const n8 = n64*8;
// Byte sizes of elements made of 1, 2, 3 and 6 base-field coefficients.
const f1size = n8;
const f2size = f1size * 2;
const f3size = f1size * 3;
const ftsize = f1size*6;
// Base-field arithmetic; the returned prefix is used to name every f1m call emitted below.
const f1mPrefix = buildF1m(module, q, "f1m");
buildF1(module, q, "f1", "f1m");
/**
 * Scales a value by 2^(8*f1size) and reduces it modulo q.
 *
 * @param {bigint|number|string} a - Value to convert (anything BigInt() accepts).
 * @returns {bigint} (a * 2^(8*f1size)) mod q.
 */
function toMontgomery(a) {
    const radix = 1n << BigInt(f1size * 8);
    const scaled = BigInt(a) * radix;
    return scaled % q;
}
/**
 * Registers the wasm function `<prefix>_mulNR3(x, pr)`.
 *
 * The emitted body is a single `<f1mPrefix>_mul` call whose arguments are,
 * in order: a stored constant toMontgomery(11), the pointer `x`, and the
 * pointer `pr`.
 */
function build_mulNR3() {
    const fn = module.addFunction(prefix + "_mulNR3");
    fn.addParam("x", "i32");
    fn.addParam("pr", "i32");

    const cb = fn.getCodeBuilder();

    // The constant 11, converted with toMontgomery and placed in module memory.
    const nonResidueBytes = utils.bigInt2BytesLE(toMontgomery(11), 96);
    const pNonResidue = module.alloc(nonResidueBytes);

    const mulCall = cb.call(
        f1mPrefix + "_mul",
        cb.i32_const(pNonResidue),
        cb.getLocal("x"),
        cb.getLocal("pr")
    );
    fn.addCode(mulCall);
}

build_mulNR3();

// Both extension-field builders receive the same helper name and the base-field prefix.
const f2mPrefix = buildF2m(module, prefix + "_mulNR3", "f2m", f1mPrefix);
const f3mPrefix = buildF3m(module, prefix + "_mulNR3", "f3m", f1mPrefix);
/**
 * Registers the wasm function `<prefix>_mulNR6(x, pr)`.
 *
 * Treating `x` and `pr` as three consecutive n8-byte coefficients, the
 * emitted code performs, in this order:
 *   pr[0] = 11 * x[2]   (via `<f1mPrefix>_mul`)
 *   pr[1] = x[0]        (via `<f1mPrefix>_copy`)
 *   pr[2] = x[1]        (via `<f1mPrefix>_copy`)
 * NOTE(review): pr[0] is written before x[0] is read, so this presumably
 * requires x and pr not to alias -- confirm against the callers.
 */
function build_mulNR6() {
    const fn = module.addFunction(prefix + "_mulNR6");
    fn.addParam("x", "i32");
    fn.addParam("pr", "i32");

    const cb = fn.getCodeBuilder();

    const pNonResidue = module.alloc(utils.bigInt2BytesLE(toMontgomery(11), 96));

    // Address of the k-th n8-byte coefficient behind the named local pointer.
    const coefAt = (localName, k) => cb.i32_add(cb.getLocal(localName), cb.i32_const(k * n8));

    fn.addCode(
        cb.call(f1mPrefix + "_mul", cb.i32_const(pNonResidue), coefAt("x", 2), cb.getLocal("pr")),
        cb.call(f1mPrefix + "_copy", cb.getLocal("x"), coefAt("pr", 1)),
        cb.call(f1mPrefix + "_copy", coefAt("x", 1), coefAt("pr", 2))
    );
}

build_mulNR6();

// Quadratic extension built on top of f3m; its elements are ftsize bytes.
const ftmPrefix = buildF2m(module, prefix + "_mulNR6", "ftm", f3mPrefix);
/**
 * Registers the wasm function `<prefix>_mulByA1(x, pr)`.
 *
 * The emitted body is one `<f1mPrefix>_mul` call taking the stored constant
 * toMontgomery(11), the pointer `x` and the pointer `pr`. The function name
 * is passed to buildCurve for the "g1m" group.
 */
function build_mulByA1() {
    const fn = module.addFunction(prefix + "_mulByA1");
    fn.addParam("x", "i32");
    fn.addParam("pr", "i32");

    const cb = fn.getCodeBuilder();

    const coefABytes = utils.bigInt2BytesLE(toMontgomery(11), 96);
    const pCoefA = module.alloc(coefABytes);

    const mulCall = cb.call(
        f1mPrefix + "_mul",
        cb.i32_const(pCoefA),
        cb.getLocal("x"),
        cb.getLocal("pr")
    );
    fn.addCode(mulCall);
}

build_mulByA1();

const g1mPrefix = buildCurve(module, "g1m", f1mPrefix, prefix + "_mulByA1");
/**
 * Registers the wasm function `<prefix>_mulByA2(x, pr)`.
 *
 * With x = [a, b, c] (three n8-byte coefficients), A = 11 and nr = 11, the
 * emitted code writes pr = [b*A*nr, c*A*nr, a*A], in that order.
 * NOTE(review): pr[0] is written before x[0] is read, so this presumably
 * requires x and pr not to alias -- confirm against the callers.
 */
function build_mulByA2() {
    const fn = module.addFunction(prefix + "_mulByA2");
    fn.addParam("x", "i32");
    fn.addParam("pr", "i32");

    const cb = fn.getCodeBuilder();

    // Two stored constants: A and A*nr.
    const pCoefA = module.alloc(utils.bigInt2BytesLE(toMontgomery(11), 96));
    const pCoefANR = module.alloc(utils.bigInt2BytesLE(toMontgomery(11*11), 96));

    // Address of the k-th n8-byte coefficient behind the named local pointer.
    const coefAt = (localName, k) => cb.i32_add(cb.getLocal(localName), cb.i32_const(k * n8));

    fn.addCode(
        // pr[0] = (A*nr) * x[1]
        cb.call(f1mPrefix + "_mul", cb.i32_const(pCoefANR), coefAt("x", 1), cb.getLocal("pr")),
        // pr[1] = (A*nr) * x[2]
        cb.call(f1mPrefix + "_mul", cb.i32_const(pCoefANR), coefAt("x", 2), coefAt("pr", 1)),
        // pr[2] = A * x[0]
        cb.call(f1mPrefix + "_mul", cb.i32_const(pCoefA), cb.getLocal("x"), coefAt("pr", 2))
    );
}

build_mulByA2();

const g2mPrefix = buildCurve(module, "g2m", f3mPrefix, prefix + "_mulByA2");
// Three coordinates of the G1 generator; the third coordinate is 1.
const G1gen = [
16364236387491689444759057944334173579070747473738339749093487337644739228935268157504218078126401066954815152892688541654726829424326599038522503517302466226143788988217410842672857564665527806044250003808514184274233938437290n,
4510127914410645922431074687553594593336087066778984214797709122300210966076979927285161950203037801392624582544098750667549188549761032654706830225743998064330900301346566408501390638273322467173741629353517809979540986561128n,
1n
];
// Generator stored in module memory: each coordinate is converted with
// toMontgomery and serialized as 96 little-endian bytes.
const pG1gen = module.alloc(
[
...utils.bigInt2BytesLE( toMontgomery(G1gen[0]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G1gen[1]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G1gen[2]), 96 )
]
);
// Coordinates (0, 1, 0) exported below as the G1 "zero" point.
const G1zero = [0n, 1n, 0n];

// Same encoding as the generator: toMontgomery, then 96 little-endian bytes
// per coordinate, concatenated in coordinate order.
const pG1zero = module.alloc(
    G1zero.flatMap((coord) => [...utils.bigInt2BytesLE(toMontgomery(coord), 96)])
);
// G2 generator: three coordinates, each made of three base-field coefficients.
// The third coordinate is (1, 0, 0).
const G2gen = [
[
46538297238006280434045879335349383221210789488441126073640895239023832290080310125413049878152095926176013036314720850781686614265244307536450228450615346834324267478485994670716807428718518299710702671895190475661871557310n,
10329739935427016564561842963551883445915701424214177782911128765230271790215029185795830999583638744119368571742929964793955375930677178544873424392910884024986348059137449389533744851691082159233065444766899262771358355816328n,
19962817058174334691864015232062671736353756221485896034072814261894530786568591431279230352444205682361463997175937973249929732063490256813101714586199642571344378012210374327764059557816647980334733538226843692316285591005879n
],[
5648166377754359996653513138027891970842739892107427747585228022871109585680076240624013411622970109911154113378703562803827053335040877618934773712021441101121297691389632155906182656254145368668854360318258860716497525179898n,
26817850356025045630477313828875808893994935265863280918207940412617168254772789578700316551065949899971937475487458539503514034928974530432009759562975983077355912050606509147904958229398389093697494174311832813615564256810453n,
32332319709358578441696731586704495581796858962594701633932927358040566210788542624963749336109940335257143899293177116050031684054348958813290781394131284657165540476824211295508498842102093219808642563477603392470909217611033n
],[
1n,
0n,
0n
]
];
// Generator stored in module memory: nine coefficients in row-major order,
// each converted with toMontgomery and serialized as 96 little-endian bytes.
const pG2gen = module.alloc(
[
...utils.bigInt2BytesLE( toMontgomery(G2gen[0][0]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[0][1]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[0][2]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[1][0]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[1][1]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[1][2]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[2][0]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[2][1]), 96 ),
...utils.bigInt2BytesLE( toMontgomery(G2gen[2][2]), 96 ),
]
);
// Coordinates exported below as the G2 "zero" point: X = 0, Y = (1, 0, 0), Z = 0.
const G2zero = [
    [0n, 0n, 0n],
    [1n, 0n, 0n],
    [0n, 0n, 0n],
];

// Nine coefficients in row-major order, each converted with toMontgomery and
// serialized as 96 little-endian bytes.
const pG2zero = module.alloc(
    G2zero
        .flat()
        .flatMap((coef) => [...utils.bigInt2BytesLE(toMontgomery(coef), 96)])
);
// Constant exported as `pr`; note it is serialized as-is, without toMontgomery.
const r = 41898490967918953402344214791240637128170709919953949071783502921025352812571106773058893763790338921418070971888253786114353726529584385201591605722013126468931404347949840543007986327743462853720628051692141265303114721689601n;
const pr = module.alloc(utils.bigInt2BytesLE( r, 96 ));
// Six-coefficient element (1, 0, 0, 0, 0, 0), i.e. ftsize bytes, each coefficient
// converted with toMontgomery.
const pOneT = module.alloc([
...utils.bigInt2BytesLE( toMontgomery(1), 96 ),
...utils.bigInt2BytesLE( toMontgomery(0), 96 ),
...utils.bigInt2BytesLE( toMontgomery(0), 96 ),
...utils.bigInt2BytesLE( toMontgomery(0), 96 ),
...utils.bigInt2BytesLE( toMontgomery(0), 96 ),
...utils.bigInt2BytesLE( toMontgomery(0), 96 )
]);
// Publish the curve's sizes and constant pointers under its prefix.
// `pq` is taken from the already-registered "f1m" module entry.
module.modules[prefix] = {
    n64,
    pG1gen,
    pG1zero,
    pG2gen,
    pG2zero,
    pq: module.modules["f1m"].pq,
    pr,
    pOneT,
};
//////////////
/// Pairing //
//////////////
// Loop count driving the precomputation and Miller-loop generators below.
const ateLoopCount = 204691208819330962009469868104636132783269696790011977400223898462431810102935615891307667367766898917669754470400n;
// One array entry (0 or 1) per bit of the loop count, least-significant bit first,
// also stored in module memory so the emitted loops can read it with i32_load8_s.
const ateLoopBitBytes = bits(ateLoopCount);
const pAteLoopBitBytes = module.alloc(ateLoopBitBytes);
// When false, the `if (isLoopNegative)` tails in the generators below emit nothing.
const isLoopNegative = false;
/**
 * Decomposes a non-negative BigInt into signed digits in {-1, 0, 1},
 * least-significant digit first, such that sum(digit[k] * 2^k) === n.
 * An odd remainder yields 1 when it is 1 mod 4 and -1 when it is 3 mod 4.
 *
 * @param {bigint} n - Value to decompose; values <= 0 give an empty array.
 * @returns {number[]} Signed digits, least significant first.
 */
function naf(n) {
    const digits = [];
    for (let rest = n; rest > 0n; rest >>= 1n) {
        let digit = 0;
        if (isOdd(rest)) {
            digit = 2 - Number(rest % 4n);
            rest -= BigInt(digit);
        }
        digits.push(digit);
    }
    return digits;
}
/**
 * Lists the binary digits of a non-negative BigInt, least-significant bit first.
 *
 * @param {bigint} n - Value to expand; values <= 0 give an empty array.
 * @returns {number[]} Array of 0/1 numbers, one per bit.
 */
function bits(n) {
    const out = [];
    for (let rest = n; rest > 0n; rest >>= 1n) {
        out.push(isOdd(rest) ? 1 : 0);
    }
    return out;
}
// Size in bytes of the buffer filled by <prefix>_prepareG1: two single coefficients
// followed by two three-coefficient elements. Published for callers.
const prePSize = f1size * 2 + f3size*2;
module.modules[prefix].prePSize = prePSize;
/**
 * Registers the wasm function `<prefix>_prepareG1(pP, ppreP)`.
 *
 * Emitted steps, with ppreP viewed as eight n8-byte slots:
 *   1. copy the three coordinates at pP into slots 0..2;
 *   2. call `<g1mPrefix>_normalize` in place on slot 0;
 *   3. overwrite slots 2..4 with (0, slot 0, 0);
 *   4. write slots 5..7 as (0, slot 1, 0).
 * Slot 2 first holds the copied third coordinate and is then reused as the
 * first slot of the X-twist triple.
 */
function buildPrepareG1() {
    const fn = module.addFunction(prefix+ "_prepareG1");
    fn.addParam("pP", "i32");
    fn.addParam("ppreP", "i32");

    const cb = fn.getCodeBuilder();

    // Address of the k-th n8-byte slot behind the named local pointer.
    const slot = (localName, k) => cb.i32_add(cb.getLocal(localName), cb.i32_const(n8 * k));

    const srcX = cb.getLocal("pP");
    const srcY = slot("pP", 1);
    const srcZ = slot("pP", 2);

    const dstX = slot("ppreP", 0);
    const dstY = slot("ppreP", 1);
    const dstZ = slot("ppreP", 2);
    const xTwist = [slot("ppreP", 2), slot("ppreP", 3), slot("ppreP", 4)];
    const yTwist = [slot("ppreP", 5), slot("ppreP", 6), slot("ppreP", 7)];

    const copy = (from, to) => cb.call(f1mPrefix + "_copy", from, to);
    const zero = (to) => cb.call(f1mPrefix + "_zero", to);

    fn.addCode(
        copy(srcX, dstX),
        copy(srcY, dstY),
        copy(srcZ, dstZ),
        cb.call(g1mPrefix + "_normalize", dstX, dstX),
        zero(xTwist[0]),
        copy(dstX, xTwist[1]),
        zero(xTwist[2]),
        zero(yTwist[0]),
        copy(dstY, yTwist[1]),
        zero(yTwist[2])
    );
}
/*
struct mnt6753_ate_G2_precomp {
mnt6753_Fq3 QX;
mnt6753_Fq3 QY;
mnt6753_Fq3 QY2;
mnt6753_Fq3 QX_over_twist;
mnt6753_Fq3 QY_over_twist;
std::vector<mnt6753_ate_dbl_coeffs> dbl_coeffs;
std::vector<mnt6753_ate_add_coeffs> add_coeffs;
bool operator==(const mnt6753_ate_G2_precomp &other) const;
friend std::ostream& operator<<(std::ostream &out, const mnt6753_ate_G2_precomp &prec_Q);
friend std::istream& operator>>(std::istream &in, mnt6753_ate_G2_precomp &prec_Q);
};
*/
// Byte size of one doubling record (four F3 elements: H, 4C, J, L) and of one
// addition record (two F3 elements: L1, RZ), as written by the prep-step functions.
const ateDblCoefSize = 4 * f3size;
const ateAddCoefSize = 2 * f3size;
// One doubling record per loop bit below the top bit.
const ateNDblCoefs = ateLoopBitBytes.length-1;
// One addition record per set bit. The top bit is counted too, which leaves room
// for the extra record written when isLoopNegative is set.
const ateNAddCoefs = ateLoopBitBytes.reduce((acc, b) => acc + ( b!=0 ? 1 : 0) ,0);
// The fixed header is five F3 elements (QX, QY, QY2, QX_over_twist, QY_over_twist):
// the generators place the first doubling record at ppreQ + 5*f3size.
// The previous `f3size * f3size` multiplied two byte counts and over-reserved.
const preQHeaderSize = 5 * f3size;
const preQSize = preQHeaderSize + ateNDblCoefs*ateDblCoefSize + ateNAddCoefs*ateAddCoefSize;
module.modules[prefix].preQSize = preQSize;
/**
 * Registers the wasm function `<prefix>_prepareG2(pQ, ppreQ)`.
 *
 * Layout written at ppreQ, in units of f3size bytes:
 *   [0] QX, [1] QY, [2] QY2 (first holds the copied QZ during normalization),
 *   [3] QX * twist^-1, [4] QY * twist^-1,
 *   [5...] one doubling record per loop iteration, then the addition records.
 *
 * The emitted loop walks the loop-count bits from index length-2 down to 0,
 * calling `<prefix>_prepDblStep` every iteration and `<prefix>_prepAddStep`
 * when the bit is non-zero. The running point R lives in a fixed scratch
 * buffer allocated here, so the generated function is not reentrant.
 *
 * When isLoopNegative is set, one more addition record is produced from the
 * negated affine form of R: x = X / Z^2, y = -(Y / Z^3).
 */
function buildPrepareG2() {
    const f = module.addFunction(prefix+ "_prepareG2");
    f.addParam("pQ", "i32");
    f.addParam("ppreQ", "i32");
    f.addLocal("pDbl", "i32");
    f.addLocal("pAdd", "i32");
    f.addLocal("i", "i32");

    const c = f.getCodeBuilder();

    // Input point coordinates.
    const QX = c.getLocal("pQ");
    const QY = c.i32_add( c.getLocal("pQ"), c.i32_const(f3size));
    const QZ = c.i32_add( c.getLocal("pQ"), c.i32_const(f3size*2));

    // Scratch for the running point R = (X, Y, Z, T).
    const pR = module.alloc(f3size*4);
    const R = c.i32_const(pR);
    const RX = c.i32_const(pR);
    const RY = c.i32_const(pR+f3size);
    const RZ = c.i32_const(pR+2*f3size);
    const RT = c.i32_const(pR+3*f3size);

    const pTwistInv = module.alloc(f3size);

    // Header slots of the output buffer; cQZ and cQY2 deliberately share slot 2.
    const cQX = c.i32_add( c.getLocal("ppreQ"), c.i32_const(0));
    const cQY = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size));
    const cQZ = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*2));
    const cQY2 = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*2));
    const cQX_over_twist = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*3));
    const cQY_over_twist = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*4));

    f.addCode(
        c.call(f3mPrefix + "_inverse", c.i32_const(pTwist), c.i32_const(pTwistInv)),

        c.call(f3mPrefix + "_copy", QX, cQX),
        c.call(f3mPrefix + "_copy", QY, cQY),
        c.call(f3mPrefix + "_copy", QZ, cQZ),
        c.call(g2mPrefix + "_normalize", cQX, cQX), // TODO Remove if already in affine
        c.call(f3mPrefix + "_square", cQY, cQY2),
        c.call(f3mPrefix + "_mul", cQX, c.i32_const(pTwistInv), cQX_over_twist),
        c.call(f3mPrefix + "_mul", cQY, c.i32_const(pTwistInv), cQY_over_twist),

        // R starts as (QX, QY, 1, 1).
        c.call(f3mPrefix + "_copy", cQX, RX),
        c.call(f3mPrefix + "_copy", cQY, RY),
        c.call(f3mPrefix + "_one", RZ),
        c.call(f3mPrefix + "_one", RT)
    );

    f.addCode(
        c.setLocal("pDbl", c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*5))),
        c.setLocal("pAdd", c.i32_add( c.getLocal("pDbl"), c.i32_const(ateNDblCoefs*ateDblCoefSize))),
        c.setLocal("i", c.i32_const(ateLoopBitBytes.length-2)),
        c.block(c.loop(
            c.call(prefix + "_prepDblStep", R, c.getLocal("pDbl")),
            c.setLocal("pDbl", c.i32_add(c.getLocal("pDbl"), c.i32_const(ateDblCoefSize))),
            c.if(
                c.i32_load8_s(c.getLocal("i"), pAteLoopBitBytes),
                [
                    ...c.call(prefix + "_prepAddStep", cQX, cQY, cQY2, R, c.getLocal("pAdd")),
                    ...c.setLocal("pAdd", c.i32_add(c.getLocal("pAdd"), c.i32_const(ateAddCoefSize))),
                ]
            ),
            c.br_if(1, c.i32_eqz ( c.getLocal("i") )),
            c.setLocal("i", c.i32_sub(c.getLocal("i"), c.i32_const(1))),
            c.br(0)
        ))
    );

    if (isLoopNegative) {
        const RZ_INV = c.i32_const(module.alloc(f3size));
        const RZ_INV2 = c.i32_const(module.alloc(f3size));
        const RZ_INV3 = c.i32_const(module.alloc(f3size));
        const minus_R_affine_X = c.i32_const(module.alloc(f3size));
        const minus_R_affine_Y = c.i32_const(module.alloc(f3size));
        const minus_R_affine_Y2 = c.i32_const(module.alloc(f3size));

        f.addCode(
            c.call(f3mPrefix + "_inverse", RZ, RZ_INV),
            c.call(f3mPrefix + "_square", RZ_INV, RZ_INV2),
            c.call(f3mPrefix + "_mul", RZ_INV, RZ_INV2, RZ_INV3),

            // Affine x is X / Z^2 (previously multiplied by Z^-1 only).
            c.call(f3mPrefix + "_mul", RX, RZ_INV2, minus_R_affine_X),
            // Affine y of -R is -(Y / Z^3) (previously the negation was missing).
            c.call(f3mPrefix + "_mul", RY, RZ_INV3, minus_R_affine_Y),
            c.call(f3mPrefix + "_neg", minus_R_affine_Y, minus_R_affine_Y),
            c.call(f3mPrefix + "_square", minus_R_affine_Y, minus_R_affine_Y2),
            // Last record, so pAdd is not advanced afterwards.
            c.call(prefix + "_prepAddStep", minus_R_affine_X, minus_R_affine_Y, minus_R_affine_Y2, R, c.getLocal("pAdd"))
        );
    }
}
/**
 * Registers the wasm function `<prefix>_prepAddStep(pQX, pQY, pQY2, pR, pAdd)`.
 *
 * pR points at four consecutive F3 elements (X1, Y1, Z1, T1) that are updated
 * in place to (X3, Y3, Z3, T3) following the formulas in the inline comments.
 * Two F3 elements are written at pAdd: L1 first, then the new Z3.
 * All temporaries are fixed buffers allocated once at build time.
 */
function buildPrepAddStep() {
const f = module.addFunction(prefix+ "_prepAddStep");
f.addParam("pQX", "i32");
f.addParam("pQY", "i32");
f.addParam("pQY2", "i32");
f.addParam("pR", "i32");
f.addParam("pAdd", "i32");
const c = f.getCodeBuilder();
const QX = c.getLocal("pQX");
const QY = c.getLocal("pQY");
const QY2 = c.getLocal("pQY2");
const RX = c.getLocal("pR");
const RY = c.i32_add(c.getLocal("pR"), c.i32_const(f3size));
const RZ = c.i32_add(c.getLocal("pR"), c.i32_const(2*f3size));
const RT = c.i32_add(c.getLocal("pR"), c.i32_const(3*f3size));
// Output record: L1 at offset 0, RZ at offset f3size.
const AC_L1 = c.getLocal("pAdd");
const AC_RZ = c.i32_add(c.getLocal("pAdd"), c.i32_const(f3size));
const B = c.i32_const(module.alloc(f3size));
const D = c.i32_const(module.alloc(f3size));
const H = c.i32_const(module.alloc(f3size));
const I = c.i32_const(module.alloc(f3size));
const E = c.i32_const(module.alloc(f3size));
const J = c.i32_const(module.alloc(f3size));
const V = c.i32_const(module.alloc(f3size));
const AUX = c.i32_const(module.alloc(f3size));
f.addCode(
// B = x2 * T1
c.call(f3mPrefix + "_mul", QX, RT, B),
// D = ((y2 + Z1)^2 - y2squared - T1) * T1
c.call(f3mPrefix + "_add", QY, RZ, D),
c.call(f3mPrefix + "_square", D, D),
c.call(f3mPrefix + "_sub", D, QY2, D),
c.call(f3mPrefix + "_sub", D, RT, D),
c.call(f3mPrefix + "_mul", D, RT, D),
// H = B - X1
c.call(f3mPrefix + "_sub", B, RX, H),
// I = H^2
c.call(f3mPrefix + "_square", H, I),
// E = 4*I
c.call(f3mPrefix + "_add", I, I, E),
c.call(f3mPrefix + "_add", E, E, E),
// J = H * E
c.call(f3mPrefix + "_mul", H, E, J),
// V = X1 * E
c.call(f3mPrefix + "_mul", RX, E, V),
// L1 = D - 2 * Y1
c.call(f3mPrefix + "_add", RY, RY, AC_L1),
c.call(f3mPrefix + "_sub", D, AC_L1, AC_L1),
// X3 = L1^2 - J - 2*V
// (RX is overwritten here; V was computed from the old X1 above.)
c.call(f3mPrefix + "_square", AC_L1, RX),
c.call(f3mPrefix + "_add", V, V, AUX),
c.call(f3mPrefix + "_add", AUX, J, AUX),
c.call(f3mPrefix + "_sub", RX, AUX, RX),
// Y3 = L1 * (V-X3) - 2*Y1 * J
// (2*Y1*J is formed in AUX before RY is overwritten.)
c.call(f3mPrefix + "_add", RY, RY, AUX),
c.call(f3mPrefix + "_mul", AUX, J, AUX),
c.call(f3mPrefix + "_sub", V, RX, RY),
c.call(f3mPrefix + "_mul", AC_L1, RY, RY),
c.call(f3mPrefix + "_sub", RY, AUX, RY),
// Z3 = (Z1 + H)^2 - T1 - I
c.call(f3mPrefix + "_add", RZ, H, RZ),
c.call(f3mPrefix + "_square", RZ, RZ),
c.call(f3mPrefix + "_add", RT, I, AUX),
c.call(f3mPrefix + "_sub", RZ, AUX, RZ),
// T3 = Z3^2
c.call(f3mPrefix + "_square", RZ, RT),
// Second half of the output record is the new Z3.
c.call(f3mPrefix + "_copy", RZ, AC_RZ),
);
}
// Three-coefficient constant (0, 0, 11) used as `a` by <prefix>_prepDblStep.
const TwistCoefA = [0n, 0n, 11n];
const pTwistCoefA = module.alloc(
    TwistCoefA.flatMap((coef) => [...utils.bigInt2BytesLE(toMontgomery(coef), 96)])
);

// Three-coefficient constant (0, 1, 0); <prefix>_prepareG2 inverts it.
const Twist = [0n, 1n, 0n];
const pTwist = module.alloc(
    Twist.flatMap((coef) => [...utils.bigInt2BytesLE(toMontgomery(coef), 96)])
);
/**
 * Registers the wasm function `<prefix>_prepDblStep(pR, pDbl)`.
 *
 * pR points at four consecutive F3 elements (X1, Y1, Z1, T1) that are updated
 * in place to (X3, Y3, Z3, T3) following the formulas in the inline comments.
 * Four F3 elements are written at pDbl, in order: H, 4C, J, L.
 * All temporaries are fixed buffers allocated once at build time.
 */
function buildPrepDblStep() {
const f = module.addFunction(prefix+ "_prepDblStep");
f.addParam("pR", "i32");
f.addParam("pDbl", "i32");
const c = f.getCodeBuilder();
// Output record slots.
const DC_H = c.getLocal("pDbl");
const DC_4C = c.i32_add(c.getLocal("pDbl"), c.i32_const(f3size));
const DC_J = c.i32_add(c.getLocal("pDbl"), c.i32_const(2*f3size));
const DC_L = c.i32_add(c.getLocal("pDbl"), c.i32_const(3*f3size));
const RX = c.getLocal("pR");
const RY = c.i32_add(c.getLocal("pR"), c.i32_const(f3size));
const RZ = c.i32_add(c.getLocal("pR"), c.i32_const(2*f3size));
const RT = c.i32_add(c.getLocal("pR"), c.i32_const(3*f3size));
const A = c.i32_const(module.alloc(f3size));
const B = c.i32_const(module.alloc(f3size));
const C = c.i32_const(module.alloc(f3size));
const D = c.i32_const(module.alloc(f3size));
const E = c.i32_const(module.alloc(f3size));
const F = c.i32_const(module.alloc(f3size));
const G = c.i32_const(module.alloc(f3size));
const AUX = c.i32_const(module.alloc(f3size));
// Copies of the input X1, Y1, T1, needed after RX/RY/RT are overwritten.
const X = c.i32_const(module.alloc(f3size));
const Y = c.i32_const(module.alloc(f3size));
const T = c.i32_const(module.alloc(f3size));
f.addCode(
// Save T, X, Y
c.call(f3mPrefix + "_copy", RX, X),
c.call(f3mPrefix + "_copy", RY, Y),
c.call(f3mPrefix + "_copy", RT, T),
// A = T1^2
c.call(f3mPrefix + "_square", RT, A),
// B = X1^2
c.call(f3mPrefix + "_square", RX, B),
// C = Y1^2
c.call(f3mPrefix + "_square", RY, C),
// D = C^2
c.call(f3mPrefix + "_square", C, D),
// E = (X1+C)^2-B-D
c.call(f3mPrefix + "_add", X, C, E),
c.call(f3mPrefix + "_square", E, E),
c.call(f3mPrefix + "_sub", E, B, E),
c.call(f3mPrefix + "_sub", E, D, E),
// F = 3*B + a *A
c.call(f3mPrefix + "_mul", c.i32_const(pTwistCoefA), A, F),
c.call(f3mPrefix + "_add", B, F, F),
c.call(f3mPrefix + "_add", B, F, F),
c.call(f3mPrefix + "_add", B, F, F),
// G = F^2
c.call(f3mPrefix + "_square", F, G),
// X3 = -4*E+G
c.call(f3mPrefix + "_add", E, E, RX),
c.call(f3mPrefix + "_add", RX, RX, RX),
c.call(f3mPrefix + "_sub", G, RX, RX),
// Y3 = -8*D + F*(2*E-X3)
c.call(f3mPrefix + "_add", E, E, RY),
c.call(f3mPrefix + "_sub", RY, RX, RY),
c.call(f3mPrefix + "_mul", RY, F, RY),
c.call(f3mPrefix + "_add", D, D, AUX),
c.call(f3mPrefix + "_add", AUX, AUX, AUX),
c.call(f3mPrefix + "_add", AUX, AUX, AUX),
c.call(f3mPrefix + "_sub", RY, AUX, RY),
// Z3 = (Y1+Z1)^2-C-Z1^2
// (uses the saved Y1; RZ still holds Z1 until it is squared below)
c.call(f3mPrefix + "_add", Y , RZ, AUX),
c.call(f3mPrefix + "_square", AUX, AUX),
c.call(f3mPrefix + "_square", RZ, RZ),
c.call(f3mPrefix + "_add", RZ, C, RZ),
c.call(f3mPrefix + "_sub", AUX, RZ, RZ),
// T3 = Z3^2
c.call(f3mPrefix + "_square", RZ, RT),
// H = (Z3+T1)^2-T3-A
c.call(f3mPrefix + "_add", RZ, T, DC_H),
c.call(f3mPrefix + "_square", DC_H, DC_H),
c.call(f3mPrefix + "_sub", DC_H, RT, DC_H),
c.call(f3mPrefix + "_sub", DC_H, A, DC_H),
// fourC = 4*C
c.call(f3mPrefix + "_add", C, C, DC_4C),
c.call(f3mPrefix + "_add", DC_4C, DC_4C, DC_4C),
// J = (F+T1)^2-G-A
c.call(f3mPrefix + "_add", F, T, DC_J),
c.call(f3mPrefix + "_square", DC_J, DC_J),
c.call(f3mPrefix + "_sub", DC_J, G, DC_J),
c.call(f3mPrefix + "_sub", DC_J, A, DC_J),
// L = (F+X1)^2-G-B
c.call(f3mPrefix + "_add", F, X, DC_L),
c.call(f3mPrefix + "_square", DC_L, DC_L),
c.call(f3mPrefix + "_sub", DC_L, G, DC_L),
c.call(f3mPrefix + "_sub", DC_L, B, DC_L),
);
}
/**
 * Registers the wasm function `<prefix>_millerLoop(ppreP, ppreQ, r)`.
 *
 * ppreP / ppreQ are buffers in the layouts produced by <prefix>_prepareG1 and
 * <prefix>_prepareG2. The ftm element at `r` is set to one and then, for each
 * loop bit from index length-2 down to 0, squared and multiplied by the
 * doubling line value; when the bit is non-zero it is additionally multiplied
 * by the addition line value. Scratch buffers are allocated once at build time.
 */
function buildMillerLoop() {
const f = module.addFunction(prefix+ "_millerLoop");
f.addParam("ppreP", "i32");
f.addParam("ppreQ", "i32");
f.addParam("r", "i32");
f.addLocal("pDbl", "i32");
f.addLocal("pAdd", "i32");
f.addLocal("i", "i32");
const c = f.getCodeBuilder();
// Fields of the preP buffer: PX at offset 0, twisted X and Y after the two f1 slots.
const preP_PX = c.getLocal("ppreP");
const preP_PX_twist = c.i32_add(c.getLocal("ppreP"), c.i32_const(f1size*2));
const preP_PY_twist = c.i32_add(c.getLocal("ppreP"), c.i32_const(f1size*2 + f3size));
// Fields of the preQ header: slots 3 and 4.
const preQ_QX_twist = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*3));
const preQ_QY_twist = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*4));
const pL1Coef = module.alloc(f3size);
const L1Coef = c.i32_const(pL1Coef);
const L1Coef_0 = c.i32_const(pL1Coef);
const L1Coef_1 = c.i32_const(pL1Coef+f1size);
const L1Coef_2 = c.i32_const(pL1Coef+2*f1size);
// Line value: an ftm element viewed as two F3 halves.
const pEV_at_P = module.alloc(ftsize);
const EV_at_P = c.i32_const(pEV_at_P);
const EV_at_P_0 = c.i32_const(pEV_at_P);
const EV_at_P_1 = c.i32_const(pEV_at_P+f3size);
// Current doubling record (H, 4C, J, L) and addition record (L1, RZ).
const DC_H = c.getLocal("pDbl");
const DC_4C = c.i32_add(c.getLocal("pDbl"), c.i32_const(f3size));
const DC_J = c.i32_add(c.getLocal("pDbl"), c.i32_const(2*f3size));
const DC_L = c.i32_add(c.getLocal("pDbl"), c.i32_const(3*f3size));
const AC_L1 = c.getLocal("pAdd");
const AC_RZ = c.i32_add(c.getLocal("pAdd"), c.i32_const(f3size));
const F = c.getLocal("r");
const pAUX = module.alloc(f3size);
const AUX = c.i32_const(pAUX);
f.addCode(
// L1Coef = (PX, 0, 0) - QX_over_twist
c.call(f1mPrefix + "_copy", preP_PX, L1Coef_0),
c.call(f1mPrefix + "_zero", L1Coef_1),
c.call(f1mPrefix + "_zero", L1Coef_2),
c.call(f3mPrefix + "_sub", L1Coef, preQ_QX_twist, L1Coef),
c.call(ftmPrefix + "_one", F),
c.setLocal("pDbl", c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*5))),
c.setLocal("pAdd", c.i32_add( c.getLocal("pDbl"), c.i32_const(ateNDblCoefs*ateDblCoefSize))),
c.setLocal("i", c.i32_const(ateLoopBitBytes.length-2)),
c.block(c.loop(
// EV_at_P = (L - J*PX_twist - 4C, H*PY_twist); F = F^2 * EV_at_P
c.call(f3mPrefix + "_mul", DC_J, preP_PX_twist, EV_at_P_0),
c.call(f3mPrefix + "_sub", DC_L, EV_at_P_0, EV_at_P_0),
c.call(f3mPrefix + "_sub", EV_at_P_0, DC_4C, EV_at_P_0),
c.call(f3mPrefix + "_mul", DC_H, preP_PY_twist, EV_at_P_1),
c.call(ftmPrefix + "_square", F, F),
c.call(ftmPrefix + "_mul", F, EV_at_P, F),
c.setLocal("pDbl", c.i32_add(c.getLocal("pDbl"), c.i32_const(ateDblCoefSize))),
c.if(
c.i32_load8_s(c.getLocal("i"), pAteLoopBitBytes),
[
// EV_at_P = (RZ*PY_twist, -(RZ*QY_over_twist + L1Coef*L1)); F = F * EV_at_P
...c.call(f3mPrefix + "_mul", AC_RZ, preP_PY_twist, EV_at_P_0),
...c.call(f3mPrefix + "_mul", AC_RZ, preQ_QY_twist, EV_at_P_1),
...c.call(f3mPrefix + "_mul", L1Coef, AC_L1, AUX),
...c.call(f3mPrefix + "_add", EV_at_P_1, AUX, EV_at_P_1),
...c.call(f3mPrefix + "_neg", EV_at_P_1, EV_at_P_1),
...c.call(ftmPrefix + "_mul", F, EV_at_P, F),
...c.setLocal("pAdd", c.i32_add(c.getLocal("pAdd"), c.i32_const(ateAddCoefSize))),
]
),
c.br_if(1, c.i32_eqz ( c.getLocal("i") )),
c.setLocal("i", c.i32_sub(c.getLocal("i"), c.i32_const(1))),
c.br(0)
))
);
// Extra addition step, emitted only when isLoopNegative is set.
if (isLoopNegative) {
f.addCode(
c.call(f3mPrefix + "_mul", AC_RZ, preP_PY_twist, EV_at_P_0),
c.call(f3mPrefix + "_mul", AC_RZ, preQ_QY_twist, EV_at_P_1),
c.call(f3mPrefix + "_mul", L1Coef, AC_L1, AUX),
c.call(f3mPrefix + "_add", EV_at_P_1, AUX, EV_at_P_1),
c.call(f3mPrefix + "_neg", EV_at_P_1, EV_at_P_1),
c.call(ftmPrefix + "_mul", F, EV_at_P, F),
/// Next line not needed because it's going to be the last one.
// c.setLocal("pAdd", c.i32_add(c.getLocal("pAdd"), c.i32_const(ateAddCoefSize))),
);
}
}
/**
 * Registers the wasm function `<prefix>_computeLineFunctions(ppreP, ppreQ, r)`.
 *
 * Same traversal as the Miller loop, but instead of accumulating into a single
 * value, each iteration multiplies the ftm element at r + i*ftsize by the
 * doubling line value and, when bit i is non-zero, also by the addition line value.
 * NOTE(review): the slots at r + i*ftsize are read (as a multiplicand) before
 * this function ever writes them; presumably the caller initializes them to
 * one -- confirm.
 */
function buildComputeLineFunctions() {
const f = module.addFunction(prefix+ "_computeLineFunctions");
f.addParam("ppreP", "i32");
f.addParam("ppreQ", "i32");
f.addParam("r", "i32");
f.addLocal("pDbl", "i32");
f.addLocal("pAdd", "i32");
f.addLocal("i", "i32");
const c = f.getCodeBuilder();
const preP_PX = c.getLocal("ppreP");
const preP_PX_twist = c.i32_add(c.getLocal("ppreP"), c.i32_const(f1size*2));
const preP_PY_twist = c.i32_add(c.getLocal("ppreP"), c.i32_const(f1size*2 + f3size));
const preQ_QX_twist = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*3));
const preQ_QY_twist = c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*4));
const pL1Coef = module.alloc(f3size);
const L1Coef = c.i32_const(pL1Coef);
const L1Coef_0 = c.i32_const(pL1Coef);
const L1Coef_1 = c.i32_const(pL1Coef+f1size);
const L1Coef_2 = c.i32_const(pL1Coef+2*f1size);
const pEV_at_P = module.alloc(ftsize);
const EV_at_P = c.i32_const(pEV_at_P);
const EV_at_P_0 = c.i32_const(pEV_at_P);
const EV_at_P_1 = c.i32_const(pEV_at_P+f3size);
const DC_H = c.getLocal("pDbl");
const DC_4C = c.i32_add(c.getLocal("pDbl"), c.i32_const(f3size));
const DC_J = c.i32_add(c.getLocal("pDbl"), c.i32_const(2*f3size));
const DC_L = c.i32_add(c.getLocal("pDbl"), c.i32_const(3*f3size));
const AC_L1 = c.getLocal("pAdd");
const AC_RZ = c.i32_add(c.getLocal("pAdd"), c.i32_const(f3size));
const pRes = c.getLocal("r");
const pAUX = module.alloc(f3size);
const AUX = c.i32_const(pAUX);
// Builds the address expression r + i*ftsize for the current value of local i.
function getPResOffset() {
return c.i32_add( pRes, c.i32_mul( c.getLocal("i"), c.i32_const(ftsize)));
}
f.addCode(
// L1Coef = (PX, 0, 0) - QX_over_twist
c.call(f1mPrefix + "_copy", preP_PX, L1Coef_0),
c.call(f1mPrefix + "_zero", L1Coef_1),
c.call(f1mPrefix + "_zero", L1Coef_2),
c.call(f3mPrefix + "_sub", L1Coef, preQ_QX_twist, L1Coef),
c.setLocal("pDbl", c.i32_add( c.getLocal("ppreQ"), c.i32_const(f3size*5))),
c.setLocal("pAdd", c.i32_add( c.getLocal("pDbl"), c.i32_const(ateNDblCoefs*ateDblCoefSize))),
c.setLocal("i", c.i32_const(ateLoopBitBytes.length-2)),
c.block(c.loop(
c.call(f3mPrefix + "_mul", DC_J, preP_PX_twist, EV_at_P_0),
c.call(f3mPrefix + "_sub", DC_L, EV_at_P_0, EV_at_P_0),
c.call(f3mPrefix + "_sub", EV_at_P_0, DC_4C, EV_at_P_0),
c.call(f3mPrefix + "_mul", DC_H, preP_PY_twist, EV_at_P_1),
c.call(ftmPrefix + "_mul", getPResOffset(), EV_at_P, getPResOffset()),
c.setLocal("pDbl", c.i32_add(c.getLocal("pDbl"), c.i32_const(ateDblCoefSize))),
c.if(
c.i32_load8_s(c.getLocal("i"), pAteLoopBitBytes),
[
...c.call(f3mPrefix + "_mul", AC_RZ, preP_PY_twist, EV_at_P_0),
...c.call(f3mPrefix + "_mul", AC_RZ, preQ_QY_twist, EV_at_P_1),
...c.call(f3mPrefix + "_mul", L1Coef, AC_L1, AUX),
...c.call(f3mPrefix + "_add", EV_at_P_1, AUX, EV_at_P_1),
...c.call(f3mPrefix + "_neg", EV_at_P_1, EV_at_P_1),
...c.call(ftmPrefix + "_mul", getPResOffset(), EV_at_P, getPResOffset()),
...c.setLocal("pAdd", c.i32_add(c.getLocal("pAdd"), c.i32_const(ateAddCoefSize))),
]
),
c.br_if(1, c.i32_eqz ( c.getLocal("i") )),
c.setLocal("i", c.i32_sub(c.getLocal("i"), c.i32_const(1))),
c.br(0)
))
);
// Extra addition step, emitted only when isLoopNegative is set; it targets slot length-1.
if (isLoopNegative) {
f.addCode(
c.setLocal("i", c.i32_const(ateLoopBitBytes.length-1)),
c.call(f3mPrefix + "_mul", AC_RZ, preP_PY_twist, EV_at_P_0),
c.call(f3mPrefix + "_mul", AC_RZ, preQ_QY_twist, EV_at_P_1),
c.call(f3mPrefix + "_mul", L1Coef, AC_L1, AUX),
c.call(f3mPrefix + "_add", EV_at_P_1, AUX, EV_at_P_1),
c.call(f3mPrefix + "_neg", EV_at_P_1, EV_at_P_1),
c.call(ftmPrefix + "_mul", getPResOffset(), EV_at_P, getPResOffset()),
/// Next line not needed because it's going to be the last one.
// c.setLocal("pAdd", c.i32_add(c.getLocal("pAdd"), c.i32_const(ateAddCoefSize))),
);
}
}
/**
 * Registers the wasm function `<prefix>_fusedMillerLoop(l, r)`.
 *
 * `l` is an array of ftm elements indexed by loop position (ftsize bytes each).
 * The ftm element at `r` is set to one; then for i from length-2 down to 0 it
 * is squared and multiplied by l[i]. When isLoopNegative is set, one more
 * multiplication by l[length-1] is appended.
 */
function buildFusedMillerLoop() {
    const fn = module.addFunction(prefix+ "_fusedMillerLoop");
    fn.addParam("l", "i32");
    fn.addParam("r", "i32");
    fn.addLocal("i", "i32");

    const cb = fn.getCodeBuilder();

    const lines = cb.getLocal("l");
    const acc = cb.getLocal("r");

    // Address expression for l[i], using the current value of local i.
    const lineAtI = () => cb.i32_add(
        lines,
        cb.i32_mul(cb.getLocal("i"), cb.i32_const(ftsize))
    );

    fn.addCode(
        cb.call(ftmPrefix + "_one", acc),
        cb.setLocal("i", cb.i32_const(ateLoopBitBytes.length-2)),
        cb.block(
            cb.loop(
                cb.call(ftmPrefix + "_square", acc, acc),
                cb.call(ftmPrefix + "_mul", acc, lineAtI(), acc),
                // Leave the block once index 0 has been processed.
                cb.br_if(1, cb.i32_eqz(cb.getLocal("i"))),
                cb.setLocal("i", cb.i32_sub(cb.getLocal("i"), cb.i32_const(1))),
                cb.br(0)
            )
        )
    );

    if (isLoopNegative) {
        fn.addCode(
            cb.setLocal("i", cb.i32_const(ateLoopBitBytes.length-1)),
            cb.call(ftmPrefix + "_mul", acc, lineAtI(), acc)
        );
    }
}
/**
 * Registers the wasm function `<prefix>_combineLineFunctions(l, r, res)`.
 *
 * `l`, `r` and `res` are arrays of ftm elements (ftsize bytes each). For i
 * from length-2 down to 0 the emitted code computes res[i] = l[i] * r[i].
 * When isLoopNegative is set, index length-1 is processed as well.
 */
function buildCombineLineFunctions() {
    const fn = module.addFunction(prefix+ "_combineLineFunctions");
    fn.addParam("l", "i32");
    fn.addParam("r", "i32");
    fn.addParam("res", "i32");
    fn.addLocal("i", "i32");

    const cb = fn.getCodeBuilder();

    const left = cb.getLocal("l");
    const right = cb.getLocal("r");
    const out = cb.getLocal("res");

    // Address expression base + i*ftsize, using the current value of local i.
    const elementAtI = (base) => cb.i32_add(
        base,
        cb.i32_mul(cb.getLocal("i"), cb.i32_const(ftsize))
    );

    // res[i] = l[i] * r[i]
    const multiplyAtI = () => cb.call(
        ftmPrefix + "_mul",
        elementAtI(left),
        elementAtI(right),
        elementAtI(out)
    );

    fn.addCode(
        cb.setLocal("i", cb.i32_const(ateLoopBitBytes.length-2)),
        cb.block(
            cb.loop(
                multiplyAtI(),
                // Leave the block once index 0 has been processed.
                cb.br_if(1, cb.i32_eqz(cb.getLocal("i"))),
                cb.setLocal("i", cb.i32_sub(cb.getLocal("i"), cb.i32_const(1))),
                cb.br(0)
            )
        )
    );

    if (isLoopNegative) {
        fn.addCode(
            cb.setLocal("i", cb.i32_const(ateLoopBitBytes.length-1)),
            multiplyAtI()
        );
    }
}
/**
 * Registers the wasm function `<prefix>__frobeniusMap<n>(x, r)`.
 *
 * x and r each point at six consecutive f1size-byte coefficients. For each
 * coefficient index i, a constant F6[floor(i/3)][n%6] * F3[i%3][n%3] is
 * computed at build time (the product is reduced modulo q inside
 * toMontgomery); the emitted code multiplies x[i] by that constant into r[i],
 * or just copies x[i] to r[i] when the constant is exactly 1.
 *
 * @param {number} n - Power index; used for the function name and to select table columns.
 */
function buildFrobeniusMap(n) {
// Row 0 applies to coefficients 0..2, row 1 to coefficients 3..5; column is n % 6.
const F6 = [
[
1n,
1n,
1n,
1n,
1n,
1n,
],
[
1n,
24129022407817241407134263419936114379815707076943508280977368156625538709102831814843582780138963119807143081677569721953561801075623741378629346409604471234573396989178424163772589090105392407118197799904755622897541183052133n,
24129022407817241407134263419936114379815707076943508280977368156625538709102831814843582780138963119807143081677569721953561801075623741378629346409604471234573396989178424163772589090105392407118197799904755622897541183052132n,
41898490967918953402344214791240637128170709919953949071783502921025352812571106773058893763790338921418070971888458477323173057491593855069696241854796396165721416325350064441470418137846398469611935719059908164220784476160000n,
17769468560101711995209951371304522748355002843010440790806134764399814103468274958215310983651375801610927890210888755369611256415970113691066895445191924931148019336171640277697829047741006062493737919155152541323243293107868n,
17769468560101711995209951371304522748355002843010440790806134764399814103468274958215310983651375801610927890210888755369611256415970113691066895445191924931148019336171640277697829047741006062493737919155152541323243293107869n,
]
];
// Row is the coefficient position within its triple (i % 3); column is n % 3.
const F3 = [
[
1n,
1n,
1n,
],
[
1n,
24129022407817241407134263419936114379815707076943508280977368156625538709102831814843582780138963119807143081677569721953561801075623741378629346409604471234573396989178424163772589090105392407118197799904755622897541183052132n,
17769468560101711995209951371304522748355002843010440790806134764399814103468274958215310983651375801610927890210888755369611256415970113691066895445191924931148019336171640277697829047741006062493737919155152541323243293107868n,
],
[
1n,
17769468560101711995209951371304522748355002843010440790806134764399814103468274958215310983651375801610927890210888755369611256415970113691066895445191924931148019336171640277697829047741006062493737919155152541323243293107868n,
24129022407817241407134263419936114379815707076943508280977368156625538709102831814843582780138963119807143081677569721953561801075623741378629346409604471234573396989178424163772589090105392407118197799904755622897541183052132n,
]
];
const f = module.addFunction(prefix+ "__frobeniusMap"+n);
f.addParam("x", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
for (let i=0; i<6; i++) {
const X = (i==0) ? c.getLocal("x") : c.i32_add(c.getLocal("x"), c.i32_const(i*f1size));
const R = (i==0) ? c.getLocal("r") : c.i32_add(c.getLocal("r"), c.i32_const(i*f1size));
const coef = F6[Math.floor(i/3)][n%6] * F3[i%3][n%3];
// Only an unreduced product of exactly 1 takes the copy path.
if (coef !== 1n) {
const pCoef = module.alloc(
utils.bigInt2BytesLE(
toMontgomery(coef),
96
)
);
f.addCode(c.call(f1mPrefix + "_mul", X, c.i32_const(pCoef), R));
} else {
f.addCode(c.call(f1mPrefix + "_copy", X, R));
}
}
}
function buildCyclotomicSquare() {
    // Emits the wasm function `<prefix>__cyclotomicSquare(x, r)`.
    //
    // `x` and `r` each point at six consecutive f1size-byte coefficients,
    // referred to below as c00, c01, c02, c10, c11, c12 (offsets 0..5).
    // Every read of `x` happens in the six initial copies, before the first
    // write to `r`, so the emitted code can be called with x == r.
    const f = module.addFunction(prefix+ "__cyclotomicSquare");
    f.addParam("x", "i32");
    f.addParam("r", "i32");

    const c = f.getCodeBuilder();

    // Address of the k-th coefficient of the element held in local `name`.
    const coefAt = (name, k) => (k === 0)
        ? c.getLocal(name)
        : c.i32_add(c.getLocal(name), c.i32_const(k*f1size));

    // Static scratch space for one quadratic-extension element
    // (f2size is f1size*2): the element itself plus its two coordinates.
    const scratchPair = () => {
        const base = module.alloc(f2size);
        return {
            whole: c.i32_const(base),
            lo: c.i32_const(base),
            hi: c.i32_const(base + f1size),
        };
    };

    // Allocation order (a, b, c, a^2, b^2, c^2, aux) fixes the memory layout.
    const elA = scratchPair();
    const elB = scratchPair();
    const elC = scratchPair();
    const sqA = scratchPair();
    const sqB = scratchPair();
    const sqC = scratchPair();
    const aux = c.i32_const(module.alloc(f1size));

    // dst = 3*s - 2*v when op is "_sub", dst = 3*s + 2*v when op is "_add":
    //   dst = s op v;  dst = dst + dst;  dst = dst + s
    const tripleAndDouble = (op, s, v, dst) => [
        c.call(f1mPrefix + op, s, v, dst),
        c.call(f1mPrefix + "_add", dst, dst, dst),
        c.call(f1mPrefix + "_add", dst, s, dst),
    ];

    // a = (c00, c11), b = (c10, c02), c = (c01, c12)
    const gather = [
        [0, elA.lo], [4, elA.hi],
        [3, elB.lo], [2, elB.hi],
        [1, elC.lo], [5, elC.hi],
    ].map(([k, dst]) => c.call(f1mPrefix + "_copy", coefAt("x", k), dst));

    const squarings = [
        [elA, sqA],
        [elB, sqB],
        [elC, sqC],
    ].map(([src, dst]) => c.call(f2mPrefix + "_square", src.whole, dst.whole));

    f.addCode(
        ...gather,
        ...squarings,
        // r[0] = 3*(a^2)[0] - 2*a[0]
        ...tripleAndDouble("_sub", sqA.lo, elA.lo, coefAt("r", 0)),
        // r[4] = 3*(a^2)[1] + 2*a[1]
        ...tripleAndDouble("_add", sqA.hi, elA.hi, coefAt("r", 4)),
        // r[3] = 3*non_residue*(c^2)[1] + 2*b[0]   (aux = non_residue*(c^2)[1])
        c.call(prefix + "_mulNR3", sqC.hi, aux),
        ...tripleAndDouble("_add", aux, elB.lo, coefAt("r", 3)),
        // r[2] = 3*(c^2)[0] - 2*b[1]
        ...tripleAndDouble("_sub", sqC.lo, elB.hi, coefAt("r", 2)),
        // r[1] = 3*(b^2)[0] - 2*c[0]
        ...tripleAndDouble("_sub", sqB.lo, elC.lo, coefAt("r", 1)),
        // r[5] = 3*(b^2)[1] + 2*c[1]
        ...tripleAndDouble("_add", sqB.hi, elC.hi, coefAt("r", 5))
    );
}
function buildCyclotomicExp(exponent, fnName) {
    // Emits the wasm function `<prefix>__cyclotomicExp_<fnName>(x, r)`, which
    // raises the element at `x` to the fixed `exponent` and stores it at `r`,
    // using `<prefix>__cyclotomicSquare` for the squarings.
    //
    // The digits returned by naf(exponent) are stored one per byte, with -1
    // written as 0xFF so that the signed 8-bit load in the emitted code reads
    // it back as a negative value. Digits are consumed from the last array
    // index down to index 0.
    //
    // The comparison with -1 is deliberately loose: naf() is defined outside
    // this block and the element type it returns is not visible here.
    const nafBytes = naf(exponent).map((digit) => {
        if (digit == -1) return 0xFF;
        return digit;
    });
    const pNafBytes = module.alloc(nafBytes);

    const f = module.addFunction(prefix+ "__cyclotomicExp_"+fnName);
    f.addParam("x", "i32");
    f.addParam("r", "i32");
    f.addLocal("bit", "i32");
    f.addLocal("i", "i32");

    const c = f.getCodeBuilder();
    const base = c.getLocal("x");
    const acc = c.getLocal("r");

    // Scratch element used for the -1 digits: the first three coefficients of
    // x are copied and the last three are negated. The original code names
    // this `inverse`; presumably it equals 1/x for the inputs this is used
    // with -- TODO confirm against callers.
    const pConj = module.alloc(ftsize);
    const conj = c.i32_const(pConj);
    const fillConj = [];
    for (let k = 0; k < 6; k++) {
        const src = (k === 0)
            ? c.getLocal("x")
            : c.i32_add(c.getLocal("x"), c.i32_const(k*f1size));
        const dst = c.i32_const(pConj + k*f1size);
        const op = (k < 3) ? "_copy" : "_neg";
        fillConj.push(c.call(f1mPrefix + op, src, dst));
    }

    // Loads the digit at `index` into local "bit"; when it is non-zero,
    // multiplies the accumulator by x (digit == 1) or by the scratch element
    // (any other non-zero digit, i.e. -1).
    const applyDigit = (index) => c.if(
        c.teeLocal("bit", c.i32_load8_s(index, pNafBytes)),
        c.if(
            c.i32_eq(c.getLocal("bit"), c.i32_const(1)),
            c.call(ftmPrefix + "_mul", acc, base, acc),
            c.call(ftmPrefix + "_mul", acc, conj, acc)
        )
    );

    const lastIndex = nafBytes.length - 1;

    f.addCode(
        ...fillConj,
        c.call(ftmPrefix + "_one", acc),
        // Most significant digit: no squaring before it.
        applyDigit(c.i32_const(lastIndex)),
        // Remaining digits: square, apply digit, stop after index 0.
        // NOTE(review): i starts at length-2, so a one-digit NAF would start
        // the loop at -1 -- confirm that no exponent of that size is passed.
        c.setLocal("i", c.i32_const(lastIndex - 1)),
        c.block(c.loop(
            c.call(prefix + "__cyclotomicSquare", acc, acc),
            applyDigit(c.getLocal("i")),
            c.br_if(1, c.i32_eqz(c.getLocal("i"))),
            c.setLocal("i", c.i32_sub(c.getLocal("i"), c.i32_const(1))),
            c.br(0)
        ))
    );
}
function buildFinalExponentiationLastChunk() {
    // Emits `<prefix>__finalExponentiationLastChunk(x, r)`:
    //     r = frobeniusMap1(x) * cyclotomicExp_w0(x)
    //
    // The cyclotomic square and the fixed-exponent routine for |w0| are
    // generated first, before this function is registered with the module.
    // `<prefix>__frobeniusMap1` is only referenced by name here; it must be
    // generated elsewhere.
    const absOfW0 = 204691208819330962009469868104636132783269696790011977400223898462431810102935615891307667367766898917669754470400n;

    buildCyclotomicSquare();
    buildCyclotomicExp(absOfW0, "w0");

    const f = module.addFunction(prefix + "__finalExponentiationLastChunk");
    f.addParam("x", "i32");
    f.addParam("r", "i32");

    const c = f.getCodeBuilder();

    // Two statically allocated ftsize-byte temporaries, one per factor.
    const newTemp = () => c.i32_const(module.alloc(ftsize));
    const w1part = newTemp();
    const w0part = newTemp();

    const input = c.getLocal("x");
    const output = c.getLocal("r");

    const steps = [
        c.call(prefix + "__frobeniusMap1", input, w1part),
        c.call(prefix + "__cyclotomicExp_w0", input, w0part),
        c.call(ftmPrefix + "_mul", w1part, w0part, output),
    ];
    f.addCode(...steps);
}
function buildFinalExponentiationFirstChunk() {
const f = module.addFunction(prefix+ "__finalExponentiationFirstChunk");
f.addParam("x", "i32");
f.addParam("xi", "i32");
f.addParam("r", "i32");
const c = f.getCodeBuilder();
const elt = c.getLocal("x");
const eltInv = c.getLocal("xi");
const beta = c.getLocal("r");
const eltQ3 = c.i32_const(module.alloc(ftsize));
const eltQ3OverElt = c.i32_const(module.alloc(ftsize));
const alpha = c.i32_const(module.alloc(ftsize));
f.addCode(
c.call(prefix + "__frobeniusMap3", elt, elt