UNPKG

espruino

Version:

Command Line Interface and library for Communications with Espruino JavaScript Microcontrollers

322 lines (307 loc) 12.9 kB
/** Copyright 2014 Gordon Williams (gw@pur3.co.uk) This Source Code is subject to the terms of the Mozilla Public License, v2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. ------------------------------------------------------------------ Pretokenise code before it uploads ------------------------------------------------------------------ **/ "use strict"; (function(){ if (typeof acorn == "undefined") { console.log("pretokenise: needs acorn, disabling."); return; } function init() { Espruino.Core.Config.add("PRETOKENISE", { section : "Minification", name : "Pretokenise code before upload", description : "All whitespace and comments are removed and all reserved words are converted to tokens before upload. This means a faster upload, less memory used, and increased performance (+10%) at the expense of code readability.", type : { 0: "Never", 1: "Auto (tokenise Strings on 2v20.48 or later)", 2: "Yes (always tokenise everything, regardless of version)" }, defaultValue : 0 }); // When code is sent to Espruino, search it for modules and add extra code required to load them Espruino.addProcessor("transformForEspruino", function(code, callback) { if (Espruino.Config.PRETOKENISE == 0) return callback(code); // disabled? if (Espruino.Config.SAVE_ON_SEND == 0) { console.log("pretokenise> Can't pretokenise code sent to REPL (RAM)"); return callback(code); } pretokenise(code, callback); }); // When code is sent to Espruino, search it for modules and add extra code required to load them Espruino.addProcessor("transformModuleForEspruino", function(module, callback) { if (Espruino.Config.PRETOKENISE == 0 || // disabled? Espruino.Config.MODULE_AS_FUNCTION) return callback(module); /* if MODULE_AS_FUNCTION is specified the module is uploaded inside a 'function' block, in which case it will be pretokenised anyway in a later step */ pretokenise(module.code, function(code) { module.code = code; callback(module); }); }); } var LEX_OPERATOR_START = 138; var TOKENS = [// plundered from jslex.c /* LEX_EQUAL : */ "==", /* LEX_TYPEEQUAL : */ "===", /* LEX_NEQUAL : */ "!=", /* LEX_NTYPEEQUAL : */ "!==", /* LEX_LEQUAL : */ "<=", /* LEX_LSHIFT : */ "<<", /* LEX_LSHIFTEQUAL : */ "<<=", /* LEX_GEQUAL : */ ">=", /* LEX_RSHIFT : */ ">>", /* LEX_RSHIFTUNSIGNED */ ">>>", /* LEX_RSHIFTEQUAL : */ ">>=", /* LEX_RSHIFTUNSIGNEDEQUAL */ ">>>=", /* LEX_PLUSEQUAL : */ "+=", /* LEX_MINUSEQUAL : */ "-=", /* LEX_PLUSPLUS : */ "++", /* LEX_MINUSMINUS */ "--", /* LEX_MULEQUAL : */ "*=", /* LEX_DIVEQUAL : */ "/=", /* LEX_MODEQUAL : */ "%=", /* LEX_ANDEQUAL : */ "&=", /* LEX_ANDAND : */ "&&", /* LEX_OREQUAL : */ "|=", /* LEX_OROR : */ "||", /* LEX_XOREQUAL : */ "^=", /* LEX_ARROW_FUNCTION */ "=>", // reserved words /*LEX_R_IF : */ "if", /*LEX_R_ELSE : */ "else", /*LEX_R_DO : */ "do", /*LEX_R_WHILE : */ "while", /*LEX_R_FOR : */ "for", /*LEX_R_BREAK : */ "break", /*LEX_R_CONTINUE */ "continue", /*LEX_R_FUNCTION */ "function", /*LEX_R_RETURN */ "return", /*LEX_R_VAR : */ "var", /*LEX_R_LET : */ "let", /*LEX_R_CONST : */ "const", /*LEX_R_THIS : */ "this", /*LEX_R_THROW : */ "throw", /*LEX_R_TRY : */ "try", /*LEX_R_CATCH : */ "catch", /*LEX_R_FINALLY : */ "finally", /*LEX_R_TRUE : */ "true", /*LEX_R_FALSE : */ "false", /*LEX_R_NULL : */ "null", /*LEX_R_UNDEFINED */ "undefined", /*LEX_R_NEW : */ "new", /*LEX_R_IN : */ "in", /*LEX_R_INSTANCEOF */ "instanceof", /*LEX_R_SWITCH */ "switch", /*LEX_R_CASE */ "case", /*LEX_R_DEFAULT */ "default", /*LEX_R_DELETE */ "delete", /*LEX_R_TYPEOF : */ "typeof", /*LEX_R_VOID : */ "void", /*LEX_R_DEBUGGER : */ "debugger", /*LEX_R_CLASS : */ "class", /*LEX_R_EXTENDS : */ "extends", /*LEX_R_SUPER : */ "super", /*LEX_R_STATIC : */ "static", /*LEX_R_OF : */ "of" ]; const LEX_RAW_STRING8 = 0xD1; const LEX_RAW_STRING16 = 0xD2; const LEX_RAW_INT0 = 0xD3; const LEX_RAW_INT8 = 0xD4; const LEX_RAW_INT16 = 0xD5; function pretokenise(code, callback) { callback(tokenise(code)); } function tokenise(code) { var pretokeniseStrings = false; // only works on 2v20.48 and later var pretokeniseInts = false; // only works on 2v25.396 and later var boardData = Espruino.Core.Env.getBoardData(); if (Espruino.Config.PRETOKENISE==2) { // force all options always pretokeniseStrings = true; pretokeniseInts = true; } else if (boardData && boardData.VERSION) { // Espruino.Config.PRETOKENISE is nonzero or we wouldn't be called var v = parseFloat(boardData.VERSION.replace("v","0")); if (v >= 2020.48) pretokeniseStrings = true; if (v >= 2026) pretokeniseInts = true; } var lex = (function() { let t = acorn.tokenizer(code, { ecmaVersion : 2020 }); return { next : function() { let tk = t.getToken(); let tkStr = code.substring(tk.start, tk.end), tkValue = tk.value, tkEnd = tk.end; if (tk.type.label=="eof") return undefined; let tp = "?"; if (tk.type.label=="`") { // template string // acorn splits these up into tokens, so we have to work through to the end, then just include the full text let tk2, hasTemplate = false, nesting=0; do { tk2 = t.getToken(); if (tk2.type.label=="${") { hasTemplate = true; nesting++; } if (tk2.type.label=="{") nesting++; if (tk2.type.label=="}") nesting--; } while (nesting>0 || tk2.type.label!="`"); tkEnd = tk2.end; tkStr = code.substring(tk.start, tkEnd); tp = hasTemplate ? "TEMPLATEDSTRING" : "STRING"; // if we don't have any templates, treat as a normal string (https://github.com/espruino/Espruino/issues/2577) tkValue = hasTemplate ? tkStr : eval(tkStr); // don't evaluate if it has templates as it must be done at runtime! } else if (tk.type.label=="string") tp="STRING"; else if (tk.type.label=="num") tp="NUMBER"; else if (tk.type.keyword || tk.type.label=="name") tp="ID"; else if (tp=="?" && tk.start+1==tk.end) tp="CHAR"; return { startIdx : tk.start, endIdx : tkEnd, str : tkStr, value : tkValue, type : tp }; }}; })(); var brackets = 0; var resultCode = ""; var lastIdx = 0; var lastTok = {str:""}, lastlastTok = {str:""}; var tok = lex.next(); while (tok!==undefined) { var previousString = code.substring(lastIdx, tok.startIdx); var tokenString = code.substring(tok.startIdx, tok.endIdx); var tokenId = LEX_OPERATOR_START + TOKENS.indexOf(tokenString); if (tokenId<LEX_OPERATOR_START) tokenId=undefined; // Workaround for https://github.com/espruino/Espruino/issues/1868 if (tokenString=="catch") tokenId=undefined; //console.log("prev "+JSON.stringify(previousString)+" next "+tokenString); if (tok.str=="(" || tok.str=="{" || tok.str=="[") brackets++; // TODO: check for eg. two IDs/similar which can't be merged without a space // preserve newlines at root scope to avoid us filling up the command buffer all at once if (brackets==0 && previousString.indexOf("\n")>=0) resultCode += "\n"; if (tok.str==")" || tok.str=="}" || tok.str=="]") brackets--; // if we have a token for something, use that - else use the string if (pretokeniseStrings && tok.type == "STRING" && tok.value.split("").reduce((r,ch) => r&&(ch.charCodeAt(0)<256), true)/*all 8 bit*/) { let str = tok.value; // get string value lastIdx = tok.endIdx; // get next token lastlastTok = lastTok; lastTok = tok; tok = lex.next(); let hadAtoB = resultCode.endsWith("atob(") && tok.str==")"; // were we surrounded by 'atob'? if (hadAtoB) { str = Espruino.Core.Utils.atob(str); resultCode = resultCode.substring(0, resultCode.length-5); // remove 'atob(' } let length = str.length; if (length==0) { // it's shorter just to write quotes resultCode += tokenString; } else if (length<256) resultCode += String.fromCharCode(LEX_RAW_STRING8, length) + str; else if (length<65536) resultCode += String.fromCharCode(LEX_RAW_STRING16, length&255, (length>>8)&255)+str; if (!hadAtoB) continue; // if not atob, we already got the last token ready } else if (pretokeniseInts && tok.type == "NUMBER") { let val = tok.value; // get string value if (val==Math.round(val)) { // ensure it's an integer // Was there a '-' in a place where it's not a subtraction? if (lastTok.str=="-" && [",","(",":","?","="].includes(lastlastTok.str)) { resultCode = resultCode.slice(0,-1); // remove - val = -val; // negate value } if (val==0) { // it's shorter just to write quotes resultCode += String.fromCharCode(LEX_RAW_INT0); } else if (val>=-128 && val<128) resultCode += String.fromCharCode(LEX_RAW_INT8, val&255); else if (val>=-32768 && val<32768) resultCode += String.fromCharCode(LEX_RAW_INT16, val&255, (val>>8)&255); else { if (val<0) resultCode += "-"; // re-add the '-' we took off resultCode += tokenString; } } else resultCode += tokenString; } else if (tokenId) { //console.log(JSON.stringify(tok.str)+" => "+tokenId); resultCode += String.fromCharCode(tokenId); tok.type = "TOKENISED"; } else { if ((tok.type=="ID" || tok.type=="NUMBER") && (lastTok.type=="ID" || lastTok.type=="NUMBER")) resultCode += " "; resultCode += tokenString; } // next lastIdx = tok.endIdx; lastlastTok = lastTok; lastTok = tok; tok = lex.next(); } return resultCode; } function isTokenised(code) { for (var i=0;i<code.length;i++) { var ch = code.charCodeAt(i); // check for chars out of range if (ch>=LEX_OPERATOR_START+TOKENS.length) return false; } return true; } function untokenise(code) { function needSpaceBetween(lastch, ch) { var chAlphaNum="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$0123456789"; return (lastch>=LEX_OPERATOR_START || ch>=LEX_OPERATOR_START) && (lastch>=LEX_OPERATOR_START || chAlphaNum.includes(String.fromCharCode(lastch))) && (ch>=LEX_OPERATOR_START || chAlphaNum.includes(String.fromCharCode(ch))); } var resultCode = ""; var lastCh = 0; for (var i=0;i<code.length;i++) { var ch = code.charCodeAt(i); if (needSpaceBetween(lastCh, ch)) resultCode += " "; if (ch>=LEX_OPERATOR_START) { if (ch==LEX_RAW_STRING8) { // decode raw strings let len = code.charCodeAt(i+1); resultCode += Espruino.Core.Utils.toJSONishString(code.substring(i+2, i+2+len)); i+=1+len; } else if (ch==LEX_RAW_STRING16) { let len = code.charCodeAt(i+1) | (code.charCodeAt(i+2)<<8); resultCode += Espruino.Core.Utils.toJSONishString(code.substring(i+3, i+3+len)); i+=2+len; } else if (ch==LEX_RAW_INT0) { // decode raw strings resultCode += "0"; } else if (ch==LEX_RAW_INT8) { // decode raw strings let val = code.charCodeAt(i+1); resultCode += val.toString(); i+=1; } else if (ch==LEX_RAW_INT16) { let val = code.charCodeAt(i+1) | (code.charCodeAt(i+2)<<8); resultCode += val.toString(); i+=2; } else if (ch<LEX_OPERATOR_START+TOKENS.length) // decoded other tokens resultCode += TOKENS[ch-LEX_OPERATOR_START]; else { console.warn("Unexpected pretokenised string code:", ch); resultCode += code[i]; } } else resultCode += code[i]; lastCh = ch; } return resultCode; } Espruino.Plugins.Pretokenise = { init : init, sortOrder : 100, // after most plugins, before saveOnSend isTokenised : isTokenised, // could the given data be tokenised JS? untokenise : untokenise, // fn(code) convert a file containing tokens back into strings tokenise : tokenise // fn(code) convert a file containing tokens back into strings }; }());