UNPKG

zs-extract

Version:
1 lines 11.6 kB
{"version":3,"sources":["extract.ts"],"names":["vm","url","cheerio","fetch","WINDOW","request","options","cb","response","statusCode","headers","encoding","res","method","compress","gzip","status","headersRaw","raw","headersObject","p","Object","keys","join","data","buffer","toString","then","err","requestP","req","r","Promise","resolve","reject","error","body","codeWindow","JSON","stringify","codeExtract","entries","map","a","extractScripts","html","$","load","each","_elI","el","push","extractScript","script","result","includes","ctxObj","create","Error","ctx","createContext","runOpts","timeout","codePre","codePost","dlbutton","runInContext","parse","extract","uri","requester","bodyType","scripts","download","filename","decodeURI","pathname","split","pop"],"mappings":"AAAA,OAAOA,EAAP,MAAe,IAAf;AACA,OAAOC,GAAP,MAAgB,KAAhB;AAEA,OAAOC,OAAP,MAAoB,SAApB;AACA,OAAOC,KAAP,MAAkB,YAAlB;AAEA,SAAQC,MAAR,QAAqB,YAArB;;AAsDA;;;;;;AAMA,SAASC,OAAT,CACCC,OADD,EAECC,EAFD,EAGE;AACD,MAAIC,QAA0B,GAAG;AAChCC,IAAAA,UAAU,EAAE,CADoB;AAEhCC,IAAAA,OAAO,EAAE;AAFuB,GAAjC;AAIA,QAAM;AAACC,IAAAA;AAAD,MAAaL,OAAnB;AACA,GAAC,YAAY;AACZ,UAAMM,GAAG,GAAG,MAAMT,KAAK,CAACG,OAAO,CAACL,GAAT,EAAc;AACpCY,MAAAA,MAAM,EAAEP,OAAO,CAACO,MAAR,IAAkB,KADU;AAEpCH,MAAAA,OAAO,EAAE;AACR,sBAAc,GADN;AAER,YAAIJ,OAAO,CAACI,OAAR,IAAmB,EAAvB;AAFQ,OAF2B;AAMpCI,MAAAA,QAAQ,EAAE,CAAC,CAACR,OAAO,CAACS;AANgB,KAAd,CAAvB;AAQA,UAAM;AAACC,MAAAA,MAAD;AAASN,MAAAA;AAAT,QAAoBE,GAA1B;AACA,UAAMK,UAAU,GAAGP,OAAO,CAACQ,GAAR,EAAnB;AACA,UAAMC,aAAsC,GAAG,EAA/C;;AACA,SAAK,MAAMC,CAAX,IAAgBC,MAAM,CAACC,IAAP,CAAYL,UAAZ,CAAhB,EAAyC;AACxCE,MAAAA,aAAa,CAACC,CAAD,CAAb,GAAmBH,UAAU,CAACG,CAAD,CAAV,CAAcG,IAAd,CAAmB,IAAnB,CAAnB;AACA;;AACDf,IAAAA,QAAQ,GAAG;AACVC,MAAAA,UAAU,EAAEO,MADF;AAEVN,MAAAA,OAAO,EAAES;AAFC,KAAX;AAIA,UAAMK,IAAI,GAAG,MAAMZ,GAAG,CAACa,MAAJ,EAAnB;AACA,WAAOd,QAAQ,KAAK,IAAb,GAAoBa,IAApB,GAA2BA,IAAI,CAACE,QAAL,CAAcf,QAAd,CAAlC;AACA,GArBD,IAsBEgB,IAtBF,CAuBEH,IAAI,IAAI;AACPjB,IAAAA,EAAE,CAAC,IAAD,EAAOC,QAAP,EAAiBgB,IAAjB,CAAF;AACA,GAzBH,EA0BEI,GAAG,IAAI;AACNrB,IAAAA,EAAE,CAACqB,GAAD,EAAMpB,QAAN,EAAgB,IAAhB,CAAF;AACA,GA5BH;AA8BA;AAED;;;;;;;;;AAOA,eAAeqB,QAAf,CACCC,GADD,EAECxB,OAFD,EAGE;AACD,QAAMyB,CAAC,GAAG,MAAM,IAAIC,OAAJ,CAWb,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACvBJ,IAAAA,GAAG,CAACxB,OAAD,EAAU,CAAC6B,KAAD,EAAQ3B,QAAR,EAAkB4B,IAAlB,KAA2B;AACvC,UAAID,KAAJ,EAAW;AACVD,QAAAA,MAAM,CAACC,KAAD,CAAN;AACA;AACA;;AACDF,MAAAA,OAAO,CAAC;AACPzB,QAAAA,QADO;AAEP4B,QAAAA;AAFO,OAAD,CAAP;AAIA,KATE,CAAH;AAUA,GAtBe,CAAhB;AAuBA,SAAOL,CAAP;AACA;AAED;;;;;;;;AAMA,SAASM,UAAT,CAAoBD,IAApB,EAAkC;AACjC,SAAQ,IAAGhC,MAAO,UAASkC,IAAI,CAACC,SAAL,CAAeH,IAAf,CAAqB,GAAhD;AACA;AAED;;;;;;;;AAMA,SAASI,WAAT,CAAqBhB,IAArB,EAAkD;AACjD,QAAMY,IAAI,GAAGf,MAAM,CAACoB,OAAP,CAAejB,IAAf,EACXkB,GADW,CACPC,CAAC,IAAIA,CAAC,CAACpB,IAAF,CAAO,GAAP,CADE,EAEXA,IAFW,CAEN,GAFM,CAAb;AAGA,SAAQ,uBAAsBa,IAAK,KAAnC;AACA;AAED;;;;;;;;AAMA,SAASQ,cAAT,CAAwBC,IAAxB,EAAsC;AACrC,QAAMd,CAAW,GAAG,EAApB;AACA,QAAMe,CAAC,GAAG5C,OAAO,CAAC6C,IAAR,CAAaF,IAAb,CAAV;AACAC,EAAAA,CAAC,CAAC,QAAD,CAAD,CAAYE,IAAZ,CAAiB,CAACC,IAAD,EAAOC,EAAP,KAAc;AAC9B,UAAM1B,IAAI,GAAGsB,CAAC,CAACI,EAAD,CAAD,CAAML,IAAN,EAAb;;AACA,QAAIrB,IAAJ,EAAU;AACTO,MAAAA,CAAC,CAACoB,IAAF,CAAO3B,IAAP;AACA;AACD,GALD;AAMA,SAAOO,CAAP;AACA;AAED;;;;;;;;;AAOA,SAASqB,aAAT,CAAuBhB,IAAvB,EAAqCiB,MAArC,EAAqD;AACpD,MAAIC,MAAqB,GAAG,IAA5B;;AACA,MAAI,CAACD,MAAM,CAACE,QAAP,CAAgB,UAAhB,CAAL,EAAkC;AACjC,WAAOD,MAAP;AACA,GAJmD,CAMpD;AACA;AACA;;;AACA,QAAME,MAAM,GAAGnC,MAAM,CAACoC,MAAP,CAAc,IAAd,CAAf;;AACA,MAAID,MAAM,CAAC9B,QAAX,EAAqB;AACpB,UAAM,IAAIgC,KAAJ,CAAU,2CAAV,CAAN;AACA;;AACD,QAAMC,GAAG,GAAG3D,EAAE,CAAC4D,aAAH,CAAiBJ,MAAjB,CAAZ;AACA,QAAMK,OAAO,GAAG;AACfC,IAAAA,OAAO,EAAE;AADM,GAAhB,CAdoD,CAkBpD;;AACA,QAAMC,OAAO,GAAG1B,UAAU,CAACD,IAAD,CAA1B,CAnBoD,CAqBpD;;AACA,QAAM4B,QAAQ,GAAGxB,WAAW,CAC3B;AACCyB,IAAAA,QAAQ,EAAE;AADX,GAD2B,CAA5B,CAtBoD,CA4BpD;;AACA,MAAI;AACH;AACAjE,IAAAA,EAAE,CAACkE,YAAH,CAAgBH,OAAhB,EAAyBJ,GAAzB,EAA8BE,OAA9B,EAFG,CAIH;;AACA7D,IAAAA,EAAE,CAACkE,YAAH,CAAgBb,MAAhB,EAAwBM,GAAxB,EAA6BE,OAA7B,EALG,CAOH;AACA;AACA;AACA;;AACAP,IAAAA,MAAM,GAAGhB,IAAI,CAAC6B,KAAL,CAAW,KAAKnE,EAAE,CAACkE,YAAH,CAAgBF,QAAhB,EAA0BL,GAA1B,EAA+BE,OAA/B,CAAhB,CAAT;AACA,GAZD,CAaA,OAAOjC,GAAP,EAAY,CACX;AACA;;AACD,SAAO0B,MAAP;AACA;AAED;;;;;;;;;AAOA,OAAO,eAAec,OAAf,CACNC,GADM,EAENvC,GAAoB,GAAG,IAFjB,EAGL;AACD,QAAMwC,SAAS,GAAGxC,GAAG,IAAKzB,OAA1B;AACA,QAAM;AAACG,IAAAA,QAAD;AAAW4B,IAAAA;AAAX,MAAmB,MAAMP,QAAQ,CAACyC,SAAD,EAAY;AAClDrE,IAAAA,GAAG,EAAEoE,GAD6C;AAElDtD,IAAAA,IAAI,EAAE;AAF4C,GAAZ,CAAvC;AAIA,QAAM;AAACN,IAAAA;AAAD,MAAeD,QAArB;;AACA,MAAIC,UAAU,KAAK,GAAnB,EAAwB;AACvB,UAAM,IAAIiD,KAAJ,CAAW,wBAAuBjD,UAAW,EAA7C,CAAN;AACA;;AACD,QAAM8D,QAAQ,GAAG,OAAOnC,IAAxB;;AACA,MAAImC,QAAQ,KAAK,QAAjB,EAA2B;AAC1B,UAAM,IAAIb,KAAJ,CAAW,sBAAqBa,QAAS,EAAzC,CAAN;AACA;;AAED,QAAMC,OAAO,GAAG5B,cAAc,CAACR,IAAD,CAA9B;AACA,MAAIkB,MAAkB,GAAG,IAAzB;;AACA,OAAK,MAAMD,MAAX,IAAqBmB,OAArB,EAA8B;AAC7BlB,IAAAA,MAAM,GAAGF,aAAa,CAAChB,IAAD,EAAOiB,MAAP,CAAtB;;AACA,QAAIC,MAAJ,EAAY;AACX;AACA;AACD;;AACD,MAAI,CAACA,MAAD,IAAW,CAACA,MAAM,CAACW,QAAvB,EAAiC;AAChC,UAAM,IAAIP,KAAJ,CAAU,wBAAV,CAAN;AACA;;AAED,QAAMe,QAAQ,GAAGxE,GAAG,CAACgC,OAAJ,CAAYoC,GAAZ,EAAiBf,MAAM,CAACW,QAAxB,CAAjB;AACA,QAAMS,QAAQ,GAAGC,SAAS,CACzB,CAAC1E,GAAG,CAACkE,KAAJ,CAAUM,QAAV,EAAoBG,QAApB,IAAgC,EAAjC,EAAqCC,KAArC,CAA2C,GAA3C,EAAgDC,GAAhD,MAAyD,EADhC,CAAT,IAEZ,IAFL;AAIA,SAAO;AACNL,IAAAA,QADM;AAENC,IAAAA;AAFM,GAAP;AAIA","sourcesContent":["import vm from 'vm';\nimport url from 'url';\n\nimport cheerio from 'cheerio';\nimport fetch from 'node-fetch';\n\nimport {WINDOW} from './data';\n\nexport interface IRequestOptions {\n\n\t/**\n\t * URL string.\n\t */\n\turl: string;\n\n\t/**\n\t * Request method.\n\t */\n\tmethod?: string;\n\n\t/**\n\t * Request headers.\n\t */\n\theaders?: {[key: string]: string};\n\n\t/**\n\t * Gzip compression.\n\t */\n\tgzip?: boolean;\n\n\t/**\n\t * Body encoding used for callback functions.\n\t */\n\tencoding?: string | null;\n}\n\nexport interface IRequestResponse {\n\n\t/**\n\t * Status code.\n\t */\n\tstatusCode: number;\n\n\t/**\n\t * Response headers, all lowercase.\n\t */\n\theaders: {[key: string]: string};\n}\n\nexport type IRequestCallback = (\n\terror: any,\n\tresponse: IRequestResponse,\n\tbody: any\n) => void;\n\nexport type IRequest = (\n\toptions: IRequestOptions,\n\tcb?: IRequestCallback\n) => any;\n\n/**\n * The default request implementation.\n *\n * @param options Options object.\n * @param cb Callback function.\n */\nfunction request(\n\toptions: IRequestOptions,\n\tcb: IRequestCallback\n) {\n\tlet response: IRequestResponse = {\n\t\tstatusCode: 0,\n\t\theaders: {}\n\t};\n\tconst {encoding} = options;\n\t(async () => {\n\t\tconst res = await fetch(options.url, {\n\t\t\tmethod: options.method || 'GET',\n\t\t\theaders: {\n\t\t\t\t'User-Agent': '-',\n\t\t\t\t...(options.headers || {})\n\t\t\t},\n\t\t\tcompress: !!options.gzip\n\t\t});\n\t\tconst {status, headers} = res;\n\t\tconst headersRaw = headers.raw();\n\t\tconst headersObject: {[key: string]: string} = {};\n\t\tfor (const p of Object.keys(headersRaw)) {\n\t\t\theadersObject[p] = headersRaw[p].join(', ');\n\t\t}\n\t\tresponse = {\n\t\t\tstatusCode: status,\n\t\t\theaders: headersObject\n\t\t};\n\t\tconst data = await res.buffer();\n\t\treturn encoding === null ? data : data.toString(encoding as any);\n\t})()\n\t\t.then(\n\t\t\tdata => {\n\t\t\t\tcb(null, response, data);\n\t\t\t},\n\t\t\terr => {\n\t\t\t\tcb(err, response, null);\n\t\t\t}\n\t\t);\n}\n\n/**\n * A request promise wrapper.\n *\n * @param req Request function.\n * @param options Request options.\n * @returns Request response and body.\n */\nasync function requestP(\n\treq: IRequest,\n\toptions: IRequestOptions\n) {\n\tconst r = await new Promise<{\n\n\t\t/**\n\t\t * Response object.\n\t\t */\n\t\tresponse: IRequestResponse;\n\n\t\t/**\n\t\t * Response body.\n\t\t */\n\t\tbody: any;\n\t}>((resolve, reject) => {\n\t\treq(options, (error, response, body) => {\n\t\t\tif (error) {\n\t\t\t\treject(error);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tresolve({\n\t\t\t\tresponse,\n\t\t\t\tbody\n\t\t\t});\n\t\t});\n\t});\n\treturn r;\n}\n\n/**\n * Code to create window.\n *\n * @param body HTML body.\n * @returns JavaScript code.\n */\nfunction codeWindow(body: string) {\n\treturn `(${WINDOW})(this,${JSON.stringify(body)})`;\n}\n\n/**\n * Code to extract data from window.\n *\n * @param data Data object.\n * @returns JavaScript code.\n */\nfunction codeExtract(data: {[k: string]: string}) {\n\tconst body = Object.entries(data)\n\t\t.map(a => a.join(':'))\n\t\t.join(',');\n\treturn `(\"\"+JSON.stringify({${body}}))`;\n}\n\n/**\n * Extract script code from HTML code.\n *\n * @param html HTML code.\n * @returns Script code.\n */\nfunction extractScripts(html: string) {\n\tconst r: string[] = [];\n\tconst $ = cheerio.load(html);\n\t$('script').each((_elI, el) => {\n\t\tconst data = $(el).html();\n\t\tif (data) {\n\t\t\tr.push(data);\n\t\t}\n\t});\n\treturn r;\n}\n\n/**\n * Attempt to extract info from script.\n *\n * @param body HTML body.\n * @param script Script code.\n * @returns Result object or null.\n */\nfunction extractScript(body: string, script: string) {\n\tlet result: object | null = null;\n\tif (!script.includes('dlbutton')) {\n\t\treturn result;\n\t}\n\n\t// Create a context with wich to run code in\n\t// Creating the object with a null prototype is very important.\n\t// Prevents host variables from leaking into the sanbox.\n\tconst ctxObj = Object.create(null);\n\tif (ctxObj.toString) {\n\t\tthrow new Error('Failed to create object without prototype');\n\t}\n\tconst ctx = vm.createContext(ctxObj);\n\tconst runOpts = {\n\t\ttimeout: 1000\n\t};\n\n\t// Setup environment.\n\tconst codePre = codeWindow(body);\n\n\t// Extract info from environment.\n\tconst codePost = codeExtract(\n\t\t{\n\t\t\tdlbutton: 'document.getElementById(\"dlbutton\").href'\n\t\t}\n\t);\n\n\t// Attempt to run code in sanbox and extract the info.\n\ttry {\n\t\t// Run the pre script.\n\t\tvm.runInContext(codePre, ctx, runOpts);\n\n\t\t// Run the script code.\n\t\tvm.runInContext(script, ctx, runOpts);\n\n\t\t// Run the post script.\n\t\t// Force return value to be string, with concatenation, NOT casting.\n\t\t// This prevents any funny business from sandboxed code.\n\t\t// eslint-disable-next-line\n\t\tresult = JSON.parse('' + vm.runInContext(codePost, ctx, runOpts));\n\t}\n\tcatch (err) {\n\t\t// Ignore failure.\n\t}\n\treturn result;\n}\n\n/**\n * Extract file info from a URL.\n *\n * @param uri The URI to extract info from.\n * @param req Optional custom request function or null.\n * @returns File info.\n */\nexport async function extract(\n\turi: string,\n\treq: IRequest | null = null\n) {\n\tconst requester = req || (request as IRequest);\n\tconst {response, body} = await requestP(requester, {\n\t\turl: uri,\n\t\tgzip: true\n\t});\n\tconst {statusCode} = response;\n\tif (statusCode !== 200) {\n\t\tthrow new Error(`Invalid status code: ${statusCode}`);\n\t}\n\tconst bodyType = typeof body;\n\tif (bodyType !== 'string') {\n\t\tthrow new Error(`Invalid body type: ${bodyType}`);\n\t}\n\n\tconst scripts = extractScripts(body);\n\tlet result: any | null = null;\n\tfor (const script of scripts) {\n\t\tresult = extractScript(body, script);\n\t\tif (result) {\n\t\t\tbreak;\n\t\t}\n\t}\n\tif (!result || !result.dlbutton) {\n\t\tthrow new Error('Failed to extract info');\n\t}\n\n\tconst download = url.resolve(uri, result.dlbutton);\n\tconst filename = decodeURI(\n\t\t(url.parse(download).pathname || '').split('/').pop() || ''\n\t) || null;\n\n\treturn {\n\t\tdownload,\n\t\tfilename\n\t};\n}\n"],"file":"extract.mjs","sourceRoot":"../src"}