/*
 * webhead
 *
 * An easy-to-use Node web crawler that stores cookies, follows redirects,
 * traverses pages, and submits forms.
 *
 * Package-viewer listing: JavaScript, 3 lines (2 loc), 4.01 kB. Version: not shown.
 */
import cheerio from "cheerio";
import FormData from "form-data";
import fs from "fs-extra";
import fetch from "node-fetch";
import param from "jquery-param";
import toughCookie from "tough-cookie";

const { CookieJar } = toughCookie;

/** HTTP verbs exposed as request methods on the crawler object. */
const HTTP_METHODS = ["get", "post", "put", "patch", "delete", "head", "options"];

/**
 * Creates a crawler object that keeps cookies between requests, follows
 * redirects manually and exposes helpers to inspect the last response.
 *
 * @param {object} [opts]
 * @param {string} [opts.jarFile] - Path of a JSON file the cookie jar is loaded
 *   from at creation and written back to whenever a response sets cookies.
 * @param {string} [opts.userAgent] - User-Agent sent when a request does not set one.
 * @param {boolean} [opts.verbose] - When truthy, logs each request and response.
 * @param {Function} [opts.beforeSend] - Called as `beforeSend(parameters, session)`
 *   before every request (redirect hops included); must return the parameters
 *   (`{ method, url, options }`) to use.
 * @param {Function} [opts.complete] - Called as `complete(parameters, session, webhead)`
 *   after the final (non-redirect) response has been stored.
 * @returns {object} The crawler: `get/post/put/patch/delete/head/options(url, options)`,
 *   `text()`, `json()`, `$(selector)`, `submit(selector, data, options)`,
 *   `clearCookies()`, plus `url`, `cookie` and `response` of the last request.
 */
export default function createWebhead(opts) {
  const { jarFile, userAgent, verbose, beforeSend, complete } = opts || {};
  const webhead = {};
  const session = {};
  let cookieJar;
  let cachedCheerio;
  let cachedJson;

  /** Strips the query string before a URL is handed to the cookie jar. */
  const toughCookieUrl = (href) => href.replace(/\?.*/, "");

  /**
   * Builds the Cookie request header value for a URL.
   * Uses getCookieStringSync so only `name=value` pairs are emitted; joining
   * Cookie objects directly would serialise them in Set-Cookie form and leak
   * attributes such as Path/Expires into the header.
   */
  const getCookie = (href) => cookieJar.getCookieStringSync(toughCookieUrl(href));

  /** Resolves a string or URL against the URL of the last completed request. */
  const toURL = (url) => {
    if (url === undefined || url === null) {
      throw new TypeError("A URL is required");
    }
    const href = url instanceof URL ? url.href : url;
    return new URL(href, webhead.url);
  };

  /** Normalises header names to Title-Case and flattens array values. */
  const toHeaders = (headers) => {
    if (!headers) {
      return {};
    }
    return Object.entries(headers).reduce((normalised, [name, rawValue]) => {
      // Set-Cookie must stay an array: each entry is stored in the jar separately.
      const value =
        name.toLowerCase() !== "set-cookie" && Array.isArray(rawValue)
          ? rawValue.join("; ")
          : rawValue;
      normalised[name.replace(/\b./g, (first) => first.toUpperCase())] = value;
      return normalised;
    }, {});
  };

  /** Returns a copy of the request options with normalised headers (the caller's object is left untouched). */
  const toOptions = (options) =>
    Object.assign({}, options, { headers: toHeaders(options && options.headers) });

  /** Picks the cookie array out of the parsed jar file, whatever its layout. */
  const extractCookies = (stored) => {
    const cookies = stored && stored.cookies ? stored.cookies : stored;
    return Array.isArray(cookies) ? cookies : [];
  };

  /** Writes the current cookie jar to `jarFile`, preserving other keys of an existing JSON object. */
  const saveCookieJar = () => {
    const cookies = cookieJar.toJSON().cookies;
    let stored = {};
    if (fs.pathExistsSync(jarFile)) {
      stored = fs.readJsonSync(jarFile);
    }
    const isPlainObject =
      stored !== null && typeof stored === "object" && !Array.isArray(stored);
    if (isPlainObject) {
      stored.cookies = cookies;
    } else {
      stored = cookies;
    }
    fs.writeFileSync(jarFile, JSON.stringify(stored, null, 2));
  };

  /**
   * Reads the response, stores its cookies and works out whether a redirect
   * has to be followed.
   *
   * @returns {Promise<{response: object, redirect: (object|undefined)}>}
   */
  const handleResponse = async (method, href, options, res) => {
    const statusCode = res.status;
    const data = await res.text();
    const headers = toHeaders(res.headers.raw());

    if (verbose) {
      console.log({ statusCode, data, headers });
    }

    if (headers["Set-Cookie"]) {
      const cookieUrl = toughCookieUrl(href);
      headers["Set-Cookie"].forEach((setCookie) => {
        cookieJar.setCookieSync(setCookie, cookieUrl);
      });
      if (jarFile) {
        saveCookieJar();
      }
    }

    let redirect;
    // A 3xx without Location (e.g. 304 Not Modified) is a final response, not a redirect.
    if (statusCode >= 300 && statusCode < 400 && headers.Location) {
      redirect = {
        method,
        // Location may be relative: resolve it against the URL that issued the redirect.
        url: new URL(headers.Location, href),
        // Copy so the caller's options object is never mutated.
        options: Object.assign({}, options),
      };
      if (statusCode <= 303) {
        // The follow-up is a GET, which must not carry any request body.
        redirect.method = "GET";
        delete redirect.options.data;
        delete redirect.options.multiPartData;
        delete redirect.options.json;
      }
    }

    // Parsed views belong to the previous response; drop them.
    cachedCheerio = undefined;
    cachedJson = undefined;

    return { response: { statusCode, data, headers }, redirect };
  };

  /** Performs a single HTTP exchange (no redirect following). */
  const send = async ({ method, url, options }) => {
    const { headers, data, multiPartData, json } = options;
    const cookie = getCookie(url.href);
    const fetchOptions = {
      method,
      headers: Object.assign({}, headers),
      // Redirects are followed by hand so cookies set on each hop are captured.
      redirect: "manual",
    };
    let href = url.href;

    fetchOptions.headers.Host = url.host;
    if (cookie.length) {
      fetchOptions.headers.Cookie = cookie;
    }
    if (!fetchOptions.headers["User-Agent"] && userAgent) {
      fetchOptions.headers["User-Agent"] = userAgent;
    }

    if (data) {
      if (method === "GET") {
        href += (href.includes("?") ? "&" : "?") + param(data);
      } else {
        if (!fetchOptions.headers["Content-Type"]) {
          fetchOptions.headers["Content-Type"] = "application/x-www-form-urlencoded";
        }
        fetchOptions.body = param(data);
      }
    } else if (multiPartData) {
      const form = new FormData();
      multiPartData.forEach((part) => {
        let content;
        if (part.file) {
          content = fs.createReadStream(part.file);
        } else if (Object.prototype.hasOwnProperty.call(part, "value")) {
          content = part.value;
        } else {
          content = part.contents;
        }
        form.append(part.name, content);
      });
      fetchOptions.body = form;
      fetchOptions.headers = Object.assign({}, fetchOptions.headers, form.getHeaders());
    }

    if (json) {
      fetchOptions.headers["Content-Type"] = "application/json";
      fetchOptions.body = JSON.stringify(json);
    }

    if (verbose) {
      console.log(method, href, fetchOptions);
    }

    const res = await fetch(href, fetchOptions);
    return handleResponse(method, href, options, res);
  };

  /** Sends a request, follows redirects and records the final response on `webhead`. */
  const request = async (method, url, options) => {
    let parameters = {
      method: method.toUpperCase(),
      url: toURL(url),
      options: toOptions(options),
    };

    if (beforeSend) {
      // The hook may return loosely-typed values; normalise them again.
      parameters = beforeSend(parameters, session);
      parameters.method = parameters.method.toUpperCase();
      parameters.url = toURL(parameters.url);
      parameters.options = toOptions(parameters.options);
    }

    const { response, redirect } = await send(parameters);
    if (redirect) {
      return request(redirect.method, redirect.url, redirect.options);
    }

    webhead.url = parameters.url;
    webhead.cookie = getCookie(webhead.url.href);
    webhead.response = response;
    if (complete) {
      complete(parameters, session, webhead);
    }
    return response;
  };

  HTTP_METHODS.forEach((name) => {
    webhead[name] = (...args) => request(name, ...args);
  });

  /** @returns {string} Body of the last response, or "" when there is none. */
  webhead.text = () => (webhead.response ? webhead.response.data : "");

  /** @returns {*} Parsed body of the last response when its Content-Type mentions json. */
  webhead.json = () => {
    if (!cachedJson && webhead.response) {
      const { data, headers } = webhead.response;
      if (data && String(headers["Content-Type"]).match("json")) {
        cachedJson = JSON.parse(data);
      }
    }
    return cachedJson;
  };

  /** Runs a cheerio query against the last HTML/XML response; returns [] when there is none. */
  webhead.$ = (...args) => {
    if (!cachedCheerio && webhead.response) {
      const { data, headers } = webhead.response;
      const type = String(headers["Content-Type"]).match(/(html|xml)/);
      if (type) {
        cachedCheerio = cheerio.load(data, { xmlMode: type[1] === "xml" });
      }
    }
    return cachedCheerio ? cachedCheerio(...args) : [];
  };

  /**
   * Submits the form matched by `selector`, merging `data` over the form's own fields.
   *
   * @returns {Promise<object|undefined>} The response, or undefined when no form matches.
   */
  webhead.submit = async (selector, data, options) => {
    const $form = webhead.$(selector);
    if (!$form.length) {
      return undefined;
    }

    const action = $form.attr("action");
    const method = $form.attr("method") || "GET";
    const fields = $form.serializeArray().reduce((collected, field) => {
      collected[field.name] = field.value;
      return collected;
    }, {});
    const payload = Object.assign(fields, data || {});

    // A form without an action attribute submits to the current page.
    const target = action === undefined ? webhead.url : action;
    return request(method, target, Object.assign({}, options, { data: payload }));
  };

  /** Removes every cookie from the in-memory jar. */
  webhead.clearCookies = () => {
    cookieJar.removeAllCookiesSync();
  };

  if (jarFile && fs.pathExistsSync(jarFile)) {
    cookieJar = CookieJar.fromJSON({ cookies: extractCookies(fs.readJsonSync(jarFile)) });
  } else {
    cookieJar = new CookieJar();
  }

  return webhead;
}
//# sourceMappingURL=webhead.module.js.map