webhead
Version:
An easy-to-use Node web crawler storing cookies, following redirects, traversing pages and submitting forms.
3 lines (2 loc) • 3.36 kB
JavaScript
import cheerio from "cheerio";
import FormData from "form-data";
import fs from "fs-extra";
import fetch from "node-fetch";
import param from "jquery-param";
import toughCookie from "tough-cookie";

const { CookieJar } = toughCookie;

/** HTTP verbs that are exposed as same-named methods on the crawler object. */
const HTTP_METHODS = ["get", "post", "put", "patch", "delete", "head", "options"];

/** Request-body options that must not survive a redirect that is downgraded to GET. */
const BODY_OPTIONS = ["data", "multiPartData", "json"];

/**
 * Creates a stateful crawler object that keeps cookies between requests,
 * follows redirects manually, parses HTML/XML/JSON responses and submits forms.
 *
 * @param {object} [opts]
 * @param {string} [opts.jarFile] Path of a JSON file used to load and persist cookies.
 * @param {string} [opts.userAgent] Default `User-Agent` header, used when a request sets none.
 * @param {boolean} [opts.verbose] When truthy, every request and response is logged with `console.log`.
 * @param {Function} [opts.beforeSend] Hook `(request, session)` called before each request
 *   (including each redirect hop). It may return a replacement request
 *   `{ method, url, options }`; when it returns nothing the request it was given is used.
 * @param {Function} [opts.complete] Hook `(request, session, webhead)` called after the final
 *   (non-redirect) response has been stored.
 * @returns {object} Crawler with:
 *   - `get|post|put|patch|delete|head|options(url, options)` resolving to
 *     `{ statusCode, data, headers }`. `options` may carry `headers`, `data`
 *     (query string for GET, urlencoded body otherwise), `multiPartData`
 *     (array of `{ name, file | value | contents }`) and `json`.
 *   - `text()`, `json()`, `$(...)` accessors for the last response,
 *   - `submit(selector, data, options)` and `clearCookies()`,
 *   - `url`, `cookie` and `response` properties describing the last completed request.
 */
export default (opts) => {
  const { jarFile, userAgent, verbose, beforeSend, complete } = opts || {};

  const webhead = {};
  // Initially empty object handed to both hooks on every call.
  const session = {};
  // Lazily-built parse results for the current response; reset on every response.
  let cachedDocument;
  let cachedJson;

  /** Builds the cookie jar, seeding it from `jarFile` when that file exists. */
  const loadCookieJar = () => {
    if (!jarFile || !fs.pathExistsSync(jarFile)) {
      return new CookieJar();
    }
    const saved = fs.readJsonSync(jarFile);
    // The file holds either `{ cookies: [...] }` or a bare array of cookies.
    const cookies = (saved && saved.cookies) || saved;
    return CookieJar.fromJSON({ cookies: Array.isArray(cookies) ? cookies : [] });
  };

  const cookieJar = loadCookieJar();

  /** Removes the query string (everything from the first `?`) from a URL string. */
  const stripQuery = (href) => href.replace(/\?.*/, "");

  /**
   * Returns the `Cookie` request-header value for a URL string.
   * `getCookieStringSync` yields only `name=value` pairs; joining the Cookie
   * objects themselves would also emit Set-Cookie attributes such as `Path=/`.
   */
  const getCookieHeader = (href) => cookieJar.getCookieStringSync(stripQuery(href));

  /**
   * Resolves a string or URL against the URL of the last completed request.
   * @throws {TypeError} when no URL is given.
   */
  const toUrl = (url) => {
    if (url == null) {
      throw new TypeError("webhead: a URL is required");
    }
    const href = url instanceof URL ? url.href : url;
    return new URL(href, webhead.url);
  };

  /**
   * Returns a copy of a header map with names capitalised per word
   * (`content-type` -> `Content-Type`) and multi-valued headers joined with
   * "; ", except `Set-Cookie`, which stays an array.
   */
  const normalizeHeaders = (headers) => {
    const normalized = {};
    if (!headers) {
      return normalized;
    }
    for (const [name, rawValue] of Object.entries(headers)) {
      let value = rawValue;
      if (name.toLowerCase() !== "set-cookie" && Array.isArray(value)) {
        value = value.join("; ");
      }
      normalized[name.replace(/\b./g, (ch) => ch.toUpperCase())] = value;
    }
    return normalized;
  };

  /** Returns a shallow copy of the request options with normalised headers. */
  const prepareOptions = (options) =>
    Object.assign({}, options, { headers: normalizeHeaders(options && options.headers) });

  /** Writes the jar's cookies to `jarFile`, keeping any other keys already in that file. */
  const persistCookies = () => {
    const { cookies } = cookieJar.toJSON();
    let stored = {};
    if (fs.pathExistsSync(jarFile)) {
      stored = fs.readJsonSync(jarFile);
    }
    if (stored && stored.constructor === Object) {
      stored.cookies = cookies;
    } else {
      stored = cookies;
    }
    fs.writeFileSync(jarFile, JSON.stringify(stored, null, 2));
  };

  /**
   * Stores response cookies, detects redirects and packages the response.
   * @param {string} method Upper-cased method of the request just sent.
   * @param {string} href Absolute URL string that was actually requested.
   * @param {object} options Options of the request just sent.
   * @param {object} response node-fetch response.
   * @returns {Promise<{response: object, redirect: (object|undefined)}>}
   */
  const handleResponse = async (method, href, options, response) => {
    const statusCode = response.status;
    const data = await response.text();
    const headers = normalizeHeaders(response.headers.raw());
    if (verbose) {
      console.log({ statusCode, data, headers });
    }

    if (headers["Set-Cookie"]) {
      const cookieUrl = stripQuery(href);
      headers["Set-Cookie"].forEach((setCookie) => {
        cookieJar.setCookieSync(setCookie, cookieUrl);
      });
      if (jarFile) {
        persistCookies();
      }
    }

    let redirect;
    // A 3xx without a Location header (e.g. 304) is a final response, not a redirect.
    if (/^3/.test(String(statusCode)) && headers.Location) {
      // Relative targets are resolved against the URL that issued the redirect.
      redirect = { method, url: new URL(headers.Location, href), options };
      if (statusCode <= 303) {
        // Downgrade to a body-less GET on a copy, leaving the caller's options intact.
        const getOptions = Object.assign({}, options);
        BODY_OPTIONS.forEach((key) => {
          delete getOptions[key];
        });
        redirect.method = "GET";
        redirect.options = getOptions;
      }
    }

    cachedDocument = undefined;
    cachedJson = undefined;
    return { response: { statusCode, data, headers }, redirect };
  };

  /** Performs a single HTTP round trip (no redirect following) for a prepared request. */
  const send = async ({ method, url, options }) => {
    const { headers, data, multiPartData, json } = options;
    const cookie = getCookieHeader(url.href);
    const fetchOptions = {
      method,
      headers: Object.assign({}, headers),
      // Redirects are followed by `request` so cookies set on each hop are stored.
      redirect: "manual",
    };
    fetchOptions.headers.Host = url.host;
    if (cookie.length) {
      fetchOptions.headers.Cookie = cookie;
    }
    if (!fetchOptions.headers["User-Agent"] && userAgent) {
      fetchOptions.headers["User-Agent"] = userAgent;
    }

    let href = url.href;
    if (data) {
      if (method === "GET") {
        href += (href.match(/\?/) ? "&" : "?") + param(data);
      } else {
        if (!fetchOptions.headers["Content-Type"]) {
          fetchOptions.headers["Content-Type"] = "application/x-www-form-urlencoded";
        }
        fetchOptions.body = param(data);
      }
    } else if (multiPartData) {
      const form = new FormData();
      multiPartData.forEach((part) => {
        let content;
        if (part.file) {
          content = fs.createReadStream(part.file);
        } else if (Object.prototype.hasOwnProperty.call(part, "value")) {
          content = part.value;
        } else {
          content = part.contents;
        }
        form.append(part.name, content);
      });
      fetchOptions.body = form;
      fetchOptions.headers = Object.assign({}, fetchOptions.headers, form.getHeaders());
    }
    if (json) {
      // A JSON payload overrides any body and content type chosen above.
      fetchOptions.headers["Content-Type"] = "application/json";
      fetchOptions.body = JSON.stringify(json);
    }
    if (verbose) {
      console.log(method, href, fetchOptions);
    }

    const response = await fetch(href, fetchOptions);
    return handleResponse(method, href, options, response);
  };

  /** Sends a request, follows redirects, then records the final response on `webhead`. */
  const request = async (method, url, options) => {
    let req = {
      method: method.toUpperCase(),
      url: toUrl(url),
      options: prepareOptions(options),
    };
    if (beforeSend) {
      // Tolerate hooks that mutate the request in place and return nothing.
      req = beforeSend(req, session) || req;
      req.method = req.method.toUpperCase();
      req.url = toUrl(req.url);
      req.options = prepareOptions(req.options);
    }

    const { response, redirect } = await send(req);
    if (redirect) {
      return request(redirect.method, redirect.url, redirect.options);
    }

    webhead.url = req.url;
    webhead.cookie = getCookieHeader(webhead.url.href);
    webhead.response = response;
    if (complete) {
      complete(req, session, webhead);
    }
    return response;
  };

  HTTP_METHODS.forEach((verb) => {
    webhead[verb] = async (...args) => await request(verb, ...args);
  });

  /** Body of the last response, or "" when no request has completed yet. */
  webhead.text = () => (webhead.response ? webhead.response.data : "");

  /** Parsed body of the last response when its Content-Type mentions "json"; otherwise undefined. */
  webhead.json = () => {
    if (!cachedJson && webhead.response) {
      const { data, headers } = webhead.response;
      if (data && String(headers["Content-Type"]).match("json")) {
        cachedJson = JSON.parse(data);
      }
    }
    return cachedJson;
  };

  /**
   * Runs a cheerio query against the last response when its Content-Type
   * mentions html or xml; returns an empty array when there is no such document.
   */
  webhead.$ = (...args) => {
    if (!cachedDocument && webhead.response) {
      const { data, headers } = webhead.response;
      const kind = String(headers["Content-Type"]).match(/(html|xml)/);
      if (kind) {
        cachedDocument = cheerio.load(data, { xmlMode: kind[1] === "xml" });
      }
    }
    return cachedDocument ? cachedDocument(...args) : [];
  };

  /**
   * Submits the form matched by `selector` with its current field values,
   * overridden by `data`. Resolves to undefined when no form matches.
   */
  webhead.submit = async (selector, data, options) => {
    const form = webhead.$(selector);
    if (!form.length) {
      return undefined;
    }
    // A form without an action attribute submits to the current page.
    const action = form.attr("action") || webhead.url;
    const method = form.attr("method") || "GET";
    const fields = form.serializeArray().reduce((all, { name, value }) => {
      all[name] = value;
      return all;
    }, {});
    const payload = Object.assign(fields, data || {});
    return request(method, action, Object.assign({}, options, { data: payload }));
  };

  /** Removes every cookie from the in-memory jar. */
  webhead.clearCookies = () => {
    cookieJar.removeAllCookiesSync();
  };

  return webhead;
};
//# sourceMappingURL=webhead.modern.js.map