webhead
Version:
An easy-to-use Node web crawler storing cookies, following redirects, traversing pages and submitting forms.
3 lines (2 loc) • 4.71 kB
JavaScript
!function(e,r){"object"==typeof exports&&"undefined"!=typeof module?module.exports=r(require("cheerio"),require("form-data"),require("fs-extra"),require("node-fetch"),require("jquery-param"),require("tough-cookie")):"function"==typeof define&&define.amd?define(["cheerio","form-data","fs-extra","node-fetch","jquery-param","tough-cookie"],r):(e=e||self).webhead=r(e.cheerio,e.formData,e.fsExtra,e.nodeFetch,e.jqueryParam,e.toughCookie)}(this,function(e,r,t,o,n,a){function s(){return(s=Object.assign||function(e){for(var r=1;r<arguments.length;r++){var t=arguments[r];for(var o in t)Object.prototype.hasOwnProperty.call(t,o)&&(e[o]=t[o])}return e}).apply(this,arguments)}e=e&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e,r=r&&Object.prototype.hasOwnProperty.call(r,"default")?r.default:r,t=t&&Object.prototype.hasOwnProperty.call(t,"default")?t.default:t,o=o&&Object.prototype.hasOwnProperty.call(o,"default")?o.default:o,n=n&&Object.prototype.hasOwnProperty.call(n,"default")?n.default:n;var i=(a=a&&Object.prototype.hasOwnProperty.call(a,"default")?a.default:a).CookieJar;return function(a){var c,u,l,d=a||{},f=d.jarFile,p=d.userAgent,h=d.verbose,y=d.beforeSend,m=d.complete,v={},j={},O=function e(r,t,o){try{var n={method:r.toUpperCase(),url:P(t),options:b(o)};return y&&((n=y(n,j)).method=n.method.toUpperCase(),n.url=P(n.url),n.options=b(n.options)),Promise.resolve(C(n)).then(function(r){var t=r.response,o=r.redirect;return o?e(o.method,o.url,o.options):(v.url=n.url,v.cookie=S(v.url.href),v.response=t,m&&m(n,j,v),t)})}catch(e){return Promise.reject(e)}},P=function(e){return e.constructor==URL&&(e=e.href),new URL(e,v.url)},b=function(e){return e||(e={}),e.headers=g(e.headers),e},g=function(e){return e?Object.entries(e).reduce(function(e,r){var t=r[0],o=r[1];return"set-cookie"!=t.toLowerCase()&&Array.isArray(o)&&(o=o.join("; ")),e[t.replace(/\b./g,function(e){return e.toUpperCase()})]=o,e},{}):{}},C=function(e){var a=e.method,i=e.url,c=e.options;try{var u=c.headers,l=c.data,d=c.multiPartData,f=c.json,y=S(i.href),m={method:a,headers:Object.assign({},u),redirect:"manual"};if(m.headers.Host=i.host,i=i.href,y.length&&(m.headers.Cookie=y),!m.headers["User-Agent"]&&p&&(m.headers["User-Agent"]=p),l)"GET"==a?i+=(i.match(/\?/)?"&":"?")+n(l):(m.headers["Content-Type"]||(m.headers["Content-Type"]="application/x-www-form-urlencoded"),m.body=n(l));else if(d){var v=new r;(d||[]).forEach(function(e){v.append(e.name,e.file?t.createReadStream(e.file):e.hasOwnProperty("value")?e.value:e.contents)}),m.body=v,m.headers=s({},m.headers,v.getHeaders())}return f&&(m.headers["Content-Type"]="application/json",m.body=JSON.stringify(f)),h&&console.log(a,i,m),Promise.resolve(o(i,s({method:a},m))).then(function(e){return Promise.resolve(w(a,i,c,e))})}catch(e){return Promise.reject(e)}},w=function(e,r,o,n){try{var a=n.status;return Promise.resolve(n.text()).then(function(s){var i,d=g(n.headers.raw());if(h&&console.log({statusCode:a,data:s,headers:d}),d["Set-Cookie"]){var p=k(r);if(d["Set-Cookie"].forEach(function(e){c.setCookieSync(e,p)}),f){var y=c.toJSON().cookies,m={};t.pathExistsSync(f)&&(m=t.readJsonSync(f)),m.constructor==Object?m.cookies=y:m=y,t.writeFileSync(f,JSON.stringify(m,null,2))}}return/^3/.test(""+a)&&(i={method:e,url:d.Location,options:o},a<=303&&(i.method="GET",delete i.options.data)),u=void 0,l=void 0,{response:{statusCode:a,data:s,headers:d},redirect:i}})}catch(e){return Promise.reject(e)}},k=function(e){return e.replace(/\?.*/,"")},S=function(e){return c.getCookiesSync(k(e)).join("; ")};if("get post put patch delete head options".split(" ").forEach(function(e){v[e]=function(){try{var r=arguments;return Promise.resolve(O.apply(void 0,[e].concat([].slice.call(r))))}catch(e){return Promise.reject(e)}}}),v.text=function(){return v.response?v.response.data:""},v.json=function(){if(!l&&v.response){var e=v.response,r=e.data;r&&(""+e.headers["Content-Type"]).match("json")&&(l=JSON.parse(r))}return l},v.$=function(){if(!u&&v.response){var r=v.response,t=r.data,o=r.headers,n=(""+o["Content-Type"]).match(/(html|xml)/);n&&(u=e.load(t,{xmlMode:"xml"==n[1]}))}return u?u.apply(void 0,[].slice.call(arguments)):[]},v.submit=function(e,r,t){try{var o=v.$(e);return Promise.resolve(function(){if(o.length){var e=o.attr("action"),n=o.attr("method")||"GET";return r=Object.assign(o.serializeArray().reduce(function(e,r){return e[r.name]=r.value,e},{}),r||{}),Promise.resolve(O(n,e,s({},t,{data:r})))}}())}catch(e){return Promise.reject(e)}},v.clearCookies=function(){c.removeAllCookiesSync()},t.pathExistsSync(f)){var x=t.readJsonSync(f),q=x.cookies||x;c=i.fromJSON({cookies:q.constructor==Array?q:[]})}else c=new i;return v}});
//# sourceMappingURL=webhead.umd.js.map