@extractus/feed-extractor
Version:
To read and normalize RSS/ATOM/JSON feed data
4 lines (3 loc) • 10.1 kB
JavaScript
/*
 * CommonJS build of @extractus/feed-extractor, written out in readable form.
 * Public API (wired up through module.exports just below): extract,
 * extractFromJson, extractFromXml and the deprecated read.
 */

// ---------------------------------------------------------------------------
// Bundler interop helpers
// ---------------------------------------------------------------------------

/** Defines every entry of `all` as an enumerable getter on `target`. */
const __export = (target, all) => {
  for (const name in all) {
    Object.defineProperty(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Copies the own properties of `from` onto `to` as live getters, skipping
 * keys `to` already owns and the optional `except` key.
 */
const __copyProps = (to, from, except) => {
  if ((from && typeof from === "object") || typeof from === "function") {
    for (const key of Object.getOwnPropertyNames(from)) {
      if (!Object.prototype.hasOwnProperty.call(to, key) && key !== except) {
        const desc = Object.getOwnPropertyDescriptor(from, key);
        Object.defineProperty(to, key, {
          get: () => from[key],
          enumerable: !desc || desc.enumerable,
        });
      }
    }
  }
  return to;
};

/**
 * Wraps a required module in a namespace-like object. When `isNodeMode` is
 * truthy (or the module is not flagged `__esModule`) the module itself is
 * exposed as `default`.
 */
const __toESM = (mod, isNodeMode) => {
  const target = mod != null ? Object.create(Object.getPrototypeOf(mod)) : {};
  return __copyProps(
    isNodeMode || !mod || !mod.__esModule
      ? Object.defineProperty(target, "default", { value: mod, enumerable: true })
      : target,
    mod
  );
};

/** Builds the exported object: `__esModule: true` plus getters from `mod`. */
const __toCommonJS = (mod) =>
  __copyProps(Object.defineProperty({}, "__esModule", { value: true }), mod);

// The getters are lazy, so they may reference bindings declared further down.
const publicApi = {};
__export(publicApi, {
  extract: () => extract,
  extractFromJson: () => extractFromJson,
  extractFromXml: () => extractFromXml,
  read: () => read,
});
module.exports = __toCommonJS(publicApi);

// ---------------------------------------------------------------------------
// Dependencies
// ---------------------------------------------------------------------------

const crossFetch = __toESM(require("cross-fetch"), 1).default;
const { XMLParser, XMLValidator } = require("fast-xml-parser");
const {
  hasProperty,
  isArray,
  isObject,
  isString,
  stripTags,
  truncate,
} = require("@ndaidong/bellajs");
const { decode } = require("html-entities");

// ---------------------------------------------------------------------------
// URL helpers
// ---------------------------------------------------------------------------

/**
 * @param {string} [url]
 * @returns {boolean} true when `url` parses and its protocol starts with "http".
 */
const isValidUrl = (url = "") => {
  try {
    const parsed = new URL(url);
    return parsed !== null && parsed.protocol.startsWith("http");
  } catch {
    return false;
  }
};

/**
 * Resolves `relativeUrl` against `baseUrl`.
 * @returns {string} the resolved URL, or "" when it cannot be built.
 */
const absolutify = (baseUrl = "", relativeUrl = "") => {
  try {
    return new URL(relativeUrl, baseUrl).toString();
  } catch {
    return "";
  }
};

// Query parameters removed by purifyUrl().
const TRACKING_PARAMS = [
  "CNDID",
  "__twitter_impression",
  "_hsenc",
  "_openstat",
  "action_object_map",
  "action_ref_map",
  "action_type_map",
  "amp",
  "fb_action_ids",
  "fb_action_types",
  "fb_ref",
  "fb_source",
  "fbclid",
  "ga_campaign",
  "ga_content",
  "ga_medium",
  "ga_place",
  "ga_source",
  "ga_term",
  "gs_l",
  "hmb_campaign",
  "hmb_medium",
  "hmb_source",
  "mbid",
  "mc_cid",
  "mc_eid",
  "mkt_tok",
  "referrer",
  "spJobID",
  "spMailingID",
  "spReportId",
  "spUserID",
  "utm_brand",
  "utm_campaign",
  "utm_cid",
  "utm_content",
  "utm_int",
  "utm_mailing",
  "utm_medium",
  "utm_name",
  "utm_place",
  "utm_pubreferrer",
  "utm_reader",
  "utm_social",
  "utm_source",
  "utm_swu",
  "utm_term",
  "utm_userid",
  "utm_viz_id",
  "wt_mc_o",
  "yclid",
  "WT.mc_id",
  "WT.mc_ev",
  "WT.srch",
  "pk_source",
  "pk_medium",
  "pk_campaign",
];

/**
 * Removes the TRACKING_PARAMS query parameters and the hash from `url`.
 * @returns {string|null} the cleaned URL, or null when `url` does not parse.
 */
const purifyUrl = (url) => {
  try {
    const parsed = new URL(url);
    TRACKING_PARAMS.forEach((param) => {
      parsed.searchParams.delete(param);
    });
    return parsed.toString().replace(parsed.hash, "");
  } catch {
    return null;
  }
};

// ---------------------------------------------------------------------------
// Network
// ---------------------------------------------------------------------------

/**
 * Fetches `url` through a proxy: the URI-encoded URL is appended to
 * `proxy.target` and `proxy.headers` are sent with the request.
 */
const fetchViaProxy = async (url, options = {}) => {
  const { proxy = {}, signal = null } = options;
  const { target, headers = {} } = proxy;
  return await crossFetch(target + encodeURIComponent(url), { headers, signal });
};

/**
 * Reads the `encoding` attribute from the XML declaration on the first line
 * of `xml`.
 * @returns {string} the declared encoding, or "utf8" when absent/unreadable.
 */
const detectXmlEncoding = (xml) => {
  try {
    // Turn `<?xml ... ?>` into a plain `<xml ... >` tag so the parser exposes
    // its attributes.
    const declaration = xml.split("\n")[0].trim().replace("<?", "<").replace("?>", ">");
    const parsed = new XMLParser({ ignoreAttributes: false }).parse(declaration);
    const { xml: attributes = {} } = parsed;
    return attributes["@_encoding"] || "utf8";
  } catch {
    return "utf8";
  }
};

// Captures the charset value without surrounding quotes or trailing
// parameters, e.g. `charset="utf-8"; boundary=x` -> `utf-8`.
const CHARSET_PATTERN = /charset\s*=\s*["']?([^"';\s]+)/i;

/**
 * @param {string} contentType - value of the content-type header.
 * @returns {string} the charset named by the header, or "" when none is given.
 */
const getCharsetFromHeader = (contentType) => {
  const match = CHARSET_PATTERN.exec(contentType);
  return match ? match[1] : "";
};

/**
 * Downloads `url` and returns its payload as XML text or parsed JSON,
 * depending on the response content type.
 *
 * @param {string} url
 * @param {object} [options] - headers, proxy, agent, signal.
 * @returns {Promise<{type: "xml", text: string, status: number, contentType: string}
 *   | {type: "json", json: *, status: number, contentType: string}>}
 * @throws {Error} on a status >= 400, on unparsable JSON, or on a content
 *   type that is neither XML/HTML nor JSON.
 */
const retrieve = async (url, options = {}) => {
  const {
    headers = {
      "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0",
    },
    proxy = null,
    agent = null,
    signal = null,
  } = options;

  const response = proxy
    ? await fetchViaProxy(url, { proxy, signal })
    : await crossFetch(url, { headers, agent, signal });

  const { status } = response;
  if (status >= 400) {
    throw new Error(`Request failed with error code ${status}`);
  }

  const contentType = response.headers.get("content-type");
  const buffer = await response.arrayBuffer();
  const rawText = buffer ? Buffer.from(buffer).toString().trim() : "";

  if (/(\+|\/)(xml|html)/.test(contentType)) {
    // Prefer the charset announced by the server; otherwise fall back to the
    // encoding declared inside the document itself.
    const charset = getCharsetFromHeader(contentType) || detectXmlEncoding(rawText);
    const text = new TextDecoder(charset).decode(buffer).trim();
    return { type: "xml", text, status, contentType };
  }

  if (/(\+|\/)json/.test(contentType)) {
    try {
      return { type: "json", json: JSON.parse(rawText), status, contentType };
    } catch (err) {
      throw new Error("Failed to convert data to JSON object", { cause: err });
    }
  }

  throw new Error(`Invalid content type: ${contentType}`);
};

// ---------------------------------------------------------------------------
// XML helpers
// ---------------------------------------------------------------------------

/** True when the parsed document has `rss.channel`. */
const isRSS = (data = {}) => hasProperty(data, "rss") && hasProperty(data.rss, "channel");

/** True when the parsed document has `feed.entry`. */
const isAtom = (data = {}) => hasProperty(data, "feed") && hasProperty(data.feed, "entry");

/** True when the parsed document has `rdf:RDF.channel`. */
const isRdf = (data = {}) =>
  hasProperty(data, "rdf:RDF") && hasProperty(data["rdf:RDF"], "channel");

/** True when `xml` is a non-empty string accepted by XMLValidator. */
const validateXml = (xml) => {
  if (!isString(xml) || !xml.length) {
    return false;
  }
  return XMLValidator.validate(xml) === true;
};

/** Parses `xml`, keeping attributes under the "@_" prefix unless overridden. */
const parseXml = (xml = "", extraOptions = {}) => {
  const parserOptions = {
    attributeNamePrefix: "@_",
    ignoreAttributes: false,
    ...extraOptions,
  };
  return new XMLParser(parserOptions).parse(xml);
};

// ---------------------------------------------------------------------------
// Normalisation helpers
// ---------------------------------------------------------------------------

/**
 * Normalises an "item list" field to an array: arrays pass through,
 * null/undefined become an empty list, anything else is wrapped.
 * A missing list used to be wrapped as `[undefined]`, which made the
 * per-entry destructuring throw on feeds without any item.
 */
const toList = (value) => {
  if (isArray(value)) {
    return value;
  }
  return value == null ? [] : [value];
};

/** @returns {string} ISO-8601 form of `value`, or "" when empty/invalid. */
const toISODateString = (value) => {
  try {
    return value ? new Date(value).toISOString() : "";
  } catch {
    return "";
  }
};

/**
 * Builds a plain-text description: strips CDATA wrappers and tags, truncates
 * to `maxLen` when it is positive, and collapses newlines into spaces.
 */
const buildDescription = (value, maxLen = 0) => {
  const plain = stripTags(String(value).trim().replace(/^<!\[CDATA\[|\]\]>$/g, ""));
  const shortened = maxLen > 0 ? truncate(plain, maxLen) : plain;
  return shortened.replace(/\n+/g, " ");
};

/**
 * Extracts the text of a parsed node (object with `_text`, `#text`, `_cdata`
 * or `$t`, or a primitive) and decodes HTML entities.
 * @returns {string} trimmed decoded text, or "" for falsy content.
 */
const getText = (node) => {
  const raw = isObject(node)
    ? node._text || node["#text"] || node._cdata || node.$t
    : node;
  return raw ? decode(String(raw).trim()) : "";
};

/**
 * Picks a link from the many shapes a parsed `link` node can take.
 * A guid flagged `isPermaLink="true"` wins; otherwise the link is read from
 * the string / href / @_href / @_url / _attributes.href / first array item;
 * finally `id` is used when it is itself a valid URL.
 */
const getLink = (value = [], id = "") => {
  if (isObject(id) && hasProperty(id, "@_isPermaLink") && id["@_isPermaLink"] === "true") {
    return getText(id);
  }

  const firstLinkOf = (list) => {
    const candidates = list.map((entry) => getLink(entry));
    return candidates.length > 0 ? candidates[0] : "";
  };

  let link = "";
  if (isString(value)) {
    link = getText(value);
  } else if (isObject(value) && hasProperty(value, "href")) {
    link = getText(value.href);
  } else if (isObject(value) && hasProperty(value, "@_href")) {
    link = getText(value["@_href"]);
  } else if (isObject(value) && hasProperty(value, "@_url")) {
    link = getText(value["@_url"]);
  } else if (isObject(value) && hasProperty(value, "_attributes")) {
    link = getText(value._attributes.href);
  } else if (isArray(value)) {
    link = firstLinkOf(value);
  }

  return link || (isValidUrl(id) ? id : "");
};

/**
 * Resolves the link of a node to a cleaned absolute URL. When the link is not
 * absolute on its own it is resolved against `baseUrl`.
 * @returns {string} the URL, or "" when no link could be found/resolved.
 */
const getPureUrl = (url, id = "", baseUrl) => {
  const link = getLink(url, id);
  const pure = purifyUrl(link);
  return link ? pure || absolutify(baseUrl, link) : "";
};

/** 32-bit rolling hash (multiplier 31) of `str`, rendered in base 36. */
const hash = (str) =>
  Math.abs(
    str.split("").reduce((acc, ch) => (Math.imul(31, acc) + ch.charCodeAt(0)) | 0, 0)
  ).toString(36);

/**
 * Returns the text of `id` when present; otherwise derives an id from the
 * hashed link and the timestamp of `pubDate`.
 */
const getEntryId = (id, url, pubDate) =>
  id ? getText(id) : `${hash(getPureUrl(url))}-${new Date(pubDate).getTime()}`;

/** Maps an enclosure node to {url, type, length}; null if url or type is missing. */
const getEnclosure = (node) => {
  const url = hasProperty(node, "@_url") ? node["@_url"] : "";
  const type = hasProperty(node, "@_type") ? node["@_type"] : "";
  const length = Number(hasProperty(node, "@_length") ? node["@_length"] : 0);
  return !url || !type ? null : { url, type, length };
};

/** Maps a category node to {text, domain}; non-objects are returned as-is. */
const getCategory = (node) =>
  isObject(node) ? { text: getText(node), domain: node["@_domain"] } : node;

/** Normalises one optional RSS tag according to its name. */
const getOptionalTags = (value, key) => {
  if (key === "source") {
    return { text: getText(value), url: getLink(value) };
  }
  if (key === "category") {
    return isArray(value) ? value.map(getCategory) : getCategory(value);
  }
  if (key === "enclosure") {
    return getEnclosure(value);
  }
  return value;
};

// ---------------------------------------------------------------------------
// JSON Feed
// ---------------------------------------------------------------------------

/** Converts one JSON Feed item into a normalised entry. */
const transformJsonEntry = (item, options) => {
  const { useISODateFormat, descriptionMaxLen, baseUrl, getExtraEntryFields } = options;
  const {
    id = "",
    title = "",
    url = "",
    date_published: pubDate = "",
    summary = "",
    content_html: htmlContent = "",
    content_text: textContent = "",
  } = item;

  const published = useISODateFormat ? toISODateString(pubDate) : pubDate;
  const extraFields = getExtraEntryFields(item);

  return {
    id: getEntryId(id, url, pubDate),
    title,
    link: getPureUrl(url, "", baseUrl),
    published,
    description: buildDescription(textContent || htmlContent || summary, descriptionMaxLen),
    ...extraFields,
  };
};

/**
 * Normalises a JSON Feed document. With `normalization` off the input is
 * returned untouched.
 */
const parseJsonFeed = (data, options = {}) => {
  const { normalization, baseUrl, getExtraFeedFields } = options;
  if (!normalization) {
    return data;
  }

  const {
    title = "",
    home_page_url: homepage = "",
    description = "",
    language = "",
    items = [],
  } = data;

  const extraFields = getExtraFeedFields(data);

  return {
    title,
    link: purifyUrl(homepage) || absolutify(baseUrl, homepage),
    description,
    language,
    published: "",
    generator: "",
    ...extraFields,
    entries: toList(items).map((item) => transformJsonEntry(item, options)),
  };
};

// ---------------------------------------------------------------------------
// RSS
// ---------------------------------------------------------------------------

/** Converts one RSS <item> into a normalised entry. */
const transformRssEntry = (item, options) => {
  const { useISODateFormat, descriptionMaxLen, baseUrl, getExtraEntryFields } = options;
  const {
    guid = "",
    title = "",
    link = "",
    pubDate = "",
    description = "",
    "content:encoded": content = "",
  } = item;

  const published = useISODateFormat ? toISODateString(pubDate) : pubDate;
  const body = getText(description || content);
  const entry = {
    id: getEntryId(guid, link, pubDate),
    title: getText(title),
    link: getPureUrl(link, guid, baseUrl),
    published,
    description: buildDescription(body, descriptionMaxLen),
  };
  const extraFields = getExtraEntryFields(item);

  return { ...entry, ...extraFields };
};

/**
 * Non-normalised RSS output: keeps the channel as parsed, only cleaning
 * titles, links and a few text fields. `item` keeps the cardinality of the
 * input (array stays array, single stays single, missing stays undefined).
 */
const flattenRss = (channel, baseUrl) => {
  const { title = "", link = "", item } = channel;

  const entries = toList(item).map((entry) => {
    const { id, title: entryTitle = "", link: entryLink = "" } = entry;
    const flattened = {
      ...entry,
      title: getText(entryTitle),
      link: getPureUrl(entryLink, id, baseUrl),
    };

    "guid description source".split(" ").forEach((key) => {
      if (hasProperty(entry, key)) {
        flattened[key] = getText(entry[key]);
      }
    });

    // NOTE(review): the result is written onto the parsed `entry`, not onto
    // `flattened`, so it never reaches the returned item. Kept as-is because
    // redirecting it would change the shape of the output - confirm intent.
    "source category enclosure author image".split(" ").forEach((key) => {
      if (hasProperty(flattened, key)) {
        entry[key] = getOptionalTags(flattened[key], key);
      }
    });

    return flattened;
  });

  return {
    ...channel,
    title: getText(title),
    // baseUrl is passed both as the fallback id (used when the link is empty)
    // and as the base for resolving a relative channel link.
    link: getPureUrl(link, baseUrl, baseUrl),
    item: isArray(item) ? entries : entries[0],
  };
};

/** Normalises (or flattens, when `normalization` is off) a parsed RSS document. */
const parseRssFeed = (data, options = {}) => {
  const { normalization, baseUrl, getExtraFeedFields } = options;
  const channel = data.rss.channel;
  if (!normalization) {
    return flattenRss(channel, baseUrl);
  }

  const {
    title = "",
    link = "",
    description = "",
    generator = "",
    language = "",
    lastBuildDate = "",
    item = [],
  } = channel;

  const extraFields = getExtraFeedFields(channel);
  const published = options.useISODateFormat ? toISODateString(lastBuildDate) : lastBuildDate;

  return {
    title: getText(title),
    link: getPureUrl(link, "", baseUrl),
    description,
    language,
    generator,
    published,
    ...extraFields,
    entries: toList(item).map((entry) => transformRssEntry(entry, options)),
  };
};

// ---------------------------------------------------------------------------
// Atom
// ---------------------------------------------------------------------------

/** Converts one Atom <entry> into a normalised entry. */
const transformAtomEntry = (item, options) => {
  const { useISODateFormat, descriptionMaxLen, baseUrl, getExtraEntryFields } = options;
  const {
    id = "",
    title = "",
    issued = "",
    modified = "",
    updated = "",
    published = "",
    link = "",
    summary = "",
    content = "",
  } = item;

  // First non-empty date wins, in this order.
  const pubDate = updated || modified || published || issued;
  const body = getText(summary || content);
  const entry = {
    id: getEntryId(id, link, pubDate),
    title: getText(title),
    link: getPureUrl(link, id, baseUrl),
    published: useISODateFormat ? toISODateString(pubDate) : pubDate,
    description: buildDescription(body, descriptionMaxLen),
  };
  const extraFields = getExtraEntryFields(item);

  return { ...entry, ...extraFields };
};

/**
 * Non-normalised Atom output: keeps the feed as parsed, cleaning titles,
 * links, summary and content. `entry` keeps the cardinality of the input.
 */
const flattenAtom = (feed, baseUrl) => {
  const { id, title = "", link = "", entry } = feed;

  const entries = toList(entry).map((item) => {
    const {
      id: entryId,
      title: entryTitle = "",
      link: entryLink = "",
      summary = "",
      content = "",
    } = item;
    const flattened = {
      ...item,
      title: getText(entryTitle),
      link: getPureUrl(entryLink, entryId, baseUrl),
    };
    if (hasProperty(flattened, "summary")) {
      flattened.summary = getText(summary);
    }
    if (hasProperty(flattened, "content")) {
      flattened.content = getText(content);
    }
    return flattened;
  });

  return {
    ...feed,
    title: getText(title),
    link: getPureUrl(link, id, baseUrl),
    entry: isArray(entry) ? entries : entries[0],
  };
};

/** Normalises (or flattens, when `normalization` is off) a parsed Atom document. */
const parseAtomFeed = (data, options = {}) => {
  const { normalization, baseUrl, getExtraFeedFields } = options;
  const feed = data.feed;
  if (!normalization) {
    return flattenAtom(feed, baseUrl);
  }

  const {
    id = "",
    title = "",
    link = "",
    subtitle = "",
    generator = "",
    language = "",
    updated = "",
    entry = [],
  } = feed;

  const extraFields = getExtraFeedFields(feed);
  const published = options.useISODateFormat ? toISODateString(updated) : updated;

  return {
    title: getText(title),
    link: getPureUrl(link, id, baseUrl),
    description: subtitle,
    language,
    generator,
    published,
    ...extraFields,
    entries: toList(entry).map((item) => transformAtomEntry(item, options)),
  };
};

// ---------------------------------------------------------------------------
// RDF
// ---------------------------------------------------------------------------

/** Converts one RDF <item> into a normalised entry. */
const transformRdfEntry = (item, options) => {
  const { useISODateFormat, descriptionMaxLen, baseUrl, getExtraEntryFields } = options;
  const {
    guid = "",
    title = "",
    link = "",
    "dc:date": pubDate = "",
    description = "",
    "content:encoded": content = "",
  } = item;

  const published = useISODateFormat ? toISODateString(pubDate) : pubDate;
  const body = getText(description || content);
  const entry = {
    id: getEntryId(guid, link, pubDate),
    title: getText(title),
    link: getPureUrl(link, guid, baseUrl),
    published,
    description: buildDescription(body, descriptionMaxLen),
  };
  const extraFields = getExtraEntryFields(item);

  return { ...entry, ...extraFields };
};

/**
 * Non-normalised RDF output: keeps the root as parsed, cleaning titles and
 * links. `item` keeps the cardinality of the input.
 */
const flattenRdf = (root, baseUrl) => {
  const { title = "", link = "", item } = root;

  const entries = toList(item).map((entry) => {
    const { id, title: entryTitle = "", link: entryLink = "" } = entry;
    return {
      ...entry,
      title: getText(entryTitle),
      link: getPureUrl(entryLink, id, baseUrl),
    };
  });

  return {
    ...root,
    title: getText(title),
    // baseUrl doubles as fallback id and as base for a relative link.
    link: getPureUrl(link, baseUrl, baseUrl),
    item: isArray(item) ? entries : entries[0],
  };
};

/** Normalises (or flattens, when `normalization` is off) a parsed RDF document. */
const parseRdfFeed = (data, options = {}) => {
  const { normalization, baseUrl, getExtraFeedFields } = options;
  const root = data["rdf:RDF"];
  if (!normalization) {
    return flattenRdf(root, baseUrl);
  }

  const {
    title = "",
    link = "",
    description = "",
    generator = "",
    "dc:language": language = "",
    "dc:date": pubDate = "",
  } = root.channel;
  // Items are read from the RDF root, next to <channel>.
  const { item } = root;

  const extraFields = getExtraFeedFields(root);
  const published = options.useISODateFormat ? toISODateString(pubDate) : pubDate;

  return {
    title: getText(title),
    link: getPureUrl(link, "", baseUrl),
    description,
    language,
    generator,
    published,
    ...extraFields,
    entries: toList(item).map((entry) => transformRdfEntry(entry, options)),
  };
};

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/** Fills in the default value of every parser option. */
const getOpt = (options = {}) => {
  const {
    normalization = true,
    descriptionMaxLen = 250,
    useISODateFormat = true,
    xmlParserOptions = {},
    baseUrl = "",
    getExtraFeedFields = () => ({}),
    getExtraEntryFields = () => ({}),
  } = options;

  return {
    normalization,
    descriptionMaxLen,
    useISODateFormat,
    xmlParserOptions,
    baseUrl,
    getExtraFeedFields,
    getExtraEntryFields,
  };
};

/**
 * Normalises an already-parsed JSON Feed object.
 * @param {object} json
 * @param {object} [options] - parser options, see getOpt().
 */
const extractFromJson = (json, options = {}) => parseJsonFeed(json, getOpt(options));

/**
 * Parses and normalises an RSS, Atom or RDF document.
 * @param {string} xml
 * @param {object} [options] - parser options, see getOpt().
 * @returns {object|null} the feed, or null when the document is none of the
 *   recognised feed shapes.
 * @throws {Error} when `xml` fails validation.
 */
const extractFromXml = (xml, options = {}) => {
  if (!validateXml(xml)) {
    throw new Error("The XML document is not well-formed");
  }

  const opts = getOpt(options);
  const data = parseXml(xml, opts.xmlParserOptions);

  if (isRSS(data)) {
    return parseRssFeed(data, opts);
  }
  if (isAtom(data)) {
    return parseAtomFeed(data, opts);
  }
  if (isRdf(data)) {
    return parseRdfFeed(data, opts);
  }
  return null;
};

/**
 * Downloads the feed at `url` and normalises it.
 * @param {string} url - must be an http(s) URL.
 * @param {object} [options] - parser options, see getOpt().
 * @param {object} [fetchOptions] - forwarded to retrieve().
 * @throws {Error} on an invalid URL or an empty payload; errors from
 *   retrieve() and extractFromXml() propagate.
 */
const extract = async (url, options = {}, fetchOptions = {}) => {
  if (!isValidUrl(url)) {
    throw new Error("Input param must be a valid URL");
  }

  const data = await retrieve(url, fetchOptions);
  if (!data.text && !data.json) {
    throw new Error(`Failed to load content from "${url}"`);
  }

  const { type, json, text } = data;
  return type === "json" ? extractFromJson(json, options) : extractFromXml(text, options);
};

/** Deprecated alias of extract(); logs a warning on every call. */
const read = async (url, options, fetchOptions) => {
  console.warn("WARNING: read() is deprecated. Please use extract() instead!");
  return extract(url, options, fetchOptions);
};

/**
 * Prints the CJS deprecation notice once at load time.
 * FEED_EXTRACTOR_CJS_IGNORE_WARNING=true silences it;
 * FEED_EXTRACTOR_CJS_TRACE_WARNING=true prints it with a stack trace.
 */
function warnCjsUsage() {
  const ignoreFlag = process.env.FEED_EXTRACTOR_CJS_IGNORE_WARNING;
  if ((ignoreFlag == null ? undefined : ignoreFlag.toLowerCase()) === "true") {
    return;
  }

  const yellow = (text) => `\x1B[33m${text}\x1B[39m`;
  const traceFlag = process.env.FEED_EXTRACTOR_CJS_TRACE_WARNING;
  const log =
    (traceFlag == null ? undefined : traceFlag.toLowerCase()) === "true"
      ? console.trace
      : console.warn;

  log(
    yellow(
      "The CJS build of @extractus/feed-extractor is deprecated. See https://github.com/extractus/feed-extractor#cjs-deprecated for details."
    )
  );
}
warnCjsUsage();

// Never executed: kept so static analysers of CommonJS modules can see the
// export names.
0 && (module.exports = { extract, extractFromJson, extractFromXml, read });
//# sourceMappingURL=bundle.cjs.map