UNPKG

microformats-parser

Version:

A JavaScript microformats v2 parser for the browser and node.js

3 lines (2 loc) 16.4 kB
"use strict";var e=require("parse5");const t=(e,t)=>e.attrs.find((e=>e.name===t)),r=(e,r)=>{var i;const n=null===(i=t(e,r))||void 0===i?void 0:i.value;return(null==n?void 0:n.length)?n:void 0},i=(e,t)=>{var i;const n=(null===(i=r(e,"class"))||void 0===i?void 0:i.split(" "))||[];return t?n.filter((e=>"string"==typeof t?e.startsWith(t):e.match(t))):n},n=(e,t)=>i(e).filter((e=>t.includes(e))),o=(e,t)=>i(e).some((e=>t.includes(e))),a=(e,t,i)=>t.includes(e.tagName)?r(e,i):void 0,l=(e,t)=>{var i;return Boolean(null===(i=r(e,"rel"))||void 0===i?void 0:i.split(" ").some((e=>t.includes(e))))},s=(e,t)=>[...e,...t],u={adr:{type:["h-adr"],properties:{"country-name":"p-country-name",locality:"p-locality",region:"p-region","street-address":"p-street-address","postal-code":"p-postal-code","extended-address":"p-extended-address"}},geo:{type:["h-geo"],properties:{latitude:"p-latitude",longitude:"p-longitude"}},hentry:{type:["h-entry"],properties:{author:"p-author","entry-content":"e-content","entry-summary":"p-summary","entry-title":"p-name",updated:"dt-updated"},rels:{bookmark:"u-url",tag:"p-category"}},hfeed:{type:["h-feed"],properties:{author:"p-author",photo:"u-photo",url:"u-url"},rels:{tag:"p-category"}},hnews:{type:["h-news"],properties:{entry:"p-entry","source-org":"p-source-org",dateline:"p-dateline",geo:"p-geo"},rels:{principles:"u-principles"}},hproduct:{type:["h-product"],properties:{price:"p-price",description:"p-description",fn:"p-name",review:"p-review",brand:"p-brand",url:"u-url",photo:"u-photo"},rels:{tag:"p-category"}},hreview:{type:["h-review"],properties:{item:"p-item",rating:"p-rating",reviewer:"p-author",summary:"p-name",url:"u-url",description:"e-content"},rels:{bookmark:"u-url",tag:"p-category"}},vcard:{type:["h-card"],properties:{fn:"p-name",url:"u-url",org:"p-org",adr:"p-adr",tel:"p-tel",title:"p-job-title",email:"u-email",photo:"u-photo",agent:"p-agent","family-name":"p-family-name","given-name":"p-given-name","additional-name":"p-additional-name","honorific-prefix":"p-honorific-prefix","honorific-suffix":"p-honorific-suffix",key:"p-key",label:"p-label",logo:"u-logo",mailer:"p-mailer",nickname:"p-nickname",note:"p-note",sound:"u-sound",geo:"p-geo",bday:"dt-bday",class:"p-class",rev:"p-rev",role:"p-role","sort-string":"p-sort-string",tz:"p-tz",uid:"u-uid"},rels:{tag:"p-category"}},hresume:{type:["h-resume"],properties:{contact:"p-contact",experience:"p-experience",summary:"p-summary",skill:"p-skill",education:"p-education",affiliation:"p-affiliation"}},vevent:{type:["h-event"],properties:{summary:"p-name",dtstart:"dt-start",dtend:"dt-end",duration:"dt-duration",description:"p-description",attendee:"p-attendee",location:"p-location",url:"u-url"}},item:{type:["h-item"],properties:{fn:"p-name",photo:"u-photo",url:"u-url"}},"hreview-aggregate":{type:["h-review-aggregate"],properties:{rating:"p-rating",average:"p-average",best:"p-best",count:"p-count",item:"p-item",url:"u-url",fn:"p-name"}}},d=Object.keys(u),p=e=>n(e,d),c=e=>new RegExp(`^${e}-([a-z0-9]+-)?([a-z]+-)*[a-z]+$`),m=c("h"),h=c("(p|e|u|dt)"),g=e=>"tagName"in e&&"childNodes"in e,f=e=>t=>g(t)&&t.tagName===e,v=e=>"value"in e,y=e=>i(e).some((e=>e.match(m))),N=e=>y(e)||(e=>o(e,d))(e),b=(e,t)=>((e,t)=>t.some((t=>{const{properties:r,rels:i}=u[t];return o(e,Object.keys(r))||i&&l(e,Object.keys(i))})))(e,t),w=e=>i(e,h).length>0,$=(e,t)=>!w(e)&&!b(e,t)&&N(e),x=e=>Boolean(g(e)&&"base"===e.tagName&&t(e,"href")),k=e=>g(e)&&o(e,["value","value-title"]),j=e=>Boolean(g(e)&&e.attrs.some((e=>"rel"===e.name))&&e.attrs.some((e=>"href"===e.name))),E=e=>e.childNodes.filter(Boolean).filter(g),T=(e,t,r)=>{const{matcher:i,roots:n}=r,o=i(t,n)&&t;if(o&&N(t))return[...e,t];if(N(t))return e;const a=E(t).reduce(((e,t)=>T(e,t,r)),o?[o]:[]);return[...e,...a]},U=(e,t)=>{const r={roots:g(e)?p(e):[],matcher:t};return E(e).reduce(((e,t)=>T(e,t,r)),[])},M=(e,t)=>e&&e.experimental&&e.experimental[t]||!1,C=e=>{var t,i,n;return null!==(i=null===(t=r(e,"alt"))||void 0===t?void 0:t.trim())&&void 0!==i?i:null===(n=r(e,"src"))||void 0===n?void 0:n.trim()},R=(e,t)=>{if(g(t)){if(["style","script"].includes(t.tagName))return e;if("img"===t.tagName){const r=C(t);if(r)return`${e} ${r} `}return t.childNodes.reduce(R,e)}return v(t)?`${e}${t.value}`:e},O=(e,t)=>g(t)?["style","script"].includes(t.tagName)?e:"img"===t.tagName?`${e}${r(t,"alt")||""}`:t.childNodes.reduce(O,e):v(t)?`${e}${t.value}`:e,z=(e,t)=>{if(g(t)){if(["style","script"].includes(t.tagName))return e;if("img"===t.tagName){const r=C(t);if(r)return`${e} ${r} `}return"br"===t.tagName?`${e}\n`:"p"===t.tagName?t.childNodes.reduce(z,`${e}\n`):t.childNodes.reduce(z,e)}if(v(t)){const r=t.value.replace(/[\t\n\r]/g," ");if(r)return`${e}${r}`}return e},B=e=>e.childNodes.reduce(z,"").replace(/ +/g," ").replace(/ ?\n ?/g,"\n").trim(),W=(e,t)=>M(t,"textContent")?B(e):e.childNodes.reduce(R,"").trim(),_=(e,t)=>M(t,"textContent")?B(e):e.childNodes.reduce(O,"").trim(),L=e=>{var t;return null!==(t=a(e,["img","area"],"alt"))&&void 0!==t?t:a(e,["abbr"],"title")},A=e=>{const t=e.childNodes.filter(g);return t.length?L(t[0]):void 0},K=(e,t,r)=>{var n,o,a;if(!t.some((e=>i(e,/^(p|e|h)-/).length)))return null!==(a=null!==(o=null!==(n=L(e))&&void 0!==n?n:A(e))&&void 0!==o?o:(e=>{const t=e.childNodes.filter(g);return 1===t.length?A(t[0]):void 0})(e))&&void 0!==a?a:_(e,r)},S=e=>a(e,["a","area"],"href"),q=e=>{const t=e.childNodes.filter(g),r=t.filter((e=>"a"===e.tagName)),i=t.filter((e=>"area"===e.tagName));for(const e of[r,i])if(1===e.length&&!y(e[0]))return S(e[0])},H=(e,t)=>{var r,n;if(!t.some((e=>i(e,"u-").length)))return null!==(n=null!==(r=S(e))&&void 0!==r?r:q(e))&&void 0!==n?n:(e=>{const t=e.childNodes.filter(g);return 1===t.length?q(t[0]):void 0})(e)},P=(e,{inherited:t}={})=>{if("img"!==e.tagName)return;const i=(!t||!t.roots||!t.roots.length)&&r(e,"alt"),n=r(e,"src");return i?{alt:i,value:n}:n},I=e=>"img"===e.tagName?P(e):"object"===e.tagName?r(e,"data"):void 0,D=e=>{const t=e.childNodes.filter(g),r=t.filter((e=>"img"===e.tagName)),i=t.filter((e=>"object"===e.tagName));for(const e of[r,i])if(1===e.length&&!y(e[0]))return I(e[0])},F=(e,t)=>{var r,n;if(!t.some((e=>i(e,"u-").length)))return null!==(n=null!==(r=I(e))&&void 0!==r?r:D(e))&&void 0!==n?n:(e=>{const t=e.childNodes.filter(g);return 1===t.length?D(t[0]):void 0})(e)},G=e=>{if(((e,t)=>i(e).some((e=>"value-title"===e)))(e))return r(e,"title")},J=(e,t)=>{const i=U(e,k);if(i.length){if(t.datetime){const e=i.map((e=>{var i,n;return null!==(n=null!==(i=(e=>r(e,"datetime"))(e))&&void 0!==i?i:G(e))&&void 0!==n?n:W(e,t)}));return e.sort((e=>e.match(/^[0-9]{4}/)?-1:1)).join(" ").trim().replace(/((\+|-)[0-2][0-9]):([0-5][0-9])$/,(e=>e.replace(":",""))).replace(/([0-2]?[0-9])(:[0-5][0-9])?(:[0-5][0-9])?(a\.?m\.?|p\.?m\.?)/i,((e,t,r,i,n)=>`${/a/i.test(n)?t.padStart(2,"0"):`${parseInt(t,10)+12}`}${r||":00"}${i||""}`)).toUpperCase()}return i.map((e=>{var r;return null!==(r=G(e))&&void 0!==r?r:W(e,t)})).join("").trim()}},Q=e=>!e.includes("://")&&!e.startsWith("#"),V=(e,t)=>new URL(e,t).toString(),X=/^(p|u|e|dt)-/,Y=(e,t)=>{var r,i,n,o,l;return null!==(l=null!==(o=null!==(n=null!==(i=null!==(r=J(e,{...t,datetime:!0}))&&void 0!==r?r:a(e,["time","ins","del"],"datetime"))&&void 0!==i?i:a(e,["abbr"],"title"))&&void 0!==n?n:a(e,["data","input"],"value"))&&void 0!==o?o:a(e,["meta"],"content"))&&void 0!==l?l:W(e,t)},Z=(t,i)=>{const n={value:W(t,i),html:e.serialize(t).trim()},o=M(i,"lang")&&(r(t,"lang")||i.inherited.lang);return o?{...n,lang:o}:n},ee=(e,t,r)=>"p"===t?((e,t)=>{var r,i,n,o,l;return null!==(l=null!==(o=null!==(n=null!==(i=null!==(r=J(e,t))&&void 0!==r?r:a(e,["abbr","link"],"title"))&&void 0!==i?i:a(e,["data"],"value"))&&void 0!==n?n:a(e,["img","area"],"alt"))&&void 0!==o?o:a(e,["meta"],"content"))&&void 0!==l?l:_(e,t)})(e,r):"e"===t?Z(e,r):"u"===t?((e,t)=>{var r,i,n,o,l,s,u,d,p;const c=null!==(p=null!==(d=null!==(u=null!==(s=null!==(l=null!==(o=null!==(n=null!==(i=null!==(r=a(e,["a","area","link"],"href"))&&void 0!==r?r:P(e,t))&&void 0!==i?i:a(e,["audio","source","iframe","video"],"src"))&&void 0!==n?n:a(e,["video"],"poster"))&&void 0!==o?o:a(e,["object"],"data"))&&void 0!==l?l:J(e,t))&&void 0!==s?s:a(e,["abbr"],"title"))&&void 0!==u?u:a(e,["data","input"],"value"))&&void 0!==d?d:a(e,["meta"],"content"))&&void 0!==p?p:W(e,t);return"string"==typeof c&&Q(c)?V(c,t.baseUrl):"string"==typeof c?c.trim():c})(e,r):Y(e,r),te=(e,t)=>((e,{inherited:t})=>t.roots.length?((e,t)=>[...new Set(t.map((t=>{const{properties:i,rels:o}=u[t],a=n(e,Object.keys(i)).map((e=>i[e])),l=o&&((e,t)=>{var i;return(null===(i=r(e,"rel"))||void 0===i?void 0:i.split(" ").filter((e=>t.includes(e))))||[]})(e,Object.keys(o)).map((e=>o[e]))||[];return[...a,...l]})).reduce(s))])(e,t.roots):i(e,/^(p|u|e|dt)-/))(e,t).map((r=>{const i=(e=>(e.startsWith("p-")?"p":e.startsWith("u-")&&"u")||e.startsWith("e-")&&"e"||"dt")(r),n=r.replace(X,"");return{type:i,key:n,value:["u","p","e","dt"].includes(i)&&N(e)?le(e,{...t,valueType:i,valueKey:n}):ee(e,i,t)}})).filter((e=>Boolean(e))),re=(e,t,r)=>{var i;if("dt"===e.type&&"end"===e.key&&"string"==typeof e.value&&!e.value.match(/^[0-9]{4}-[0-9]{2}-[0-9]{2}/)&&e.value.match(/^[0-9]{2}:[0-9]{2}/)){const t=null===(i=r.find((t=>"dt"===t.type&&"start"===t.key&&"string"==typeof e.value)))||void 0===i?void 0:i.value;if(t){const r=t.match(/^[0-9]{4}-[0-9]{2}-[0-9]{2}/);return{...e,value:`${r} ${e.value}`}}}return e},ie=(e,{key:t,value:r})=>{void 0!==r&&(e[t]||Array.isArray(e[t])?e[t].push(r):e[t]=[r])},ne=(e,t)=>{const r={},i=((e,t)=>t.inherited.roots.length?U(e,b):U(e,w))(e,t);return i.map((e=>te(e,t))).reduce(s,[]).map(re).forEach((e=>ie(r,e))),t.implyProperties&&!t.inherited.roots.length&&(void 0===r.name&&ie(r,{key:"name",value:K(e,i,t)}),void 0===r.url&&ie(r,{key:"url",value:H(e,i)}),void 0===r.photo&&ie(r,{key:"photo",value:F(e,i)})),r},oe=(e,t)=>{const n=(e=>{const t=r(e,"itemref");if(t)return t.split(" ");if(i(e).includes("include")){const t="object"===e.tagName?"data":"href",i=r(e,t);if(i&&i.startsWith("#"))return[i.substring(1)]}const n="td"===e.tagName&&r(e,"headers");return n?[n]:[]})(e);n.forEach((r=>{const i=t.idRefs[r];i&&e.childNodes.push(i)})),e.childNodes.forEach((e=>g(e)&&!N(e)&&oe(e,t)))},ae=e=>{const t=i(e,"h-");return t.length?t:(e=>{const t=p(e).map((e=>u[e].type)).reduce(s);return t.length>1?t.filter((e=>"h-item"!==e)):t})(e)},le=(e,t)=>{var i,n,o;((e,t)=>{y(e)||oe(e,t)})(e,t);const a=(e=>y(e)?[]:p(e))(e),l=(e=>y(e)?r(e,"id"):void 0)(e),s=r(e,"lang")||t.inherited.lang,u=U(e,$),d={lang:s,roots:a},c={type:ae(e).sort(),properties:ne(e,{...t,implyProperties:!U(e,N).length,inherited:d})};if(l&&(c.id=l),M(t,"lang")&&s&&(c.lang=s),u.length&&(c.children=u.map((e=>le(e,{...t,inherited:d})))),"p"===t.valueType&&(c.value=null!==(n=null!==(i=c.properties.name&&c.properties.name[0])&&void 0!==i?i:r(e,"title"))&&void 0!==n?n:W(e,t)),"u"===t.valueType&&(c.value=null!==(o=c.properties.url&&c.properties.url[0])&&void 0!==o?o:W(e,t)),"dt"===t.valueType&&(c.value=Y(e,t)),"e"===t.valueType)return{...Z(e,t),...c};if(t.valueKey&&!c.value){const e=c.properties[t.valueKey]&&c.properties[t.valueKey][0];e&&(c.value="string"==typeof e?e:e.value)}return c},se=(e,t)=>{if(void 0===e)throw new TypeError(`Microformats parser: ${t} not provided`);if("string"!=typeof e)throw new TypeError(`Microformats parser: ${t} is not a string`);if(""===e)throw new TypeError(`Microformats parser: ${t} cannot be empty`);return e},ue=(e,t)=>{if("boolean"!=typeof e)throw new TypeError(`Microformats parser: ${t} is not a boolean`);return e},de=(e,t,r)=>{if(void 0===e)throw new TypeError(`Microformats parser: ${r} is not provided`);if("object"!=typeof e)throw new TypeError(`Microformats parser: ${r} is not an object`);if(Array.isArray(e))throw new TypeError(`Microformats parser: ${r} is not an object`);if(null===e)throw new TypeError(`Microformats parser: ${r} cannot be null`);const i=Object.keys(e).filter((e=>!t.includes(e)));if(i.length)throw new TypeError(`Microformats parser: ${r} contains unknown properties: ${i.join(", ")}`);return e},pe=(e,{rels:t,relUrls:i},n)=>{var o;const a=((e,t)=>M(t,"textContent")?B(e):e.childNodes.reduce(O,""))(e,n),l=r(e,"rel"),s=null===(o=r(e,"href"))||void 0===o?void 0:o.trim(),u=r(e,"title"),d=r(e,"media"),p=r(e,"hreflang"),c=r(e,"type");l&&s&&l.split(" ").forEach((e=>{t[e]||(t[e]=[]),t[e].includes(s)||t[e].push(s),i[s]?i[s].rels.includes(e)||(i[s].rels.push(e),i[s].rels.sort()):i[s]={rels:[e],text:a},a&&!i[s].text&&(i[s].text=a),u&&!i[s].title&&(i[s].title=u),d&&!i[s].media&&(i[s].media=d),p&&!i[s].hreflang&&(i[s].hreflang=p),c&&!i[s].type&&(i[s].type=c)}))},ce=e=>{for(const t of e.childNodes){if(!g(t))continue;if(x(t))return r(t,"href");const e=ce(t);if(e)return e}},me=(e,i,n)=>{for(const o in e.childNodes){const a=e.childNodes[o];if(!g(a))continue;"template"===a.tagName&&delete e.childNodes[o],i.lang||("html"===a.tagName&&(i.lang=r(a,"lang")),"meta"===a.tagName&&"Content-Language"===r(a,"http-equiv")&&(i.lang=r(a,"content"))),("object"===a.tagName?["data"]:["href","src"]).forEach((e=>{const r=t(a,e);r&&Q(r.value)?r.value=V(r.value,i.baseUrl):r&&(r.value=r.value.trim())}));const l=r(a,"id");l&&!i.idRefs[l]&&(i.idRefs[l]=a),j(a)&&pe(a,i,n),me(a,i,n)}},he="<title>",ge="<canonical>",fe=["image","video","audio"],ve=(e,t)=>{const i=e.childNodes.find(f("html"));return((e,t)=>{var r;const i={properties:{}};M(t,"lang")&&t.inherited.lang&&(i.lang=t.inherited.lang);const n=(e,t=[])=>{const r=t.filter(Boolean);r.length&&(i.properties[e]=r)};let o="h-entry";const[a]=null!==(r=e.get(["og:type"]))&&void 0!==r?r:[];return a&&"string"==typeof a&&("profile"===a?o="h-card":["music","video"].some((e=>a.includes(e)))&&(o="h-cite")),i.type=[o],n("name",e.get(["og:title","twitter:title",he])),n("summary",e.get(["og:description","twitter:description","description"])),n("featured",e.get(["og:image","twitter:image"])),n("video",e.get(["og:video","twitter:video"])),n("audio",e.get(["og:audio","twitter:audio"])),n("published",e.get(["article:published_time","date"])),n("updated",e.get(["article:modified_time"])),n("author",e.get(["article:author","author"])),n("url",e.get(["og:url",ge])),n("publication",e.get(["og:site_name","publisher"])),"h-card"===o&&(n("given-name",e.get(["profile:first_name"])),n("family-name",e.get(["profile:last_name"]))),0===Object.keys(i.properties).length?[]:[i]})((e=>{const t=(()=>{const e={};return{metaContent:e,set:(t,r)=>{const[i,n,o]=t.split(":");if(("og"===i||"twitter"===i)&&fe.includes(n)){if("alt"===o){const t=e[`${i}:${n}`];if(null==t?void 0:t.length){const e=t.pop();"string"==typeof e?t.push({value:e,alt:r}):e&&t.push(e)}return}["url","secure_url"].includes(o)&&(t=`${i}:${n}`)}const a=e[t];a?a.map((e=>"string"==typeof e?e:e.value)).some((e=>r===e))||e[t].push(r):e[t]=[r]},get:t=>{for(const r of t)if(e[r])return e[r]}}})();for(const i in e.childNodes){const n=e.childNodes[i];if(!g(n))continue;const o=a(n,["meta"],"content");if(o){const e=r(n,"property");e&&t.set(e,o);const i=r(n,"name");i&&i!==e&&t.set(i,o)}else if("title"===n.tagName&&"value"in n.childNodes[0])t.set(he,n.childNodes[0].value);else if("link"===n.tagName&&l(n,["canonical"])){const e=r(n,"href");e&&t.set(ge,e)}}return t})(null==i?void 0:i.childNodes.find(f("head"))),t)};exports.mf2=(t,r)=>(((e,t)=>{se(e,"HTML");const r=de(t,["baseUrl","experimental"],"options"),i=se(r.baseUrl,"baseUrl");if(new URL(i),"experimental"in r){const e=de(r.experimental,["lang","textContent","metaformats"],"experimental");"lang"in e&&ue(e.lang,"experimental.lang"),"textContent"in e&&ue(e.textContent,"experimental.textContent"),"metaformats"in e&&ue(e.metaformats,"experimental.metaformats")}})(t,r),((t,r)=>{const i=e.parse(t);(e=>{const t=e.childNodes.find(f("html"));if(!t)throw new Error("Microformats parser: No <html> element found");const r=t.childNodes.find(f("body"));if(!r)throw new Error("Microformats parser: No <body> element found");if(!r.childNodes.filter(g).length)throw new Error("Microformats parser: unable to parse HTML")})(i);const{idRefs:n,rels:o,relUrls:a,baseUrl:l,lang:s}=((e,t)=>{var r;const i={idRefs:{},rels:{},relUrls:{},baseUrl:null!==(r=ce(e))&&void 0!==r?r:t.baseUrl,lang:void 0};return me(e,i,t),i})(i,r),u={...r,baseUrl:l,idRefs:n,inherited:{roots:[],lang:s}};let d=U(i,N).map((e=>le(e,u)));return 0===d.length&&M(u,"metaformats")&&(d=ve(i,u)),{rels:o,"rel-urls":a,items:d}})(t,r)); //# sourceMappingURL=index.cjs.map