UNPKG

shamela

Version:

Library to interact with the Maktabah Shamela v4 APIs

9 lines 3.98 kB
import { DEFAULT_MAPPING_RULES, FOOTNOTE_MARKER } from "./utils/constants.js";

/**
 * Matches a fragment that consists solely of closing brackets/quotes and
 * Latin or Arabic punctuation marks.
 */
const DANGLING_PUNCTUATION = /^[)\]\u00BB"”'’.,?!:\u061B\u060C\u061F\u06D4\u2026]+$/;

/** Matches a single title span, including its content and closing tag. */
const TITLE_SPAN = /<span\b[^>]*\bdata-type=(['"])title\1[^>]*>[\s\S]*?<\/span>/gi;

/** Matches a run of two or more title spans separated only by whitespace. */
const TITLE_SPAN_RUN = /((?:<span\b[^>]*\bdata-type=(['"])title\2[^>]*>[\s\S]*?<\/span>\s*){2,})/gi;

/** Matches the opening tag at the very start of a span fragment. */
const LEADING_SPAN_TAG = /^<span\b[^>]*>/i;

/**
 * Appends punctuation-only entries to the entry that precedes them.
 *
 * The preceding entry object is extended in place; every caller in this module
 * passes freshly created objects.
 *
 * @param {Array<{id?: string, text: string}>} lines - Parsed lines in order.
 * @returns {Array<{id?: string, text: string}>} Lines without punctuation-only entries
 *   (except when such an entry comes first and has nothing to attach to).
 */
const mergeDanglingPunctuation = (lines) => {
  const merged = [];
  for (const line of lines) {
    const previous = merged[merged.length - 1];
    if (previous && DANGLING_PUNCTUATION.test(line.text)) {
      previous.text += line.text;
    } else {
      merged.push(line);
    }
  }
  return merged;
};

/**
 * Replaces CRLF and lone CR with LF. Input without any CR is returned untouched.
 *
 * @param {string} text - Text with arbitrary line endings.
 * @returns {string} Text that only uses LF line endings.
 */
const normalizeLineEndings = (text) =>
  text.includes("\r") ? text.replace(/\r\n?/g, "\n") : text;

/**
 * Splits text into trimmed, non-empty lines.
 *
 * @param {string} text - Text with arbitrary line endings.
 * @returns {string[]} The non-empty lines.
 */
const splitIntoLines = (text) =>
  text
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean);

/**
 * Reads one attribute value from the raw text of an opening tag.
 *
 * Accepts double-quoted, single-quoted and unquoted values. The attribute name
 * must follow whitespace so that `id` is not found inside `data-id`, and an
 * unquoted value ends at the first whitespace character or `>`.
 *
 * @param {string} tag - Raw tag text such as `<span id="toc-1">`.
 * @param {string} name - Attribute name; interpolated into a RegExp, so it must
 *   not contain regex metacharacters.
 * @returns {string | undefined} The value, or undefined when the attribute is absent.
 */
const getAttributeValue = (tag, name) => {
  const pattern = new RegExp(
    `\\s${name}\\s*=\\s*("([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "i",
  );
  const match = tag.match(pattern);
  if (!match) {
    return undefined;
  }
  return match[2] ?? match[3] ?? match[4];
};

/**
 * Breaks markup into a flat list of text, start-tag and end-tag tokens.
 *
 * Only the `id` and `data-type` attributes are extracted for start tags.
 *
 * @param {string} html - Markup to scan.
 * @returns {Array<object>} Tokens in document order.
 */
const tokenize = (html) => {
  const tokens = [];
  const tagPattern = /<[^>]+>/g;
  let cursor = 0;

  for (let match = tagPattern.exec(html); match; match = tagPattern.exec(html)) {
    if (match.index > cursor) {
      tokens.push({ type: "text", value: html.slice(cursor, match.index) });
    }

    const tag = match[0];
    const nameMatch = tag.match(/^<\/?\s*([a-zA-Z0-9:-]+)/);
    const name = nameMatch ? nameMatch[1].toLowerCase() : "";

    if (/^<\//.test(tag)) {
      tokens.push({ name, type: "end" });
    } else {
      const attributes = {
        id: getAttributeValue(tag, "id"),
        "data-type": getAttributeValue(tag, "data-type"),
      };
      tokens.push({ attributes, name, type: "start" });
    }

    cursor = tagPattern.lastIndex;
  }

  if (cursor < html.length) {
    tokens.push({ type: "text", value: html.slice(cursor) });
  }
  return tokens;
};

/**
 * Builds a line entry from accumulated text.
 *
 * @param {string} text - Accumulated text of the line.
 * @param {string | undefined} id - Title id to attach, if any.
 * @returns {{id?: string, text: string} | null} The entry, or null when the text is blank.
 */
const createLine = (text, id) => {
  const trimmed = text.trim();
  if (!trimmed) {
    return null;
  }
  return id ? { id, text: trimmed } : { text: trimmed };
};

/**
 * Finds the id of the innermost open title span that has an id.
 *
 * @param {Array<{id?: string, isTitle: boolean}>} spanStack - Currently open spans.
 * @returns {string | undefined} The id, or undefined when no such span is open.
 */
const findOpenTitleId = (spanStack) => {
  for (let index = spanStack.length - 1; index >= 0; index--) {
    const span = spanStack[index];
    if (span.isTitle && span.id) {
      return span.id;
    }
  }
  return undefined;
};

/**
 * Feeds a text token into the parser state, emitting one entry per line break.
 *
 * @param {string} text - Text token value.
 * @param {object} state - Mutable parser state (`currentId`, `currentText`, `result`, `spanStack`).
 */
const appendText = (text, state) => {
  if (!text) {
    return;
  }
  text.split("\n").forEach((segment, index) => {
    if (index > 0) {
      // A line break ends the current line: flush it and start a new one that
      // inherits the id of any title span that is still open.
      const line = createLine(state.currentText, state.currentId);
      if (line) {
        state.result.push(line);
      }
      state.currentText = "";
      state.currentId = findOpenTitleId(state.spanStack) || undefined;
    }
    if (segment) {
      state.currentText += segment;
    }
  });
};

/**
 * Records an opening `<span>` on the stack and tags the current line with the
 * title id (minus its `toc-` prefix) when the line has no id yet.
 *
 * @param {object} token - Start token produced by `tokenize`.
 * @param {object} state - Mutable parser state.
 */
const openSpan = (token, state) => {
  const isTitle = token.attributes["data-type"] === "title";
  const id = isTitle
    ? (token.attributes.id ?? "").replace(/^toc-/, "")
    : undefined;

  state.spanStack.push({ id, isTitle });
  if (isTitle && id && !state.currentId) {
    state.currentId = id;
  }
};

/**
 * Parses page content into line entries, attaching title ids found on
 * `<span data-type="title">` elements.
 *
 * Content without any `<span>` tag is simply split into lines. All tags are
 * dropped from the resulting text.
 *
 * @param {string} content - Raw page content.
 * @returns {Array<{id?: string, text: string}>} One entry per non-empty line.
 */
const parseContentRobust = (content) => {
  const normalized = normalizeLineEndings(content);

  if (!/<span[^>]*>/i.test(normalized)) {
    const plainLines = splitIntoLines(normalized).map((text) => ({ text }));
    return mergeDanglingPunctuation(plainLines);
  }

  const state = { currentId: "", currentText: "", result: [], spanStack: [] };
  for (const token of tokenize(`<root>${normalized}</root>`)) {
    if (token.type === "text") {
      appendText(token.value, state);
    } else if (token.name === "span") {
      if (token.type === "start") {
        openSpan(token, state);
      } else if (token.type === "end") {
        state.spanStack.pop();
      }
    }
  }

  const lastLine = createLine(state.currentText, state.currentId);
  if (lastLine) {
    state.result.push(lastLine);
  }
  return mergeDanglingPunctuation(state.result).filter(
    (line) => line.text.length > 0,
  );
};

/**
 * Compiles a `{ pattern: replacement }` map into global regular expressions.
 * Only the object's own enumerable keys are used.
 *
 * @param {Record<string, string>} rules - Regex source strings mapped to replacements.
 * @returns {Array<{regex: RegExp, replacement: string}>} Compiled rules in key order.
 */
const compileMappingRules = (rules) =>
  Object.entries(rules).map(([pattern, replacement]) => ({
    regex: new RegExp(pattern, "g"),
    replacement,
  }));

/** Default rules compiled once at module load so they are not rebuilt per call. */
const COMPILED_DEFAULT_RULES = compileMappingRules(DEFAULT_MAPPING_RULES);

/**
 * Applies character mapping rules to page content, one rule after another.
 *
 * @param {string} text - Page content.
 * @param {Record<string, string>} [rules=DEFAULT_MAPPING_RULES] - Regex source strings
 *   mapped to their replacements.
 * @returns {string} The content after all replacements.
 */
const mapPageCharacterContent = (text, rules = DEFAULT_MAPPING_RULES) => {
  const compiled =
    rules === DEFAULT_MAPPING_RULES
      ? COMPILED_DEFAULT_RULES
      : compileMappingRules(rules);

  return compiled.reduce(
    (current, { regex, replacement }) => current.replace(regex, replacement),
    text,
  );
};

/**
 * Splits content at the first occurrence of the footnote marker.
 *
 * @param {string} content - Page content.
 * @param {string} [footnoteMarker=FOOTNOTE_MARKER] - Separator between body and footer.
 * @returns {[string, string]} `[body, footer]`; the footer is empty when the marker is absent.
 */
const splitPageBodyFromFooter = (content, footnoteMarker = FOOTNOTE_MARKER) => {
  const markerIndex = content.indexOf(footnoteMarker);
  if (markerIndex < 0) {
    return [content, ""];
  }
  return [
    content.slice(0, markerIndex),
    content.slice(markerIndex + footnoteMarker.length),
  ];
};

/**
 * Replaces `⦗…⦘` markers that enclose Arabic-Indic digits, together with up to
 * two preceding and one following space/CR characters, by a single space.
 *
 * @param {string} text - Page content.
 * @returns {string} Content without the markers.
 */
const removeArabicNumericPageMarkers = (text) =>
  text.replace(/(?: |\r){0,2}⦗[\u0660-\u0669]+⦘(?: |\r)?/g, " ");

/**
 * Unwraps `<a>` elements (keeping their content) and deletes hadeeth tags.
 *
 * @param {string} html - Page markup.
 * @returns {string} Markup without anchor and hadeeth tags.
 */
const removeTagsExceptSpan = (html) =>
  html
    .replace(/<a[^>]*>(.*?)<\/a>/gs, "$1")
    .replace(/<hadeeth[^>]*>|<\/hadeeth>|<hadeeth-\d+>/gs, "");

/**
 * Rewrites hadeeth tags as spans: `<hadeeth-N>` becomes
 * `<span class="hadeeth">`, while both `<hadeeth>` and `</hadeeth>` become `</span>`.
 *
 * @param {string} html - Page markup.
 * @returns {string} Markup using span elements instead of hadeeth tags.
 */
const normalizeHtml = (html) =>
  html
    .replace(/<hadeeth-\d+>/gi, `<span class="hadeeth">`)
    .replace(/<\s*\/?\s*hadeeth\s*>/gi, "</span>");

/**
 * Deletes everything that looks like a tag (`<...>`).
 *
 * @param {string} html - Markup.
 * @returns {string} Text without tags.
 */
const stripHtmlTags = (html) => html.replace(/<[^>]*>/g, "");

/**
 * Moves text that precedes a title span on the same line inside that span.
 * The opening tag is rewritten to `<span data-type="title">`, so any other
 * attributes it carried are dropped.
 *
 * @param {string} html - Page markup.
 * @returns {string} Markup whose title spans start at the beginning of their line.
 */
const moveContentAfterLineBreakIntoSpan = (html) =>
  html.replace(
    /(^|[\r\n])([^\r\n]*?)<span[^>]*data-type=["']title["'][^>]*>/gi,
    `$1<span data-type="title">$2`,
  );

/**
 * Converts markup to Markdown-flavoured text: title spans become `## ` headings,
 * `inr://` links are unwrapped, and all remaining tags are stripped.
 *
 * @param {string} html - Page markup.
 * @returns {string} Plain text with Markdown headings.
 */
const htmlToMarkdown = (html) => {
  const withHeadings = html.replace(
    /<span[^>]*data-type=["']title["'][^>]*>(.*?)<\/span>/gi,
    "## $1",
  );
  const withoutLinks = withHeadings.replace(
    /<a[^>]*href=["']inr:\/\/[^"']*["'][^>]*>(.*?)<\/a>/gi,
    "$1",
  );
  return stripHtmlTags(withoutLinks);
};

/**
 * Rewrites runs of adjacent title spans according to a strategy.
 *
 * - `"splitLines"`: put each title span on its own line and keep the whitespace
 *   that followed the run.
 * - `"merge"`: combine the inner content of all spans into one span, reusing the
 *   first span's opening tag and joining with `separator`.
 * - anything else: keep the first span as a title, mark the others as
 *   `data-type="subtitle"`, and join them with a single space.
 *
 * @param {string} html - Page markup.
 * @param {{separator?: string, strategy?: string}} [options={}] - Strategy and merge separator.
 * @returns {string} Markup with normalized title runs; falsy input is returned as is.
 */
const normalizeTitleSpans = (html, options = {}) => {
  const { separator = " — ", strategy } = options;
  if (!html) {
    return html;
  }

  return html.replace(TITLE_SPAN_RUN, (run) => {
    const spans = run.match(TITLE_SPAN) ?? [];
    if (spans.length < 2) {
      return run;
    }

    if (strategy === "splitLines") {
      // The run pattern also consumes whitespace after the last span; restore it.
      const lastSpan = spans[spans.length - 1];
      const trailing = run.slice(run.lastIndexOf(lastSpan) + lastSpan.length);
      return spans.join("\n") + trailing;
    }

    if (strategy === "merge") {
      const innerTexts = spans
        .map((span) =>
          span.replace(LEADING_SPAN_TAG, "").replace(/<\/span>$/i, "").trim(),
        )
        .filter(Boolean);
      const openingTag =
        spans[0].match(LEADING_SPAN_TAG)?.[0] ?? `<span data-type="title">`;
      return `${openingTag}${innerTexts.join(separator)}</span>`;
    }

    const subtitles = spans
      .slice(1)
      .map((span) =>
        span.replace(/\bdata-type=(["'])title\1/i, `data-type="subtitle"`),
      );
    return [spans[0], ...subtitles].join(" ");
  });
};

/**
 * Full conversion pipeline from page markup to Markdown text: normalize title
 * runs (default strategy `"splitLines"`), pull same-line text into title spans,
 * convert to Markdown, then normalize line endings.
 *
 * @param {string} content - Page markup.
 * @param {{separator?: string, strategy?: string}} [options] - Overrides for
 *   `normalizeTitleSpans`.
 * @returns {string} Markdown text.
 */
const convertContentToMarkdown = (content, options) => {
  const withNormalizedTitles = normalizeTitleSpans(content, {
    strategy: "splitLines",
    ...options,
  });
  const withLeadingTitles = moveContentAfterLineBreakIntoSpan(withNormalizedTitles);
  return normalizeLineEndings(htmlToMarkdown(withLeadingTitles));
};

export {
  convertContentToMarkdown,
  htmlToMarkdown,
  mapPageCharacterContent,
  moveContentAfterLineBreakIntoSpan,
  normalizeHtml,
  normalizeLineEndings,
  normalizeTitleSpans,
  parseContentRobust,
  removeArabicNumericPageMarkers,
  removeTagsExceptSpan,
  splitPageBodyFromFooter,
  stripHtmlTags,
};
//# sourceMappingURL=content.js.map