UNPKG

shamela

Version:

Library to interact with the Maktabah Shamela v4 APIs

50 lines 18.1 kB
import initSqlJs from "sql.js";
import { unzipSync } from "fflate";

/* -------------------------------------------------------------------------- */
/* CommonJS `require` shim                                                    */
/* -------------------------------------------------------------------------- */

/**
 * Resolves to the ambient `require` when one exists. Otherwise it is a proxy
 * around a stub that throws when called and forwards property reads to
 * `require` should it become available later.
 */
const nodeRequire = ((fallback) => {
  if (typeof require !== "undefined") {
    return require;
  }
  if (typeof Proxy !== "undefined") {
    return new Proxy(fallback, {
      get: (target, prop) => (typeof require !== "undefined" ? require : target)[prop],
    });
  }
  return fallback;
})(function (id) {
  if (typeof require !== "undefined") {
    return require.apply(this, arguments);
  }
  throw Error(
    'Calling `require` for "' + id + "\" in an environment that doesn't expose the `require` function.",
  );
});

/* -------------------------------------------------------------------------- */
/* Logging                                                                    */
/* -------------------------------------------------------------------------- */

/** Logger used until the caller supplies one: every method is a no-op. */
const SILENT_LOGGER = Object.freeze({
  debug: () => {},
  error: () => {},
  info: () => {},
  warn: () => {},
});

let activeLogger = SILENT_LOGGER;

/**
 * Installs a logger, or restores the silent logger when given a falsy value.
 *
 * @param {object} [logger] Object exposing debug/error/info/warn functions.
 * @throws {Error} When one of the four methods is not a function.
 */
const configureLogger = (logger) => {
  if (!logger) {
    activeLogger = SILENT_LOGGER;
    return;
  }
  const missing = ["debug", "error", "info", "warn"].find(
    (method) => typeof logger[method] !== "function",
  );
  if (missing) {
    throw Error(
      `Logger must implement debug, error, info, and warn methods. Missing: ${String(missing)}`,
    );
  }
  activeLogger = logger;
};

const getLogger = () => activeLogger;

const resetLogger = () => {
  activeLogger = SILENT_LOGGER;
};

/**
 * Facade that looks up the active logger on every property access, so a logger
 * configured after module load is still honoured by code holding `log`.
 */
const log = new Proxy(
  {},
  {
    get: (_target, prop) => {
      const current = getLogger();
      const member = current[prop];
      return typeof member === "function" ? (...args) => member.apply(current, args) : member;
    },
  },
);

/* -------------------------------------------------------------------------- */
/* Configuration                                                              */
/* -------------------------------------------------------------------------- */

let runtimeConfig = {};

/** Environment variable consulted for each configuration key. */
const ENV_KEYS = {
  apiKey: "SHAMELA_API_KEY",
  booksEndpoint: "SHAMELA_API_BOOKS_ENDPOINT",
  masterPatchEndpoint: "SHAMELA_API_MASTER_PATCH_ENDPOINT",
  sqlJsWasmUrl: "SHAMELA_SQLJS_WASM_URL",
};

const hasProcessEnv = typeof process !== "undefined" && !!process?.env;

/** Reads a key from the runtime config, falling back to `process.env` when present. */
const readConfigValue = (key) => {
  const configured = runtimeConfig[key];
  if (configured !== undefined) {
    return configured;
  }
  const envName = ENV_KEYS[key];
  if (hasProcessEnv) {
    return process.env[envName];
  }
  return undefined;
};

/**
 * Merges the given options into the runtime configuration.
 * A `logger` key (even when undefined) is routed to the logger setup instead
 * of being stored with the other options.
 *
 * @param {object} options Configuration overrides.
 */
const configure = (options) => {
  const { logger, ...rest } = options;
  if ("logger" in options) {
    configureLogger(logger);
  }
  runtimeConfig = { ...runtimeConfig, ...rest };
};

/** `fetchImplementation` is only ever read from the runtime config, never from the environment. */
const getConfigValue = (key) =>
  key === "fetchImplementation" ? runtimeConfig.fetchImplementation : readConfigValue(key);

/** Snapshot of every configuration value currently resolvable. */
const getConfig = () => ({
  apiKey: readConfigValue("apiKey"),
  booksEndpoint: readConfigValue("booksEndpoint"),
  fetchImplementation: runtimeConfig.fetchImplementation,
  masterPatchEndpoint: readConfigValue("masterPatchEndpoint"),
  sqlJsWasmUrl: readConfigValue("sqlJsWasmUrl"),
});

/**
 * Returns a configuration value or throws when it is missing/empty.
 *
 * @throws {Error} For `fetchImplementation`, or when the value is falsy.
 */
const requireConfigValue = (key) => {
  if (key === "fetchImplementation") {
    throw Error("fetchImplementation must be provided via configure().");
  }
  const value = getConfigValue(key);
  if (!value) {
    throw Error(`${ENV_KEYS[key]} environment variable not set`);
  }
  return value;
};

/** Clears the runtime configuration and restores the silent logger. */
const resetConfig = () => {
  runtimeConfig = {};
  resetLogger();
};

/* -------------------------------------------------------------------------- */
/* Book database helpers                                                      */
/* -------------------------------------------------------------------------- */

/** Table names used in the book and master databases. */
const Tables = {
  Authors: "author",
  Books: "book",
  Categories: "category",
  Page: "page",
  Title: "title",
};

// NOTE: table names are interpolated into SQL below; every caller in this
// module passes one of the `Tables` constants.

const getTableInfo = (db, table) => db.query(`PRAGMA table_info(${table})`).all();

const hasTable = (db, table) =>
  !!db.query("SELECT name FROM sqlite_master WHERE type='table' AND name = ?1").get(table);

const readRows = (db, table) => (hasTable(db, table) ? db.query(`SELECT * FROM ${table}`).all() : []);

const isDeleted = (row) => String(row.is_deleted) === "1";

/**
 * Builds one output row from an optional base row and an optional patch row.
 * A patch value wins unless it is `"#"`, `null` or `undefined`; then the base
 * value is used; otherwise the column is `null`.
 */
const mergeRowValues = (baseRow, patchRow, columns) => {
  const merged = {};
  for (const column of columns) {
    if (column === "id") {
      merged.id = (patchRow ?? baseRow)?.id ?? null;
      continue;
    }
    if (patchRow && column in patchRow) {
      const patched = patchRow[column];
      if (patched !== "#" && patched != null) {
        merged[column] = patched;
        continue;
      }
    }
    if (baseRow && column in baseRow) {
      merged[column] = baseRow[column];
      continue;
    }
    merged[column] = null;
  }
  return merged;
};

/**
 * Merges base rows with patch rows keyed by stringified `id`.
 * Base rows whose patch is flagged deleted are dropped; patch-only rows are
 * appended unless they are flagged deleted.
 */
const mergeTableRows = (baseRows, patchRows, columns) => {
  const baseIds = new Set();
  const patchById = new Map();
  for (const row of baseRows) {
    baseIds.add(String(row.id));
  }
  for (const row of patchRows) {
    patchById.set(String(row.id), row);
  }

  const merged = [];
  for (const baseRow of baseRows) {
    const patchRow = patchById.get(String(baseRow.id));
    if (patchRow && isDeleted(patchRow)) {
      continue;
    }
    merged.push(mergeRowValues(baseRow, patchRow, columns));
  }
  for (const patchRow of patchRows) {
    const id = String(patchRow.id);
    if (baseIds.has(id) || isDeleted(patchRow)) {
      continue;
    }
    merged.push(mergeRowValues(undefined, patchRow, columns));
  }
  return merged;
};

/** Inserts rows through one prepared statement; columns absent from a row are written as NULL. */
const insertRows = (db, table, columns, rows) => {
  if (rows.length === 0) {
    return;
  }
  const placeholders = columns.map(() => "?").join(",");
  const statement = db.prepare(`INSERT INTO ${table} (${columns.join(",")}) VALUES (${placeholders})`);
  try {
    for (const row of rows) {
      const values = columns.map((column) => (column in row ? row[column] : null));
      statement.run(...values);
    }
  } finally {
    // Release the statement even when an insert throws.
    statement.finalize();
  }
};

/**
 * Recreates `table` in `target` using the CREATE statement stored in `source`.
 *
 * @returns {boolean} `false` (after a warning) when the source has no definition.
 */
const recreateTableFromSource = (target, source, table) => {
  const definition = source
    .query("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?1")
    .get(table);
  if (!definition?.sql) {
    log.warn(`${table} table definition missing in source database`);
    return false;
  }
  target.run(`DROP TABLE IF EXISTS ${table}`);
  target.run(definition.sql);
  return true;
};

/**
 * Copies one table from `source` into `target`, overlaying rows from `patch`
 * when a patch database is given. Columns that exist only in the patch are
 * added to the target table first.
 */
const copyTableWithPatch = (target, source, patch, table) => {
  if (!hasTable(source, table)) {
    log.warn(`${table} table missing in source database`);
    return;
  }
  if (!recreateTableFromSource(target, source, table)) {
    return;
  }

  const sourceColumns = getTableInfo(source, table);
  const patchColumns = patch && hasTable(patch, table) ? getTableInfo(patch, table) : [];
  const columns = sourceColumns.map((info) => info.name);

  for (const info of patchColumns) {
    if (!columns.includes(info.name)) {
      const type = info.type && info.type.length > 0 ? info.type : "TEXT";
      target.run(`ALTER TABLE ${table} ADD COLUMN ${info.name} ${type}`);
      columns.push(info.name);
    }
  }

  insertRows(
    target,
    table,
    columns,
    mergeTableRows(readRows(source, table), patch ? readRows(patch, table) : [], columns),
  );
};

/** Copies page and title tables with the patch applied, in one transaction. */
const applyPatches = (target, source, patch) => {
  target.transaction(() => {
    copyTableWithPatch(target, source, patch, Tables.Page);
    copyTableWithPatch(target, source, patch, Tables.Title);
  })();
};

/** Copies page and title tables without a patch, in one transaction. */
const copyTableData = (target, source) => {
  target.transaction(() => {
    copyTableWithPatch(target, source, null, Tables.Page);
    copyTableWithPatch(target, source, null, Tables.Title);
  })();
};

const createBookTables = (db) => {
  db.run(
    `CREATE TABLE ${Tables.Page} ( id INTEGER, content TEXT, part TEXT, page TEXT, number TEXT, services TEXT, is_deleted TEXT )`,
  );
  db.run(
    `CREATE TABLE ${Tables.Title} ( id INTEGER, content TEXT, page INTEGER, parent INTEGER, is_deleted TEXT )`,
  );
};

const getAllPages = (db) => db.query(`SELECT * FROM ${Tables.Page}`).all();

const getAllTitles = (db) => db.query(`SELECT * FROM ${Tables.Title}`).all();

const getBookData = (db) => ({ pages: getAllPages(db), titles: getAllTitles(db) });

/* -------------------------------------------------------------------------- */
/* sql.js wasm location                                                       */
/* -------------------------------------------------------------------------- */

/** Best-effort existence check; any failure (including no `require`) yields `false`. */
const fileExists = (path) => {
  try {
    return nodeRequire("node:fs").existsSync(path);
  } catch {
    return false;
  }
};

/**
 * Tries several strategies to find `sql-wasm.wasm` on disk.
 * Each strategy is deliberately best-effort: failures are ignored and the next
 * strategy is attempted.
 *
 * @returns {string|null} Path to the wasm file, or `null` when none was found.
 */
const findNodeWasmPath = () => {
  // 1. Next to the resolved `sql.js` entry point.
  if (nodeRequire !== undefined && nodeRequire.resolve !== undefined) {
    try {
      const entry = nodeRequire.resolve("sql.js");
      const path = nodeRequire("node:path");
      const candidate = path.join(path.dirname(entry), "dist", "sql-wasm.wasm");
      if (fileExists(candidate)) {
        return candidate;
      }
    } catch {}
  }

  // 2. Well-known node_modules locations relative to the working directory.
  if (typeof process !== "undefined" && process.cwd) {
    try {
      const path = nodeRequire("node:path");
      const cwd = process.cwd();
      const candidates = [
        path.join(cwd, "node_modules", "sql.js", "dist", "sql-wasm.wasm"),
        path.join(cwd, "..", "node_modules", "sql.js", "dist", "sql-wasm.wasm"),
        path.join(cwd, "../..", "node_modules", "sql.js", "dist", "sql-wasm.wasm"),
        path.join(cwd, ".next", "server", "node_modules", "sql.js", "dist", "sql-wasm.wasm"),
      ];
      for (const candidate of candidates) {
        if (fileExists(candidate)) {
          return candidate;
        }
      }
    } catch {}
  }

  // 3. Every directory on the module resolution path.
  if (nodeRequire !== undefined && nodeRequire.resolve !== undefined && nodeRequire.resolve.paths) {
    try {
      const path = nodeRequire("node:path");
      const searchPaths = nodeRequire.resolve.paths("sql.js") || [];
      for (const dir of searchPaths) {
        const candidate = path.join(dir, "sql.js", "dist", "sql-wasm.wasm");
        if (fileExists(candidate)) {
          return candidate;
        }
      }
    } catch {}
  }

  // 4. Relative to this module's own URL.
  try {
    if (import.meta.url) {
      const url = new URL("../../node_modules/sql.js/dist/sql-wasm.wasm", import.meta.url);
      const decoded = decodeURIComponent(url.pathname);
      // On Windows the URL pathname carries a leading slash before the drive letter.
      const candidate =
        process.platform === "win32" && decoded.startsWith("/") ? decoded.slice(1) : decoded;
      if (fileExists(candidate)) {
        return candidate;
      }
    }
  } catch {}

  return null;
};

/* -------------------------------------------------------------------------- */
/* sql.js wrappers                                                            */
/* -------------------------------------------------------------------------- */

/** Thin wrapper over a sql.js prepared statement. */
class SqlJsStatement {
  constructor(statement) {
    this.statement = statement;
  }

  /** Binds the given values (when any), executes one step and resets the statement. */
  run = (...params) => {
    if (params.length > 0) {
      this.statement.bind(params);
    }
    this.statement.step();
    this.statement.reset();
  };

  finalize = () => {
    this.statement.free();
  };
}

/** Thin wrapper over a sql.js database exposing run/prepare/query/transaction helpers. */
class SqlJsDatabase {
  constructor(db) {
    this.db = db;
  }

  run = (sql, params = []) => {
    this.db.run(sql, params);
  };

  prepare = (sql) => new SqlJsStatement(this.db.prepare(sql));

  /** Returns `{ all, get }` runners for `sql`; arguments become positional bind values. */
  query = (sql) => ({
    all: (...params) => this.all(sql, params),
    get: (...params) => this.get(sql, params),
  });

  /** Wraps `fn` so that invoking the result runs it between BEGIN and COMMIT, rolling back on error. */
  transaction = (fn) => () => {
    this.db.run("BEGIN TRANSACTION");
    try {
      fn();
      this.db.run("COMMIT");
    } catch (error) {
      this.db.run("ROLLBACK");
      throw error;
    }
  };

  close = () => {
    this.db.close();
  };

  export = () => this.db.export();

  all = (sql, params) => {
    const statement = this.db.prepare(sql);
    try {
      if (params.length > 0) {
        statement.bind(params);
      }
      const rows = [];
      while (statement.step()) {
        rows.push(statement.getAsObject());
      }
      return rows;
    } finally {
      statement.free();
    }
  };

  get = (sql, params) => this.all(sql, params)[0];
}

let sqlJsPromise = null;
let resolvedWasmPath = null;

const isNodeRuntime = typeof process !== "undefined" && !!process?.versions?.node;

/**
 * Resolves (and caches) the location of `sql-wasm.wasm`: configured value
 * first, then on-disk discovery under Node, otherwise a CDN URL.
 *
 * @throws {Error} Under Node when nothing is configured and discovery fails.
 */
const getWasmPath = () => {
  if (!resolvedWasmPath) {
    const configured = getConfigValue("sqlJsWasmUrl");
    if (configured) {
      resolvedWasmPath = configured;
    } else if (isNodeRuntime) {
      const located = findNodeWasmPath();
      if (located) {
        resolvedWasmPath = located;
      } else {
        // NOTE(review): this help text references `createNodeConfig` and env
        // names that are not defined in this module — confirm against the
        // package's other entry points before relying on it.
        const message = [
          "Unable to automatically locate sql-wasm.wasm file.",
          "This can happen in bundled environments (Next.js, webpack, etc.).",
          "",
          "Quick fix - add this to your code before using shamela:",
          "",
          ' import { configure, createNodeConfig } from "shamela";',
          " configure(createNodeConfig({",
          " apiKey: process.env.SHAMELA_API_KEY,",
          " booksEndpoint: process.env.SHAMELA_BOOKS_ENDPOINT,",
          " masterPatchEndpoint: process.env.SHAMELA_MASTER_ENDPOINT,",
          " }));",
          "",
          "Or manually specify the path:",
          "",
          ' import { configure } from "shamela";',
          ' import { join } from "node:path";',
          " configure({",
          ' sqlJsWasmUrl: join(process.cwd(), "node_modules", "sql.js", "dist", "sql-wasm.wasm")',
          " });",
        ].join("\n");
        throw Error(message);
      }
    } else {
      resolvedWasmPath = "https://cdn.jsdelivr.net/npm/sql.js@1.13.0/dist/sql-wasm.wasm";
    }
  }
  return resolvedWasmPath;
};

/** Initialises sql.js once and reuses the same promise afterwards. */
const loadSqlJs = () => {
  sqlJsPromise ||= initSqlJs({ locateFile: () => getWasmPath() });
  return sqlJsPromise;
};

/** Creates an empty in-memory database. */
const createDatabase = async () => {
  const SQL = await loadSqlJs();
  return new SqlJsDatabase(new SQL.Database());
};

/** Opens a database from its serialized bytes. */
const openDatabase = async (data) => {
  const SQL = await loadSqlJs();
  return new SqlJsDatabase(new SQL.Database(data));
};

/* -------------------------------------------------------------------------- */
/* Master database helpers                                                    */
/* -------------------------------------------------------------------------- */

/**
 * Recreates `table` in `target` from the definition stored in `source`.
 *
 * @throws {Error} When the source database has no definition for the table.
 */
const ensureTableSchema = (target, source, table) => {
  const definition = source
    .query("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?1")
    .get(table);
  if (!definition?.sql) {
    throw Error(`Missing table definition for ${table} in source database`);
  }
  target.run(`DROP TABLE IF EXISTS ${table}`);
  target.run(definition.sql);
};

/**
 * Copies author/book/category tables from the extracted archive entries into `db`.
 * Entries are matched by base file name (extension stripped, case-insensitive);
 * unrecognised entries are ignored.
 *
 * @param {SqlJsDatabase} db Target database.
 * @param {{name: string, data: Uint8Array}[]} entries Extracted archive entries.
 */
const copyForeignMasterTableData = async (db, entries) => {
  const tableByBaseName = {
    author: Tables.Authors,
    book: Tables.Books,
    category: Tables.Categories,
  };
  const sources = {};

  try {
    // Opened inside the try so already-opened sources are closed if a later open fails.
    for (const entry of entries) {
      const baseName = (entry.name.split("/").pop()?.split("\\").pop() ?? entry.name)
        .replace(/\.(sqlite|db)$/i, "")
        .toLowerCase();
      const table = tableByBaseName[baseName];
      if (table) {
        sources[table] = await openDatabase(entry.data);
      }
    }

    const pairs = Object.entries(sources);
    db.transaction(() => {
      for (const [table, source] of pairs) {
        ensureTableSchema(db, source, table);

        const columns = source
          .query(`PRAGMA table_info(${table})`)
          .all()
          .map((info) => info.name);
        if (columns.length === 0) {
          continue;
        }
        const rows = source.query(`SELECT * FROM ${table}`).all();
        if (rows.length === 0) {
          continue;
        }

        const placeholders = columns.map(() => "?").join(",");
        // `order` is an SQL keyword and must be quoted as a column name.
        const quotedColumns = columns.map((column) => (column === "order" ? '"order"' : column));
        const statement = db.prepare(
          `INSERT INTO ${table} (${quotedColumns.join(",")}) VALUES (${placeholders})`,
        );
        try {
          for (const row of rows) {
            const values = columns.map((column) => (column in row ? row[column] : null));
            statement.run(...values);
          }
        } finally {
          statement.finalize();
        }
      }
    })();
  } finally {
    Object.values(sources).forEach((source) => source?.close());
  }
};

/** (Re)creates a view named `view` that selects everything from `table`. */
const createCompatibilityView = (db, view, table) => {
  db.run(`DROP VIEW IF EXISTS ${view}`);
  db.run(`CREATE VIEW ${view} AS SELECT * FROM ${table}`);
};

const createMasterTables = (db) => {
  db.run(
    `CREATE TABLE ${Tables.Authors} ( id INTEGER, is_deleted TEXT, name TEXT, biography TEXT, death_text TEXT, death_number TEXT )`,
  );
  db.run(
    `CREATE TABLE ${Tables.Books} ( id INTEGER, name TEXT, is_deleted TEXT, category TEXT, type TEXT, date TEXT, author TEXT, printed TEXT, minor_release TEXT, major_release TEXT, bibliography TEXT, hint TEXT, pdf_links TEXT, metadata TEXT )`,
  );
  db.run(`CREATE TABLE ${Tables.Categories} ( id INTEGER, is_deleted TEXT, "order" TEXT, name TEXT )`);
  createCompatibilityView(db, "authors", Tables.Authors);
  createCompatibilityView(db, "books", Tables.Books);
  createCompatibilityView(db, "categories", Tables.Categories);
};

const getAllAuthors = (db) => db.query(`SELECT * FROM ${Tables.Authors}`).all();

const getAllBooks = (db) => db.query(`SELECT * FROM ${Tables.Books}`).all();

const getAllCategories = (db) => db.query(`SELECT * FROM ${Tables.Categories}`).all();

const getMasterData = (db, version) => ({
  authors: getAllAuthors(db),
  books: getAllBooks(db),
  categories: getAllCategories(db),
  version,
});

/* -------------------------------------------------------------------------- */
/* Small utilities                                                            */
/* -------------------------------------------------------------------------- */

/**
 * Masks sensitive query parameters so a URL can be logged.
 * Values longer than six characters keep their first and last three
 * characters; shorter non-empty values are replaced entirely.
 */
const redactUrl = (url, sensitiveParams = ["api_key", "token", "password", "secret", "auth"]) => {
  const parsed = typeof url === "string" ? new URL(url) : new URL(url.toString());
  sensitiveParams.forEach((param) => {
    const value = parsed.searchParams.get(param);
    if (value && value.length > 6) {
      parsed.searchParams.set(param, `${value.slice(0, 3)}***${value.slice(-3)}`);
    } else if (value) {
      parsed.searchParams.set(param, "***");
    }
  });
  return parsed.toString();
};

/** Maps a raw page row to the public shape; falsy number/page/part are omitted. */
const mapPageRowToPage = (row) => ({
  content: row.content,
  id: row.id,
  ...(row.number && { number: row.number }),
  ...(row.page && { page: Number(row.page) }),
  ...(row.part && { part: row.part }),
});

/** Maps a raw title row to the public shape; a falsy numeric parent is omitted. */
const mapTitleRowToTitle = (row) => {
  const parent = Number(row.parent);
  return {
    content: row.content,
    id: row.id,
    page: Number(row.page),
    ...(parent && { parent }),
  };
};

/**
 * Default sanitisation rules: each key is a regular-expression source and each
 * value is its replacement. Rules are applied in declaration order.
 */
const DEFAULT_SANITIZATION_RULES = {
  "<img[^>]*>>": "",
  舄: "",
  "﵀": "رَحِمَهُ ٱللَّٰهُ",
  "﵁": "رضي الله عنه",
  "﵂": "رَضِيَ ٱللَّٰهُ عَنْهَا",
  "﵃": "رَضِيَ اللَّهُ عَنْهُمْ",
  "﵄": "رَضِيَ ٱللَّٰهُ عَنْهُمَا",
  "﵅": "رَضِيَ اللَّهُ عَنْهُنَّ",
  "﵇": "عَلَيْهِ ٱلسَّلَٰمُ",
  "﵈": "عَلَيْهِمُ السَّلامُ",
  "﵌": "صلى الله عليه وآله وسلم",
  "﵎": "تبارك وتعالى",
  "﵏": "رَحِمَهُمُ ٱللَّٰهُ",
  "﷽": "",
  "﷿": "عَزَّ وَجَلَّ",
};

/** Returns the URL with its protocol forced to https. */
const fixHttpsProtocol = (url) => {
  const parsed = new URL(url);
  parsed.protocol = "https";
  return parsed.toString();
};

const isSqliteEntry = (entry) => /\.(sqlite|db)$/i.test(entry.name);

const findSqliteEntry = (entries) => entries.find(isSqliteEntry);

/** Lower-cased extension including the dot, or an empty string when there is none. */
const getExtension = (filePath) => {
  const match = /\.([^.]+)$/.exec(filePath);
  return match ? `.${match[1].toLowerCase()}` : "";
};

/**
 * Builds a request URL whose query string is replaced by `params`
 * (plus `api_key` unless `includeApiKey` is false).
 *
 * @throws {Error} When the API key is required but not configured.
 */
const buildUrl = (endpoint, params, includeApiKey = true) => {
  const url = new URL(endpoint);
  const search = new URLSearchParams();
  Object.entries(params).forEach(([key, value]) => {
    search.append(key, value.toString());
  });
  if (includeApiKey) {
    search.append("api_key", requireConfigValue("apiKey"));
  }
  url.search = search.toString();
  return url;
};

/**
 * Fetches a URL using `options.fetchImpl`, the configured fetch
 * implementation, or the global `fetch`, in that order.
 *
 * @returns {Promise<any|Uint8Array>} Parsed JSON for JSON responses, raw bytes otherwise.
 * @throws {Error} When the response status is not OK.
 */
const httpsGet = async (url, options = {}) => {
  const target = typeof url === "string" ? url : url.toString();
  const fetchImpl = options.fetchImpl ?? getConfig().fetchImplementation ?? fetch;
  const response = await fetchImpl(target);
  if (!response.ok) {
    throw Error(`Error making request: ${response.status} ${response.statusText}`);
  }
  if ((response.headers.get("content-type") ?? "").includes("application/json")) {
    return await response.json();
  }
  const buffer = await response.arrayBuffer();
  return new Uint8Array(buffer);
};

const isNodeEnvironment = typeof process !== "undefined" && !!process?.versions?.node;

/** Lazily loads `node:fs/promises`; rejects outside Node.js. */
const loadFsPromises = async () => {
  if (!isNodeEnvironment) {
    // The expression must start on the same line as `throw`.
    throw Error("File system operations are only supported in Node.js environments");
  }
  return import("node:fs/promises");
};

/** Creates the parent directory of `filePath` (recursively) and returns the fs module. */
const ensureParentDirectory = async (filePath) => {
  const [fs, path] = await Promise.all([loadFsPromises(), import("node:path")]);
  const directory = path.dirname(filePath);
  await fs.mkdir(directory, { recursive: true });
  return fs;
};

/**
 * Downloads a zip archive and returns its entries.
 *
 * @returns {Promise<{name: string, data: Uint8Array}[]>}
 * @throws {Error} "Error processing URL: …" when the archive cannot be unpacked.
 */
const unzipFromUrl = async (url) => {
  const payload = await httpsGet(url);
  const byteLength =
    payload instanceof Uint8Array
      ? payload.length
      : payload && typeof payload.byteLength === "number"
        ? payload.byteLength
        : 0;
  log.debug("unzipFromUrl:bytes", byteLength);

  const bytes = payload instanceof Uint8Array ? payload : new Uint8Array(payload);
  try {
    const archive = unzipSync(bytes);
    const entries = Object.entries(archive).map(([name, data]) => ({ data, name }));
    log.debug(
      "unzipFromUrl:entries",
      entries.map((entry) => entry.name),
    );
    return entries;
  } catch (error) {
    throw new Error(`Error processing URL: ${error.message}`, { cause: error });
  }
};

/**
 * Delivers `payload` through `output.writer` when provided, otherwise writes
 * it to `output.path` (strings as UTF-8).
 *
 * @throws {Error} When neither a writer nor a path is supplied.
 */
const writeOutput = async (output, payload) => {
  if (output.writer) {
    await output.writer(payload);
    return;
  }
  if (!output.path) {
    throw Error("Output options must include either a writer or a path");
  }
  const fs = await ensureParentDirectory(output.path);
  if (typeof payload === "string") {
    await fs.writeFile(output.path, payload, "utf-8");
  } else {
    await fs.writeFile(output.path, payload);
  }
};

const REQUIRED_MASTER_FILES = ["author.sqlite", "book.sqlite", "category.sqlite"];

/** Throws when apiKey, booksEndpoint or masterPatchEndpoint cannot be resolved. */
const validateEnvVariables = () => {
  const { apiKey, booksEndpoint, masterPatchEndpoint } = getConfig();
  const missing = [
    ["apiKey", apiKey],
    ["booksEndpoint", booksEndpoint],
    ["masterPatchEndpoint", masterPatchEndpoint],
  ]
    .filter(([, value]) => !value)
    .map(([key]) => key);
  if (missing.length) {
    throw Error(`${missing.join(", ")} environment variables not set`);
  }
};

/** True when every required master file name is present (base name, case-insensitive). */
const hasRequiredMasterFiles = (names) => {
  const baseNames = new Set(
    names.map((name) => name.match(/[^\\/]+$/)?.[0] ?? name).map((name) => name.toLowerCase()),
  );
  return REQUIRED_MASTER_FILES.every((required) => baseNames.has(required.toLowerCase()));
};

/* -------------------------------------------------------------------------- */
/* Database setup                                                             */
/* -------------------------------------------------------------------------- */

/**
 * Downloads a book (and its optional patch) and assembles an in-memory database.
 *
 * @returns {Promise<{client: SqlJsDatabase, cleanup: () => Promise<void>}>}
 */
const setupBookDatabase = async (id, bookMetadata) => {
  log.info(`Setting up book database for ${id}`);
  const metadata = bookMetadata || (await getBookMetadata(id));
  const patchDownload = metadata.minorReleaseUrl
    ? unzipFromUrl(metadata.minorReleaseUrl)
    : Promise.resolve([]);
  const [sourceEntries, patchEntries] = await Promise.all([
    unzipFromUrl(metadata.majorReleaseUrl),
    patchDownload,
  ]);

  const sourceEntry = findSqliteEntry(sourceEntries);
  if (!sourceEntry) {
    throw Error("Unable to locate book database in archive");
  }

  const client = await createDatabase();
  try {
    log.info("Creating tables");
    createBookTables(client);

    const sourceDb = await openDatabase(sourceEntry.data);
    try {
      const patchEntry = findSqliteEntry(patchEntries);
      if (patchEntry) {
        log.info(`Applying patches from ${patchEntry.name} to ${sourceEntry.name}`);
        const patchDb = await openDatabase(patchEntry.data);
        try {
          applyPatches(client, sourceDb, patchDb);
        } finally {
          patchDb.close();
        }
      } else {
        log.info(`Copying table data from ${sourceEntry.name}`);
        copyTableData(client, sourceDb);
      }
    } finally {
      sourceDb.close();
    }

    return {
      cleanup: async () => {
        client.close();
      },
      client,
    };
  } catch (error) {
    client.close();
    throw error;
  }
};

/**
 * Downloads the master archive and assembles an in-memory master database.
 *
 * @returns {Promise<{client: SqlJsDatabase, cleanup: () => Promise<void>, version: any}>}
 * @throws {Error} "Expected tables not found!" when a required file is missing.
 */
const setupMasterDatabase = async (masterMetadata) => {
  log.info("Setting up master database");
  const metadata = masterMetadata || (await getMasterMetadata(0));
  log.info(`Downloading master database ${metadata.version} from: ${redactUrl(metadata.url)}`);

  const entries = await unzipFromUrl(fixHttpsProtocol(metadata.url));
  const entryNames = entries.map((entry) => entry.name);
  log.debug?.(`sourceTables downloaded: ${entryNames.toString()}`);
  if (!hasRequiredMasterFiles(entryNames)) {
    log.error(`Some source tables were not found: ${entryNames.toString()}`);
    throw Error("Expected tables not found!");
  }

  const client = await createDatabase();
  try {
    log.info("Creating master tables");
    createMasterTables(client);
    log.info("Copying data to master table");
    await copyForeignMasterTableData(client, entries.filter(isSqliteEntry));
    return {
      cleanup: async () => {
        client.close();
      },
      client,
      version: metadata.version,
    };
  } catch (error) {
    client.close();
    throw error;
  }
};

/* -------------------------------------------------------------------------- */
/* Public API                                                                 */
/* -------------------------------------------------------------------------- */

/**
 * Fetches release metadata for a book.
 *
 * @param {number|string} id Book identifier appended to the books endpoint.
 * @param {{majorVersion?: number, minorVersion?: number}} [options]
 * @returns {Promise<object>} majorRelease, majorReleaseUrl and, when a patch exists, minorRelease and minorReleaseUrl.
 * @throws {Error} When configuration is incomplete or the request fails.
 */
const getBookMetadata = async (id, options) => {
  validateEnvVariables();
  const url = buildUrl(`${requireConfigValue("booksEndpoint")}/${id}`, {
    major_release: (options?.majorVersion || 0).toString(),
    minor_release: (options?.minorVersion || 0).toString(),
  });
  log.info(`Fetching shamela.ws book link: ${redactUrl(url)}`);
  try {
    const response = await httpsGet(url);
    return {
      majorRelease: response.major_release,
      majorReleaseUrl: fixHttpsProtocol(response.major_release_url),
      ...(response.minor_release_url && {
        minorReleaseUrl: fixHttpsProtocol(response.minor_release_url),
      }),
      ...(response.minor_release_url && { minorRelease: response.minor_release }),
    };
  } catch (error) {
    throw new Error(`Error fetching book metadata: ${error.message}`, { cause: error });
  }
};

/**
 * Downloads a book and writes it as JSON or SQLite, chosen by the extension of
 * `options.outputFile.path` (.json, .db or .sqlite).
 *
 * @returns {Promise<string>} The output path.
 * @throws {Error} When the path is missing or has an unsupported extension.
 */
const downloadBook = async (id, options) => {
  log.info(`downloadBook ${id} ${JSON.stringify(options)}`);
  if (!options.outputFile.path) {
    throw Error("outputFile.path must be provided to determine output format");
  }
  const extension = getExtension(options.outputFile.path);
  const { client, cleanup } = await setupBookDatabase(id, options.bookMetadata);
  try {
    if (extension === ".json") {
      const data = getBookData(client);
      await writeOutput(options.outputFile, JSON.stringify(data, null, 2));
    } else if (extension === ".db" || extension === ".sqlite") {
      await writeOutput(options.outputFile, client.export());
    } else {
      throw Error(`Unsupported output extension: ${extension}`);
    }
  } finally {
    await cleanup();
  }
  return options.outputFile.path;
};

/**
 * Fetches the master database patch descriptor.
 *
 * @param {number} [version=0] Version sent as the `version` query parameter.
 * @returns {Promise<{url: string, version: any}>}
 */
const getMasterMetadata = async (version = 0) => {
  validateEnvVariables();
  const url = buildUrl(requireConfigValue("masterPatchEndpoint"), { version: version.toString() });
  log.info(`Fetching shamela.ws master database patch link: ${redactUrl(url)}`);
  try {
    const response = await httpsGet(url);
    return { url: response.patch_url, version: response.version };
  } catch (error) {
    throw new Error(`Error fetching master patch: ${error.message}`, { cause: error });
  }
};

/** Builds the cover image URL for a book from the master patch endpoint's origin. */
const getCoverUrl = (bookId) => {
  const endpoint = requireConfigValue("masterPatchEndpoint");
  const { origin } = new URL(endpoint);
  return `${origin}/covers/${bookId}.jpg`;
};

/**
 * Downloads the master database and writes it as JSON or SQLite, chosen by the
 * extension of `options.outputFile.path`.
 *
 * @returns {Promise<string>} The output path.
 */
const downloadMasterDatabase = async (options) => {
  log.info(`downloadMasterDatabase ${JSON.stringify(options)}`);
  if (!options.outputFile.path) {
    throw Error("outputFile.path must be provided to determine output format");
  }
  const extension = getExtension(options.outputFile.path);
  const { client, cleanup, version } = await setupMasterDatabase(options.masterMetadata);
  try {
    if (extension === ".json") {
      const data = getMasterData(client, version);
      await writeOutput(options.outputFile, JSON.stringify(data, null, 2));
    } else if (extension === ".db" || extension === ".sqlite") {
      await writeOutput(options.outputFile, client.export());
    } else {
      throw Error(`Unsupported output extension: ${extension}`);
    }
  } finally {
    await cleanup();
  }
  return options.outputFile.path;
};

/** Downloads a book and returns its pages and titles in the public shape. */
const getBook = async (id) => {
  log.info(`getBook ${id}`);
  const { client, cleanup } = await setupBookDatabase(id);
  try {
    const data = getBookData(client);
    return {
      pages: data.pages.map(mapPageRowToPage),
      titles: data.titles.map(mapTitleRowToTitle),
    };
  } finally {
    await cleanup();
  }
};

/** Downloads the master database and returns authors, books, categories and version. */
const getMaster = async () => {
  log.info("getMaster");
  const { client, cleanup, version } = await setupMasterDatabase();
  try {
    return getMasterData(client, version);
  } finally {
    await cleanup();
  }
};

/* -------------------------------------------------------------------------- */
/* Content parsing                                                            */
/* -------------------------------------------------------------------------- */

/** Matches a line consisting solely of closing/terminal punctuation. */
const PUNCTUATION_ONLY = /^[)\]\u00BB"”'’.,?!:\u061B\u060C\u061F\u06D4\u2026]+$/;

/** Appends punctuation-only lines to the text of the preceding line. */
const mergeDanglingPunctuation = (lines) => {
  const merged = [];
  for (const line of lines) {
    const previous = merged[merged.length - 1];
    if (previous && PUNCTUATION_ONLY.test(line.text)) {
      previous.text += line.text;
    } else {
      merged.push(line);
    }
  }
  return merged;
};

/** Normalises CR/CRLF to LF, splits on LF, trims each line and drops empty ones. */
const splitIntoLines = (content) =>
  content
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean);

const toPlainLines = (content) => splitIntoLines(content).map((text) => ({ text }));

/**
 * Extracts an attribute value (double-quoted, single-quoted or unquoted) from a tag.
 * Unquoted values end at whitespace or `>`.
 */
const getAttribute = (tag, name) => {
  const pattern = new RegExp(`${name}\\s*=\\s*("([^"]*)"|'([^']*)'|([^\\s>]+))`, "i");
  const match = tag.match(pattern);
  if (match) {
    return match[2] ?? match[3] ?? match[4];
  }
  return undefined;
};

/** Splits markup into text, start-tag and end-tag tokens; only `id` and `data-type` attributes are captured. */
const tokenize = (html) => {
  const tokens = [];
  const tagPattern = /<[^>]+>/g;
  let cursor = 0;
  let match = tagPattern.exec(html);
  while (match) {
    if (match.index > cursor) {
      tokens.push({ type: "text", value: html.slice(cursor, match.index) });
    }
    const tag = match[0];
    const isEnd = /^<\//.test(tag);
    const nameMatch = tag.match(/^<\/?\s*([a-zA-Z0-9:-]+)/);
    const name = nameMatch ? nameMatch[1].toLowerCase() : "";
    if (isEnd) {
      tokens.push({ name, type: "end" });
    } else {
      const attributes = {};
      attributes.id = getAttribute(tag, "id");
      attributes["data-type"] = getAttribute(tag, "data-type");
      tokens.push({ attributes, name, type: "start" });
    }
    cursor = tagPattern.lastIndex;
    match = tagPattern.exec(html);
  }
  if (cursor < html.length) {
    tokens.push({ type: "text", value: html.slice(cursor) });
  }
  return tokens;
};

/** Builds a line object from trimmed text, or `null` when the text is blank. */
const createLine = (text, id) => {
  const trimmed = text.trim();
  if (!trimmed) {
    return null;
  }
  return id ? { id, text: trimmed } : { text: trimmed };
};

/** Id of the innermost open title span that has one, if any. */
const findOpenTitleId = (spanStack) => {
  for (let index = spanStack.length - 1; index >= 0; index--) {
    const span = spanStack[index];
    if (span.isTitle && span.id) {
      return span.id;
    }
  }
  return undefined;
};

/** Feeds a text token into the parser state, emitting one line per LF encountered. */
const handleTextToken = (text, state) => {
  if (!text) {
    return;
  }
  const segments = text.split("\n");
  for (let index = 0; index < segments.length; index++) {
    if (index > 0) {
      // A line break closes the current line and starts a new one.
      const line = createLine(state.currentText, state.currentId);
      if (line) {
        state.result.push(line);
      }
      state.currentText = "";
      state.currentId = findOpenTitleId(state.spanStack) || undefined;
    }
    if (segments[index]) {
      state.currentText += segments[index];
    }
  }
};

/** Pushes an opening span; a title span's id (minus a `toc-` prefix) tags the current line if it has no id yet. */
const handleSpanStart = (token, state) => {
  const isTitle = token.attributes["data-type"] === "title";
  let id;
  if (isTitle) {
    id = (token.attributes.id ?? "").replace(/^toc-/, "");
  }
  state.spanStack.push({ id, isTitle });
  if (isTitle && id && !state.currentId) {
    state.currentId = id;
  }
};

/**
 * Splits page content into lines, tagging lines that fall inside a title span
 * with that span's id. Punctuation-only lines are merged into the previous line.
 *
 * @param {string} content Raw page content, optionally containing `<span>` markup.
 * @returns {{id?: string, text: string}[]}
 */
const parseContentRobust = (content) => {
  const normalized = content.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  if (!/<span[^>]*>/i.test(normalized)) {
    return mergeDanglingPunctuation(toPlainLines(normalized));
  }

  const tokens = tokenize(`<root>${normalized}</root>`);
  const state = { currentId: undefined, currentText: "", result: [], spanStack: [] };
  for (const token of tokens) {
    if (token.type === "text") {
      handleTextToken(token.value, state);
    } else if (token.type === "start" && token.name === "span") {
      handleSpanStart(token, state);
    } else if (token.type === "end" && token.name === "span") {
      state.spanStack.pop();
    }
  }

  const lastLine = createLine(state.currentText, state.currentId);
  if (lastLine) {
    state.result.push(lastLine);
  }
  return mergeDanglingPunctuation(state.result).filter((line) => line.text.length > 0);
};

/** Default rules compiled once at module load. */
const COMPILED_DEFAULT_RULES = Object.entries(DEFAULT_SANITIZATION_RULES).map(
  ([pattern, replacement]) => ({ regex: new RegExp(pattern, "g"), replacement }),
);

/** Compiles a rules map, reusing the precompiled list when given the default map itself. */
const compileRules = (rules) => {
  if (rules === DEFAULT_SANITIZATION_RULES) {
    return COMPILED_DEFAULT_RULES;
  }
  const compiled = [];
  for (const pattern in rules) {
    compiled.push({ regex: new RegExp(pattern, "g"), replacement: rules[pattern] });
  }
  return compiled;
};

/**
 * Applies every rule (regex source → replacement) to the text, in order.
 *
 * @param {string} text Page content.
 * @param {Record<string, string>} [rules] Defaults to the built-in rules.
 * @returns {string}
 */
const sanitizePageContent = (text, rules = DEFAULT_SANITIZATION_RULES) => {
  let sanitized = text;
  for (const { regex, replacement } of compileRules(rules)) {
    sanitized = sanitized.replace(regex, replacement);
  }
  return sanitized;
};

/**
 * Splits content at the last occurrence of `footnoteMarker`.
 *
 * @returns {[string, string]} `[body, footer]`; the footer is empty when the marker is absent.
 */
const splitPageBodyFromFooter = (content, footnoteMarker = "_________") => {
  const markerIndex = content.lastIndexOf(footnoteMarker);
  if (markerIndex < 0) {
    return [content, ""];
  }
  return [content.slice(0, markerIndex), content.slice(markerIndex + footnoteMarker.length)];
};

/** Replaces `⦗…⦘` markers containing Arabic-Indic digits (with adjacent spaces/CRs) by a single space. */
const removeArabicNumericPageMarkers = (text) =>
  text.replace(/(?: |\r){0,2}⦗[\u0660-\u0669]+⦘(?: |\r)?/g, " ");

/** Unwraps `<a>` elements (keeping their inner text) and strips `<hadeeth…>` tags. */
const removeTagsExceptSpan = (content) => {
  const withoutAnchors = content.replace(/<a[^>]*>(.*?)<\/a>/gs, "$1");
  return withoutAnchors.replace(/<hadeeth[^>]*>|<\/hadeeth>|<hadeeth-\d+>/gs, "");
};

export {
  configure,
  downloadBook,
  downloadMasterDatabase,
  getBook,
  getBookMetadata,
  getCoverUrl,
  getMaster,
  getMasterMetadata,
  parseContentRobust,
  removeArabicNumericPageMarkers,
  removeTagsExceptSpan,
  resetConfig,
  sanitizePageContent,
  splitPageBodyFromFooter,
};