nextra-dify-plugin
Version:
A Next.js webpack plugin for integrating a Dify knowledge base with Nextra documentation
14 lines (8 loc) • 9.03 kB
JavaScript
const axiosModule = require("axios");
const node_fs = require("node:fs");
const node_path = require("node:path");
const matterModule = require("gray-matter");
const prompts = require("@clack/prompts");
const glob = require("glob");

/**
 * Unwraps the `default` export of a module object when one is present.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod.default` when `mod` is an object with a `default` key, otherwise `mod`.
 */
function _interopDefaultCompat(mod) {
  return mod && typeof mod === "object" && "default" in mod ? mod.default : mod;
}

const axios = _interopDefaultCompat(axiosModule);
const matter = _interopDefaultCompat(matterModule);

/** Page size requested when listing the documents of a knowledge base. */
const DOCUMENT_PAGE_SIZE = 100;

/**
 * Serializes a value for logging without ever throwing.
 *
 * @param {*} value - Any value.
 * @returns {string} The string itself, pretty-printed JSON, or `String(value)` as a last resort.
 */
function safeStringify(value) {
  if (typeof value === "string") {
    return value;
  }
  try {
    return JSON.stringify(value, null, 2) ?? String(value);
  } catch {
    return String(value);
  }
}

/**
 * Builds a log-friendly description of a thrown value.
 *
 * `JSON.stringify` on a plain `Error` yields `{}`, and serializing an HTTP
 * client error wholesale can dump the request configuration (including the
 * `Authorization` header set in `DifyClient`) into build logs. Only the name,
 * message, HTTP status and response body are reported here.
 *
 * @param {*} error - The caught value.
 * @returns {string} Human-readable description.
 */
function formatError(error) {
  if (!(error instanceof Error)) {
    return safeStringify(error ?? {});
  }
  const parts = [`${error.name}: ${error.message}`];
  const status = error.response?.status;
  if (status !== undefined) {
    parts.push(`status ${status}`);
  }
  const body = error.response?.data;
  if (body !== undefined) {
    parts.push(`response ${safeStringify(body)}`);
  }
  return parts.join(" | ");
}

/**
 * Tells whether a stored document belongs to the module identified by `baseName`.
 *
 * A bare `startsWith` check would also match unrelated modules that merely
 * share a prefix (`proj_guide` vs `proj_guide-extra` or `proj_guide_intro`),
 * so only the exact name or a numeric chunk suffix (`_1`, `_2`, ...) is accepted.
 * Documents are uploaded as `${name}.txt`; presumably the service may report
 * the name with that extension, so an optional `.txt` is tolerated — TODO confirm.
 *
 * @param {string} documentName - Name reported for the stored document.
 * @param {string} baseName - Module name without chunk suffix.
 * @returns {boolean} True when the document is a chunk of that module.
 */
function isModuleDocument(documentName, baseName) {
  if (!documentName.startsWith(baseName)) {
    return false;
  }
  return /^(?:_\d+)?(?:\.txt)?$/.test(documentName.slice(baseName.length));
}

/**
 * Creates a function that locates the last split boundary inside a text buffer.
 *
 * @param {string|RegExp|undefined} pattern - Value of `maxChunkSplitBeforeMarker`.
 * @returns {((text: string) => number)|null} Finder returning the boundary index
 *   or -1, or `null` when no pattern is configured.
 */
function createBoundaryFinder(pattern) {
  if (!pattern) {
    return null;
  }
  let regex;
  try {
    regex = new RegExp(pattern, "gm");
  } catch {
    console.warn(`[MDXParser] Invalid regex pattern: ${pattern}, falling back to string matching`);
    return (text) => text.lastIndexOf(pattern);
  }
  return (text) => {
    const matches = Array.from(text.matchAll(regex));
    return matches.length > 0 ? matches[matches.length - 1].index : -1;
  };
}

/**
 * Thin client around the Dify dataset (knowledge base) HTTP API.
 */
class DifyClient {
  client;
  knowledgeBaseId;
  documents = null;
  process_rule;

  /**
   * @param {string} apiToken - Token sent as `Authorization: Bearer <token>`.
   * @param {string} knowledgeBaseId - Dataset identifier used in every request path.
   * @param {string} [apiBaseUrl] - Base URL of the API.
   * @param {object} [processRule] - Object providing `mode`, `rules`, `doc_form`, `doc_language`.
   */
  constructor(apiToken, knowledgeBaseId, apiBaseUrl = "https://api.dify.ai/v1", processRule) {
    this.knowledgeBaseId = knowledgeBaseId;
    this.client = axios.create({
      baseURL: apiBaseUrl,
      headers: {
        Authorization: `Bearer ${apiToken}`,
        "Content-Type": "application/json",
      },
    });
    this.process_rule = processRule;
    // Warm the cache in the background; initDocuments catches its own errors,
    // so this promise never rejects.
    void this.initDocuments();
  }

  /**
   * Loads the document list of the knowledge base into the local cache.
   *
   * Pages are requested until the response no longer sets `has_more`
   * (or a page comes back empty), so documents beyond the first page are
   * found by the lookup and delete helpers.
   *
   * @returns {Promise<object[]>} The fetched documents, or `[]` when the request failed.
   */
  async initDocuments() {
    try {
      const documents = [];
      for (let page = 1; ; page += 1) {
        const response = await this.client.get(`/datasets/${this.knowledgeBaseId}/documents`, {
          params: { page, limit: DOCUMENT_PAGE_SIZE },
        });
        const batch = response.data.data || [];
        documents.push(...batch);
        if (!response.data.has_more || batch.length === 0) {
          break;
        }
      }
      this.documents = documents;
      return this.documents;
    } catch (error) {
      console.error("Failed to get documents:", formatError(error));
      return [];
    }
  }

  /**
   * Returns the cached documents, fetching them first when the cache is empty.
   *
   * @returns {Promise<object[]>} Cached documents (possibly empty).
   */
  async getDocuments() {
    if (!this.documents || !this.documents.length) {
      await this.initDocuments();
    }
    return this.documents || [];
  }

  /**
   * Finds a cached document whose name equals `name` exactly.
   *
   * @param {string} name - Document name to look up.
   * @returns {Promise<object|null>} The document, or `null` when none matches.
   */
  async findDocumentByName(name) {
    const documents = await this.getDocuments();
    return documents.find((doc) => doc.name === name) || null;
  }

  /**
   * Deletes every stored chunk of the module that `name` belongs to.
   *
   * A trailing `_<digits>` chunk suffix is stripped from `name` first. A module
   * whose own name ends in `_<digits>` therefore shares its base with its
   * sibling without the suffix; that ambiguity is inherent to the naming scheme.
   *
   * @param {string} name - A chunk or module document name.
   * @returns {Promise<void>}
   * @throws Rethrows the first deletion failure.
   */
  async deleteModuleDocuments(name) {
    const documents = await this.getDocuments();
    const baseName = name.replace(/_\d+$/, "");
    const targets = documents.filter((doc) => isModuleDocument(doc.name, baseName));
    for (const doc of targets) {
      await this.deleteDocument(doc.id);
    }
  }

  /**
   * Deletes every document that belongs to the given project.
   *
   * Document names are generated as `<projectName>_<path>`, so the match
   * includes the `_` separator; otherwise project `docs` would also wipe the
   * documents of project `docs2`.
   *
   * @param {string} projectName - Project name (a trailing `_` is accepted).
   * @returns {Promise<void>}
   * @throws Rethrows the first deletion failure.
   */
  async deleteAllDocumentsForCurProject(projectName) {
    const documents = await this.getDocuments();
    const rawPrefix = `${projectName}`;
    const prefix = rawPrefix.endsWith("_") ? rawPrefix : `${rawPrefix}_`;
    for (const doc of documents) {
      if (doc.name.startsWith(prefix)) {
        await this.deleteDocument(doc.id);
      }
    }
  }

  /**
   * Deletes one document remotely and drops it from the local cache.
   *
   * @param {string} documentId - Identifier of the document.
   * @returns {Promise<void>}
   * @throws Rethrows the request error after logging it.
   */
  async deleteDocument(documentId) {
    try {
      await this.client.delete(`/datasets/${this.knowledgeBaseId}/documents/${documentId}`);
      if (this.documents) {
        this.documents = this.documents.filter((doc) => doc.id !== documentId);
      }
    } catch (error) {
      console.error("Failed to delete document:", formatError(error));
      throw error;
    }
  }

  /**
   * Creates a document by uploading `content` as the text file `<name>.txt`.
   *
   * @param {string} name - Document name.
   * @param {string} content - Text content to upload.
   * @returns {Promise<{document_id: string, name: string, operation: "create"}>}
   * @throws Rethrows the request error after logging it.
   */
  async createDocument(name, content) {
    try {
      const { doc_form, doc_language, mode, rules } = this.process_rule || {};
      const form = new FormData();
      const file = new Blob([content], { type: "text/plain" });
      form.append(
        "data",
        JSON.stringify({
          indexing_technique: "high_quality",
          process_rule: { mode, rules },
          doc_form,
          doc_language,
        }),
      );
      form.append("file", file, `${name}.txt`);
      const response = await this.client.post(
        `/datasets/${this.knowledgeBaseId}/document/create-by-file`,
        form,
        { headers: { "Content-Type": "multipart/form-data" } },
      );
      return { document_id: response.data.document.id, name, operation: "create" };
    } catch (error) {
      console.error("Failed to create document:", formatError(error));
      throw error;
    }
  }

  /**
   * Replaces the content of an existing document with the text file `<name>.txt`.
   *
   * NOTE(review): unlike createDocument, `doc_form` and `doc_language` are sent
   * nested inside `process_rule` here. Kept as-is; confirm against the API.
   *
   * @param {string} documentId - Identifier of the document to update.
   * @param {string} name - Document name.
   * @param {string} content - New text content.
   * @returns {Promise<{document_id: string, name: string, operation: "update"}>}
   * @throws Rethrows the request error after logging it.
   */
  async updateDocument(documentId, name, content) {
    try {
      const { doc_form, doc_language, mode, rules } = this.process_rule || {};
      const form = new FormData();
      const file = new Blob([content], { type: "text/plain" });
      form.append(
        "data",
        JSON.stringify({
          indexing_technique: "high_quality",
          process_rule: { mode, rules, doc_form, doc_language },
        }),
      );
      form.append("file", file, `${name}.txt`);
      await this.client.post(
        `/datasets/${this.knowledgeBaseId}/documents/${documentId}/update-by-file`,
        form,
        { headers: { "Content-Type": "multipart/form-data" } },
      );
      return { document_id: documentId, name, operation: "update" };
    } catch (error) {
      console.error("Failed to update document:", formatError(error));
      throw error;
    }
  }

  /**
   * Updates the document named `name` when one is cached, otherwise creates it.
   *
   * @param {string} name - Document name.
   * @param {string} content - Text content.
   * @returns {Promise<{document_id: string, name: string, operation: string}>}
   */
  async uploadDocument(name, content) {
    const existing = await this.findDocumentByName(name);
    return existing
      ? this.updateDocument(existing.id, name, content)
      : this.createDocument(name, content);
  }
}

/**
 * Reads MDX files and turns them into named, cleaned text chunks.
 */
class MDXParser {
  options;

  /**
   * @param {object} options - Plugin options; reads `projectName`, `enableSplit`,
   *   `splitMarker`, `maxChunkSize` and `maxChunkSplitBeforeMarker`.
   */
  constructor(options) {
    this.options = options;
  }

  /**
   * Reads one MDX file and extracts its front matter, body and menu path.
   *
   * @param {string} filePath - Path of the MDX file.
   * @param {string} rootDir - Directory the menu path is computed relative to.
   * @returns {{path: string, content: string, frontmatter: object, title: string, menuPath: string[]}}
   */
  parseMDXFile(filePath, rootDir) {
    const raw = node_fs.readFileSync(filePath, "utf-8");
    const { data: frontmatter, content } = matter(raw);
    const relativePath = node_path.relative(rootDir, filePath);
    // Split on both separators so Windows paths produce the same segments as POSIX ones.
    // NOTE(review): for a file directly in rootDir, dirname is "." and stays in the
    // menu path; kept so that existing document names do not change.
    const directories = node_path.dirname(relativePath).split(/[\\/]/).filter(Boolean);
    const baseName = node_path.basename(filePath, ".mdx");
    const menuPath = [...directories, baseName];
    const title = frontmatter.title || baseName;
    return { path: filePath, content, frontmatter, title, menuPath };
  }

  /**
   * Builds the document name `<projectName>_<menu_path>[_<index + 1>]`.
   *
   * @param {{menuPath: string[]}} parsed - Result of parseMDXFile.
   * @param {number} [index] - Zero-based chunk index; omitted for unsplit files.
   * @returns {string} Document name.
   */
  generateDocumentName(parsed, index) {
    const { projectName } = this.options;
    const baseName = `${projectName}_${parsed.menuPath.join("_")}`;
    return index !== undefined ? `${baseName}_${index + 1}` : baseName;
  }

  /**
   * Splits content into chunks.
   *
   * - Splitting disabled: the content is returned as a single chunk.
   * - Content contains `splitMarker`: split on the marker, empty parts dropped.
   * - Otherwise: lines are accumulated until `maxChunkSize` would be exceeded.
   *   When `maxChunkSplitBeforeMarker` is set, the cut is moved back to the last
   *   match inside the buffer so the matched text starts the next chunk.
   *
   * @param {string} content - Text to split.
   * @returns {string[]} Chunks; `[content]` when size-based splitting produced nothing.
   */
  splitContent(content) {
    const {
      enableSplit,
      splitMarker = "---",
      maxChunkSize = 5e3,
      maxChunkSplitBeforeMarker,
    } = this.options;

    if (!enableSplit) {
      return [content];
    }
    if (content.includes(splitMarker)) {
      return content
        .split(splitMarker)
        .map((part) => part.trim())
        .filter((part) => part.length > 0);
    }

    const chunks = [];
    const pushChunk = (text) => {
      const trimmed = text.trim();
      // Whitespace-only buffers would otherwise become empty documents.
      if (trimmed) {
        chunks.push(trimmed);
      }
    };
    // Compiled once per call instead of on every overflow.
    const findBoundary = createBoundaryFinder(maxChunkSplitBeforeMarker);

    let buffer = "";
    for (const line of content.split("\n")) {
      const overflows = buffer.length + line.length + 1 > maxChunkSize && buffer.length > 0;
      if (!overflows) {
        buffer += (buffer ? "\n" : "") + line;
        continue;
      }
      const boundary = findBoundary ? findBoundary(buffer) : -1;
      if (boundary > 0) {
        pushChunk(buffer.substring(0, boundary));
        buffer = `${buffer.substring(boundary)}\n${line}`;
      } else {
        // No usable boundary (none found, or it sits at index 0, which would
        // emit an empty chunk and never shrink the buffer): flush everything.
        pushChunk(buffer);
        buffer = line;
      }
    }
    pushChunk(buffer);
    return chunks.length > 0 ? chunks : [content];
  }

  /**
   * Converts a parsed file into chunk descriptors.
   *
   * @param {{path: string, content: string, menuPath: string[]}} parsed - Result of parseMDXFile.
   * @returns {{id: string, content: string, name: string, index: number}[]} One entry per chunk;
   *   names carry a numeric suffix only when the file produced more than one chunk.
   */
  convertToDocumentChunks(parsed) {
    const parts = this.splitContent(parsed.content);
    const isSplit = parts.length > 1;
    return parts.map((content, index) => ({
      id: `${parsed.path}_${index}`,
      content,
      name: this.generateDocumentName(parsed, isSplit ? index : undefined),
      index,
    }));
  }

  /**
   * Strips MDX-specific syntax from a chunk.
   *
   * Removes `import ... from '...'` statements and `{/* ... *\/}` comments and
   * collapses runs of blank lines to a single blank line (previously two blank
   * lines were collapsed to none while one blank line was kept).
   *
   * NOTE(review): the export pattern is lazy, so it only strips the `export`
   * keyword and the whitespace after it, not the whole statement. Kept unchanged
   * because a line-based removal would also delete lines from code samples.
   *
   * @param {string} content - Raw chunk text.
   * @returns {string} Cleaned, trimmed text.
   */
  cleanContent(content) {
    return content
      .replace(/import\s+(?:\S.*?)??from\s+['"][^'"]*['"];?\s*/g, "")
      .replace(/export\s+.*?;?\s*/g, "")
      .replace(/\{\/\*[\s\S]*?\*\/\}/g, "")
      .replace(/\n\s*\n\s*\n/g, "\n\n")
      .trim();
  }

  /**
   * Parses, splits and cleans one MDX file.
   *
   * @param {string} filePath - Path of the MDX file.
   * @param {string} rootDir - Directory the menu path is computed relative to.
   * @returns {{id: string, content: string, name: string, index: number}[]} Cleaned chunks.
   */
  processMDXFile(filePath, rootDir) {
    const parsed = this.parseMDXFile(filePath, rootDir);
    return this.convertToDocumentChunks(parsed).map((chunk) => ({
      ...chunk,
      content: this.cleanContent(chunk.content),
    }));
  }
}

/**
 * Runs tasks with a bounded number of concurrent workers.
 *
 * Each worker pulls the next pending task until none are left, so no polling
 * timer is needed. With `continueOnError: false`, the first failure stops
 * further tasks from starting and rejects the returned promise.
 *
 * @param {Array<Function|Promise|*>} tasks - Functions returning a value/promise, or plain values/promises.
 * @param {{maxConcurrency?: number, continueOnError?: boolean}} [options]
 * @returns {Promise<{fulfilled: {index: number, value: *}[], rejected: {index: number, reason: *}[], duration: number}>}
 *   Outcomes in completion order, and elapsed milliseconds.
 * @throws The first task error when `continueOnError` is false.
 */
async function batchPromise(tasks, options = {}) {
  const { maxConcurrency = 3, continueOnError = true } = options;
  if (tasks.length === 0) {
    return { fulfilled: [], rejected: [], duration: 0 };
  }

  const startedAt = Date.now();
  const result = { fulfilled: [], rejected: [], duration: 0 };
  const thunks = tasks.map((task) => (typeof task === "function" ? task : () => task));

  let nextIndex = 0;
  let aborted = false;

  const worker = async () => {
    while (!aborted && nextIndex < thunks.length) {
      const index = nextIndex++;
      try {
        const value = await thunks[index]();
        result.fulfilled.push({ index, value });
      } catch (reason) {
        result.rejected.push({ index, reason });
        if (!continueOnError) {
          aborted = true;
          throw reason;
        }
      }
    }
  };

  // A non-positive or invalid limit previously left the promise pending forever;
  // always run at least one worker.
  const requested = Math.floor(Math.min(maxConcurrency, thunks.length));
  const workerCount = requested >= 1 ? requested : 1;

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  result.duration = Date.now() - startedAt;
  return result;
}

/**
 * Concurrency-limited counterpart of `Promise.allSettled`.
 *
 * @param {Array<Function|Promise|*>} tasks - See batchPromise.
 * @param {number} [maxConcurrency] - Maximum number of tasks in flight.
 * @returns {Promise<Array<{status: "fulfilled", value: *}|{status: "rejected", reason: *}>>}
 *   Outcomes in the same order as `tasks`.
 */
async function batchPromiseSettled(tasks, maxConcurrency = 3) {
  const outcome = await batchPromise(tasks, { maxConcurrency, continueOnError: true });
  const settled = Array.from({ length: tasks.length });
  for (const { index, value } of outcome.fulfilled) {
    settled[index] = { status: "fulfilled", value };
  }
  for (const { index, reason } of outcome.rejected) {
    settled[index] = { status: "rejected", reason };
  }
  return settled;
}

/**
 * Webpack plugin that uploads MDX documentation to a Dify knowledge base
 * after each completed compilation.
 */
class NextraDifyPlugin {
  options;
  difyClient;
  mdxParser;

  /**
   * @param {object} options - User options merged over the defaults below.
   *   `apiToken`, `knowledgeBaseId` and `projectName` have no defaults.
   */
  constructor(options) {
    this.options = {
      enableUpload: false,
      enableSplit: false,
      splitMarker: "---",
      include: ["**/*.mdx"],
      exclude: ["node_modules/**", ".next/**"],
      apiBaseUrl: "https://api.dify.ai/v1",
      maxChunkSize: 5e3,
      deleteAllDocumentsBeforeUpload: false,
      ...options,
    };
    this.difyClient = new DifyClient(
      this.options.apiToken,
      this.options.knowledgeBaseId,
      this.options.apiBaseUrl,
      this.options.process_rule || {
        mode: "custom",
        indexing_technique: "high_quality",
        rules: {
          pre_processing_rules: [{ id: "remove_extra_spaces", enabled: true }],
          parent_mode: "full-doc",
          segmentation: { separator: "\n", max_tokens: 2e3 },
          subchunk_segmentation: { separator: "\n", max_tokens: 400 },
        },
        doc_form: "hierarchical_model",
        doc_language: "Chinese",
      },
    );
    this.mdxParser = new MDXParser(this.options);
  }

  /**
   * Registers the plugin on the compiler's `done` hook.
   *
   * Failures are logged and never propagated, and the hook callback is always
   * invoked.
   *
   * @param {object} compiler - Webpack compiler; `compiler.context` is used as the search root.
   */
  apply(compiler) {
    compiler.hooks.done.tapAsync("NextraDifyPlugin", async (_stats, callback) => {
      prompts.intro("[NextraDifyPlugin] Starting document processing...");
      if (!this.options.enableUpload) {
        prompts.log.warn("[NextraDifyPlugin] Upload is disabled, skipping...");
        callback();
        return;
      }
      try {
        await this.processDocuments(compiler.context);
        prompts.log.success("Documents processed successfully");
      } catch (error) {
        prompts.log.error(`Error processing documents: ${formatError(error)}`);
      } finally {
        prompts.outro("Document processing completed");
        callback();
      }
    });
  }

  /**
   * Finds all MDX files under `rootDir` and uploads them, four files at a time.
   *
   * Per-file failures are counted and reported but do not abort the run.
   *
   * @param {string} rootDir - Directory to search.
   * @returns {Promise<void>}
   */
  async processDocuments(rootDir) {
    const { deleteAllDocumentsBeforeUpload, projectName } = this.options;
    if (deleteAllDocumentsBeforeUpload) {
      await this.difyClient.deleteAllDocumentsForCurProject(projectName);
      prompts.log.warn(`Deleted all documents for project: ${projectName}`);
    }

    const files = await this.findMDXFiles(rootDir);
    prompts.log.info(`[NextraDifyPlugin] Found ${files.length} MDX files`);

    // processFile logs and rethrows; the rejection must reach the batch runner,
    // otherwise every file is counted as a success.
    const tasks = files.map((file) => () => this.processFile(file, rootDir));
    const outcomes = await batchPromiseSettled(tasks, 4);

    const succeeded = outcomes.filter((outcome) => outcome.status === "fulfilled");
    const failed = outcomes.filter((outcome) => outcome.status === "rejected");
    prompts.log.success(`Processed ${succeeded.length} files successfully`);
    if (failed.length > 0) {
      prompts.log.error(`Failed to process ${failed.length} files`);
    }
  }

  /**
   * Resolves the `include` globs (minus `exclude`) to absolute file paths.
   *
   * @param {string} rootDir - Working directory for the glob search.
   * @returns {Promise<string[]>} De-duplicated absolute paths.
   */
  async findMDXFiles(rootDir) {
    const { include, exclude } = this.options;
    const found = [];
    for (const pattern of include || ["**/*.mdx"]) {
      const matches = await glob.glob(pattern, { cwd: rootDir, absolute: true, ignore: exclude });
      found.push(...matches);
    }
    return [...new Set(found)];
  }

  /**
   * Uploads every chunk of one MDX file.
   *
   * Unless all project documents were already deleted up front, the stored
   * chunks of this file are removed first.
   *
   * @param {string} filePath - Path of the MDX file.
   * @param {string} rootDir - Directory names are computed relative to.
   * @returns {Promise<void>}
   * @throws Rethrows any parse, delete or upload error after logging it.
   */
  async processFile(filePath, rootDir) {
    prompts.log.step(`Processing file: ${filePath}`);
    try {
      const { deleteAllDocumentsBeforeUpload } = this.options;
      const chunks = this.mdxParser.processMDXFile(filePath, rootDir);
      if (!deleteAllDocumentsBeforeUpload) {
        const [firstChunk] = chunks || [];
        if (firstChunk) {
          await this.difyClient.deleteModuleDocuments(firstChunk.name);
          prompts.log.warn(`Deleted module documents: ${firstChunk.name}`);
        }
      }
      for (const chunk of chunks) {
        await this.uploadChunk(chunk);
        prompts.log.info(`Uploaded chunk: ${chunk.name}`);
      }
    } catch (error) {
      prompts.log.error(`Error processing file ${filePath}: ${formatError(error)}`);
      throw error;
    }
  }

  /**
   * Uploads one chunk and logs whether it was created or updated.
   *
   * @param {{name: string, content: string}} chunk - Chunk to upload.
   * @returns {Promise<void>}
   * @throws Rethrows the upload error after logging it.
   */
  async uploadChunk(chunk) {
    try {
      const outcome = await this.difyClient.uploadDocument(chunk.name, chunk.content);
      const verb = outcome.operation === "create" ? "Created" : "Updated";
      prompts.log.success(`${verb} document: ${chunk.name}`);
    } catch (error) {
      prompts.log.error(`Error uploading chunk ${chunk.name}: ${formatError(error)}`);
      throw error;
    }
  }

  /**
   * Checks that the mandatory options are present.
   *
   * @param {object} options - Options to validate.
   * @throws {Error} When `knowledgeBaseId`, `apiToken` or `projectName` is missing.
   */
  static validateOptions(options) {
    if (!options.knowledgeBaseId) {
      throw new Error("NextraDifyPlugin: knowledgeBaseId is required");
    }
    if (!options.apiToken) {
      throw new Error("NextraDifyPlugin: apiToken is required");
    }
    if (!options.projectName) {
      throw new Error("NextraDifyPlugin: projectName is required");
    }
  }
}

exports.DifyClient = DifyClient;
exports.MDXParser = MDXParser;
exports.NextraDifyPlugin = NextraDifyPlugin;