UNPKG

deep-research

Version:

Open source deep research TS/JS library with built-in web search, reasoning, and bibliography generation

167 lines (132 loc) 21.8 kB
var O=Object.defineProperty;var g=(t,e)=>O(t,"name",{value:e,configurable:!0});import{createDeepInfra as U}from"@ai-sdk/deepinfra";import{createOpenAI as q}from"@ai-sdk/openai";import{generateObject as v,generateText as G}from"ai";import{z as f}from"zod";import{retryAsync as Y,createExponetialDelay as D}from"ts-retry";import{JigsawStack as F}from"jigsawstack";const $=class ${constructor({OPENAI_API_KEY:e,DEEPINFRA_API_KEY:s,defaultModel:a,reasoningModel:o,outputModel:i}){this.providers=new Map,this.models={default:a||q({apiKey:e}).languageModel("gpt-4.1"),reasoning:o||U({apiKey:s}).languageModel("deepseek-ai/DeepSeek-R1-0528"),output:i||q({apiKey:e}).languageModel("gpt-4.1")}}static getInstance({OPENAI_API_KEY:e,DEEPINFRA_API_KEY:s,defaultModel:a,reasoningModel:o,outputModel:i}){return $.instance||($.instance=new $({OPENAI_API_KEY:e,DEEPINFRA_API_KEY:s,defaultModel:a,reasoningModel:o,outputModel:i})),$.instance}getModel(e){return this.models[e]}getProvider(e){return this.providers.get(e)}};g($,"AIProvider");let N=$;const I={max_depth:3,max_breadth:3,max_output_tokens:32e3,logging:{enabled:!1}},K=g(({prompt:t,queries:e,research_sources:s})=>` You are a world-class context generator. Your task is to generate a context overview for the following queries and sources that relates to the main prompt: Extract all the information from the sources that is relevant to the main prompt. Main Prompt: ${t} Sub-Queries and Sources: ${e?.map(a=>{const o=s?.filter(i=>i.url&&i.url.length>0);return o&&o.length>0?`**${a}** ${o.map(i=>` [${i.reference_number}] ${i.title||"No title"} (${i.url}) Content and Snippets: ${i.content?i.content:i.snippets?.join(` `)}`).join(` `)}`:`**${a}** (No sources found)`}).join(` `)} `.trim(),"CONTEXT_GENERATION_PROMPT"),Q=g(({prompt:t,reasoning:e,queries:s,sources:a,config:o})=>{const i=`You are a world-class research planner. 
Your primary goal is to construct a comprehensive research plan and a set of effective search queries to thoroughly investigate the given prompt. INSTRUCTIONS: 1. A Detailed Research Plan: - Clearly outline the overall research strategy and methodology you propose. - Identify key areas, themes, or sub-topics that need to be investigated to ensure comprehensive coverage of the prompt. - Suggest the types of information, data, or sources (e.g., academic papers, official reports, news articles, expert opinions) that would be most valuable for this research. - The plan should be logical, actionable, and designed for efficient information gathering. 2. A List of Focused Search Queries: - Generate a list of specific and targeted search queries. - These queries should be optimized to yield relevant, high-quality, and diverse search results from search engines. - The set of queries should collectively aim to cover the main aspects identified in your research plan. - Ensure queries are distinct and avoid redundancy. 3. Generate how deep the research should be: - Generate a number to determine how deep the research should be to fully explore this prompt 4. Generate how broad the research should be: - Generate a number to determine how broad the research should be to fully explore this prompt Output in the given JSON schema. 
`.trim(),r=` ${e?`Reasoning: ${e}`:""} ${s?` Sub-Queries and Sources previously generated: ${s.map(l=>{const p=a?.find(n=>n.query===l);return p&&p.search_results.results.length>0?`**${l}** ${p.search_results.results.map(n=>` [${n.reference_number}] ${n.title||"No title"} (${n.url}) Content and Snippets: ${n.content?n.content:n.snippets?.join(` `)}`).join(` `)}`:`**${l}** (No sources found)`}).join(` `)}`:""} User Prompt: ${t} `.trim(),c=f.object({subQueries:f.array(f.string()).min(1).max(o.max_breadth).describe("An array of high-quality, non-redundant search queries (min 1, max N) that together provide comprehensive research coverage for the user prompt"),researchPlan:f.string().describe("A detailed plan explaining the research approach and methodology"),depth:f.number().min(1).max(o.max_depth).describe("A number representing the depth of the research"),breadth:f.number().min(1).max(o.max_breadth).describe("A number representing the breadth of the research")});return{system:i,user:r,schema:c}},"RESEARCH_PROMPT_TEMPLATE"),W=g(({prompt:t,reasoning:e,queries:s,sources:a,researchPlan:o})=>{const i=` You are a world-class analyst. Your primary purpose is to help decide if the data provided is sufficient to complete the given prompt. Current datetime is: ${new Date().toISOString()} INSTRUCTIONS: - If the reasoning is sufficient to answer the prompt set "isComplete" to true. - In either case, provide a brief explanation in "reason" describing your judgement. Response in the given JSON schema. 
`.trim(),r=` Research Plan: "${o}" Sub-Queries and Sources previously generated: ${s.map(l=>{const p=a?.find(n=>n.query===l);return p&&p.search_results.results.length>0?`**${l}** ${p.search_results.results.map(n=>` [${n.reference_number}] ${n.title||"No title"} (${n.url}) Content and Snippets: ${n.content?n.content:n.snippets?.join(` `)}`).join(` `)}`:`**${l}** (No sources found)`}).join(` `)} Reasoning generated previously: "${e}" Prompt: "${t}" `.trim(),c=f.object({isComplete:f.boolean().describe("If the reasoning is sufficient to answer the main prompt set to true."),reason:f.string().describe("The reason for the decision")});return{system:i,user:r,schema:c}},"DECISION_MAKING_PROMPT"),L=g(({prompt:t,researchPlan:e,queries:s,sources:a})=>{const o="".trim(),i=` Proposed research plan: "${e}" Context for each query: ${s?.map(r=>{const c=a?.find(l=>l.query===r);return c?`**Query: ${r}** Context: ${c.context}`:`**Query: ${r}** Context: No context found`}).join(` `)} Prompt: "${t}" `.trim();return{system:o,user:i}},"REASONING_SEARCH_RESULTS_PROMPT"),J=g(({prompt:t,sources:e,targetOutputTokens:s,researchPlan:a,reasoning:o,queries:i,phase:r,currentReport:c})=>{const l=s?s*3:void 0,p=l?Math.max(l-c.length,0):void 0,n=l?c.length>=l:void 0,u=` You are a world-class analyst. Your primary purpose is to help users answer their prompt. GENERAL GUIDELINES: - If you are about to reach your output token limit, ensure you properly close all JSON objects and strings to prevent parsing errors. - Only use the sources provided in the context. - Cite every factual claim or statistic with in-text references using the reference numbers by the sources provided (e.g. "[1]"). - **Never repeat a heading that is already present in the Existing Draft.** - When writing mathematical equations, always use single dollar sign syntax ($...$) for inline equations and double dollar signs ($$...$$) for block equations. Do not use (...) or [...] delimiters. 
INSTRUCTIONS: - generate in the - Make sure your report is addressing the prompt. - Make sure your report is comprehensive and covers all the sub-topics. - Make sure your report is well-researched and well-cited. - Make sure your report is well-written and well-structured. - Make sure your report is well-organized and well-formatted. `;let d="";switch(r){case"initial":d=` Do not generate a reference or conclusion section. Return phase as "continuation" `;break;case"continuation":n===!1?d=` Generate a continuation of the report. No need to include the initial report. ${p?`You still need \u2248${p.toLocaleString()} characters.`:""} Do not generate a reference or conclusion section. Return phase as "continuation" `:d=` - This is your FINAL response for this question. - If the provided sources are insufficient, give your best definitive answer. - YOU MUST conclude your answer now, regardless of whether you feel it's complete. Return phase as "done" `;break}const w=` ${s?`Target length: \u2248 ${(s*3).toLocaleString()} characters (${s} tokens \xD73)`:""} CONTEXT: Latest Research Plan: ${a} Latest Reasoning Snapshot: ${o} Sub-Queries and Sources: ${i?.map(b=>{const y=e?.find(_=>_.query===b);return y&&y.search_results.results.length>0?`**${b}** ${y.search_results.results.map(_=>` [${_.reference_number}] ${_.title||"No title"} (${_.url}) Content and Snippets: ${_.content?_.content:_.snippets?.join(` `)}`).join(` `)}`:`**${b}** (No sources found)`}).join(` `)} ${c?`Current Draft: ${c}`:""} ${d} Prompt: "${t}" `.trim(),m=f.object({text:f.string().describe("The final report"),phase:f.enum(["initial","continuation","done"]).describe("The phase of the report")});return{system:u,user:w,schema:m}},"FINAL_REPORT_PROMPT"),E={research:Q,reasoningSearchResults:L,decisionMaking:W,finalReport:J,contextGeneration:K},P=class P{constructor(){this._enabled=!1}static getInstance(){return P._instance||(P._instance=new 
P),P._instance}setEnabled(e){this._enabled=e}log(...e){this._enabled&&console.log(...e)}error(...e){console.error(...e)}warn(...e){this._enabled&&console.warn(...e)}info(...e){this._enabled&&console.info(...e)}};g(P,"Logger");let k=P;const h=k.getInstance(),z=g(async({reasoning:t,prompt:e,aiProvider:s,queries:a,sources:o,researchPlan:i})=>{const r=E.decisionMaking({reasoning:t,prompt:e,queries:a,sources:o,researchPlan:i}),c=await v({model:s.getModel("default"),output:"object",system:r.system,prompt:r.user,schema:r.schema,temperature:0});return{decision:c,usage:c.usage}},"decisionMaking"),H=g(async({prompt:t,researchPlan:e,sources:s,queries:a,aiProvider:o})=>{try{const i=E.reasoningSearchResults({prompt:t,researchPlan:e,sources:s,queries:a});h.log("REASONING WITH",i);const r=await G({model:o.getModel("reasoning"),prompt:i.user});if(r.reasoning)return{reasoning:r.reasoning,usage:r.usage};const c=r.text.match(/<think>([\s\S]*?)<\/think>|<thinking>([\s\S]*?)<\/thinking>/);return c?{reasoning:c[1]||c[2],usage:r.usage}:{reasoning:r.text,usage:r.usage}}catch(i){throw h.error("Fatal error in reasoningSearchResults:",i.message||i),h.error(" Error details:",i),new Error(`reasoning evaluation failed: ${i.message||"Unknown error"}`)}},"reasoningSearchResults"),X=g(async({report:t,sources:e})=>{const s=new Map;e.forEach(c=>{c.search_results&&Array.isArray(c.search_results.results)&&c.search_results.results.forEach(l=>{l.reference_number&&s.set(l.reference_number,l)})}),h.log(`Reference map size: ${s.size}`);const a=/\[(\d+(?:\s*,\s*\d+)*)\]/g,o=t.replace(a,(c,l)=>{const p=l.split(",").map(n=>parseInt(n.trim(),10));if(p.length===1){const n=p[0],u=s.get(n);return u?`[[${n}](${u.url})]`:(h.log(`No source found for citation [${n}]`),c)}else return`[${p.map(u=>{const d=s.get(u);return d?`[${u}](${d.url})`:(h.log(`No source found for citation part ${u}`),`${u}`)}).join(", ")}]`});let i=` ## References `;const r=Array.from(s.entries()).sort((c,l)=>c[0]-l[0]);return h.log(`Generating 
bibliography with ${r.length} entries`),r.forEach(([c,l])=>{const p=l.title||"No title";i+=`${c}. [${p}](${l.url}) `}),{reportWithSources:o,bibliography:i}},"processReportForSources"),Z=g(async({sources:t,prompt:e,maxOutputTokens:s,targetOutputTokens:a,aiProvider:o,reasoning:i,researchPlan:r,queries:c})=>{let l="",p=0,n="initial",u=0;do{h.log(`[Iteration ${p}] phase=${n}`);const m=E.finalReport({currentReport:l,prompt:e,sources:t,targetOutputTokens:a,researchPlan:r,reasoning:i,queries:c,phase:n});h.log(` [Iteration ${p}] phase=${n}`),h.log(`SYSTEM PROMPT: `+m.system),h.log(`USER PROMPT: `+m.user);const b=await v({model:o.getModel("output"),system:m.system,prompt:m.user,schema:m.schema,experimental_repairText:g(async({text:y,error:_})=>_&&_.message&&_.message.includes("Unterminated string")?y+'"}':y,"experimental_repairText")});if(n=b.object.phase,l+=b.object.text,h.log(`PHASE==============================: `+b.object.phase),h.log(`MODEL OUTPUT: `+b.object.text),n==="continuation"){const y=a?a*4:void 0;y&&l.length>=y&&(n="done"),s&&l.length>=s/3-2e3&&(n="done")}p++,u+=b.usage.totalTokens}while(n!=="done");const{reportWithSources:d,bibliography:w}=await X({report:l,sources:t});return h.log("Done processing report for sources"),{report:d,bibliography:w,tokenUsage:u}},"generateFinalReport"),B=g(async({aiProvider:t,prompt:e,reasoning:s,queries:a,sources:o,config:i})=>{try{const r=E.research({prompt:e,reasoning:s,queries:a,sources:o,config:i}),c=await v({model:t.getModel("default"),system:r.system,prompt:r.user,schema:r.schema,mode:"json"});return h.log("Research Prompts",E.research({prompt:e,reasoning:s,queries:a,sources:o,config:i})),{subQueries:c.object.subQueries,researchPlan:c.object.researchPlan,depth:c.object.depth,breadth:c.object.breadth,tokenUsage:c.usage}}catch(r){throw h.error(`Error generating research plan: ${r.message||r}`),new Error(`Research evaluation failed: ${r.message||"Unknown error"}`)}},"generateResearchPlan"),T=class T{static 
cleanContent(e){const s=e.content?this.contentPipeline(e.content):void 0,a=e.snippets?e.snippets.map(o=>this.contentPipeline(o)):void 0;return{...e,content:s,snippets:a}}static contentPipeline(e){return this.contentSteps.reduce((s,a)=>a(s),e)}};g(T,"ContentCleaner");let R=T;R.contentSteps=[t=>t.replace(/<[^>]*>/g," "),t=>t.replace(/\.[A-Za-z][\w-]*\s*\{[^}]*\}/g,""),t=>t.replace(/\.(MJX|mjx)[-\w]*\s*\{[^}]*\}/g,""),t=>t.replace(/@font-face\s*\{[^}]*\}/g,""),t=>t.replace(/\b(display|position|font-family|src|font-weight|font-style|margin|padding|border|width|height|min-width|max-width|text-align|line-height|box-sizing):[^;}]*(;|$)/g,""),t=>t.replace(/\w+(\.\w+)*\s*\{[^{}]*\}/g,""),t=>t.replace(/url\([^)]*\)/g,""),t=>t.replace(/\.mjx-chtml\s*\{[^}]*\}/g,""),t=>t.replace(/\.mjx-[-\w]+/g,""),t=>t.replace(/\s+/g," "),t=>t.replace(/[^\w\s.,!?;:()"'-]/g," "),t=>t.replace(/[""]/g,'"').replace(/['']/g,"'"),t=>t.replace(/(\d+)([a-zA-Z])/g,"$1 $2").replace(/([a-zA-Z])(\d+)/g,"$1 $2").replace(/\.{3,}/g,"...").replace(/\s*-\s*/g," - "),t=>t.replace(/https?:\/\/\S+/g,""),t=>t.replace(/([.!?])\s*([A-Z])/g,"$1 $2"),t=>t.replace(/l\.mjx-chtml/g,""),t=>t.replace(/X\.mjx-chtml/g,""),t=>t.replace(/format\(\'woff\'\)/g,""),t=>t.replace(/format\(\'opentype\'\)/g,""),t=>t.replace(/\{\s*\}/g,""),t=>t.replace(/\s{2,}/g," "),t=>t.trim(),t=>{const e=t.slice(-1);return!".,!?".includes(e)&&t.length>0?t+".":t}];const V=g(({sources:t})=>{const e=new Map;return t.map(s=>({...s,search_results:{results:s.search_results.results.filter(a=>e.has(a.url)?!1:(e.set(a.url,!0),!0))}}))},"deduplicateSearchResults"),ee=g(({sources:t})=>{const e=new Map;let s=1;return t.map(a=>({...a,search_results:{results:a.search_results.results.map(o=>(e.has(o.url)||e.set(o.url,s++),{...o,reference_number:e.get(o.url)||0}))}}))},"mapSearchResultsToNumbers"),x=class x{constructor({apiKey:e}){this.jigsawInstance=F({apiKey:e||process.env.JIGSAW_API_KEY})}static getInstance({apiKey:e}){return x.instance||(x.instance=new 
x({apiKey:e})),x.instance}};g(x,"JigsawProvider");let j=x;const S=class S{constructor(e){this.jigsaw=null,this.customSearchFunction=null,e.web_search&&(this.customSearchFunction=e.web_search),e.JIGSAW_API_KEY&&(this.jigsaw=j.getInstance({apiKey:e.JIGSAW_API_KEY}))}static getInstance(e){return S.instance||(S.instance=new S(e)),S.instance}async fireWebSearches(e){const s=e.map(async a=>{try{if(this.customSearchFunction)return await this.customSearchFunction(a);if(this.jigsaw){const o=await Y(async()=>await this.jigsaw.jigsawInstance.web.search({query:a,ai_overview:!1}),{delay:D(2e3),maxTry:3,onError:g((r,c)=>(console.warn(`API request failed (attempt ${c}/3):`,r.message),!0),"onError")});if(!o||!o.results)throw console.error("Invalid response structure:",o),new Error("Invalid search response structure");const i=o.results.slice(0,5).map(r=>({...R.cleanContent(r)})).filter(r=>r.content&&r.content.length>0||r.snippets&&r.snippets.length>0);return{...o,search_results:{results:i}}}throw new Error("No search method available")}catch(o){return console.error("Full error details:",o),{query:a,search_results:{results:[]}}}});return Promise.all(s)}async searchAndGenerateContext({queries:e,prompt:s,aiProvider:a,sources:o}){const r=(await this.fireWebSearches(e)).filter(u=>u.search_results&&u.search_results.results&&u.search_results.results.length>0);if(r.length===0)return console.warn("No search results found for any query"),[];const c=r.map(u=>u.query),l=await this.contextGenerator({queries:c,sources:r,prompt:s,aiProvider:a}),p=r.map((u,d)=>{const w=u.search_results.results.filter(m=>m.content&&m.content.trim()!==""||m.snippets&&m.snippets.length>0);return w.length===0?null:{query:u.query,search_results:{results:w},context:l[d]||"",geo_results:u.geo_results,image_urls:u.image_urls,links:u.links}}).filter(u=>u!==null);return V({sources:[...o,...p]})}async contextGenerator({queries:e,sources:s,prompt:a,aiProvider:o}){try{return await Promise.all(e.map(async r=>{const 
l=(s.find(n=>n.query===r)?.search_results.results||[]).map(n=>!n.content||n.content.trim()===""?n.snippets&&n.snippets.length>0?{...n,content:n.snippets.join(` `)}:null:n).filter(n=>n!==null);return(await v({model:o.getModel("default"),prompt:E.contextGeneration({prompt:a,queries:[r],research_sources:l}),schema:f.object({context:f.string().describe("The context overview")})})).object.context}))}catch(i){return console.error("Error generating context overview:",i),"Error generating context overview."}}};g(S,"WebSearchProvider");let M=S;const C=class C{constructor(e){this.prompt="",this.finalReport="",this.tokenUsage={research_tokens:0,reasoning_tokens:0,report_tokens:0,decision_tokens:0,total_tokens:0},this.researchPlan="",this.reasoning="",this.decision={isComplete:!1,reason:""},this.logger=k.getInstance(),this.queries=[],this.sources=[],this.config=this.validateConfig(e),this.config.logging&&this.config.logging.enabled!==void 0&&this.logger.setEnabled(this.config.logging.enabled),this.webSearchProvider=M.getInstance(this.config),this.aiProvider=N.getInstance({OPENAI_API_KEY:this.config.OPENAI_API_KEY,DEEPINFRA_API_KEY:this.config.DEEPINFRA_API_KEY,defaultModel:this.config.models?.default,reasoningModel:this.config.models?.reasoning,outputModel:this.config.models?.output})}validateConfig(e){if(e.max_output_tokens&&e.target_output_tokens&&e.max_output_tokens<e.target_output_tokens)throw new Error("maxOutputChars must be greater than targetOutputChars");return{config:{...I,...e||{}},max_output_tokens:e.max_output_tokens||I.max_output_tokens,target_output_tokens:e.target_output_tokens,max_depth:e.max_depth||I.max_depth,max_breadth:e.max_breadth||I.max_breadth,JIGSAW_API_KEY:e.JIGSAW_API_KEY||process.env.JIGSAW_API_KEY||(e.web_search?null:(()=>{throw new Error("JIGSAW_API_KEY must be provided in config")})()),OPENAI_API_KEY:e.OPENAI_API_KEY||process.env.OPENAI_API_KEY||(e.models?.default&&e.models?.output?null:(()=>{throw new Error("Either OPENAI_API_KEY or 
models.default and models.output must be provided in config")})()),DEEPINFRA_API_KEY:e.DEEPINFRA_API_KEY||process.env.DEEPINFRA_API_KEY||(e.models?.reasoning?null:(()=>{throw new Error("DeepInfra API key must be provided in config")})()),logging:{...I.logging,...e.logging||{}},models:{...e.models||{}},web_search:e.web_search}}async generate(e){h.log(`Running research with prompt: ${e}`),this.prompt=e;let s=0;do{s++,h.log(`[Step 1] Generating research plan... at ${s}`);const{subQueries:r,researchPlan:c,depth:l,breadth:p,tokenUsage:n}=await B({aiProvider:this.aiProvider,prompt:this.prompt,reasoning:this.reasoning,queries:this.queries,sources:this.sources,config:this.config});this.queries=[...this.queries||[],...r],this.researchPlan=c,this.config.max_depth=l,this.config.max_breadth=p,this.tokenUsage.research_tokens=n.totalTokens,h.log(`Research plan: ${this.researchPlan}`),h.log(`Research queries: ${this.queries.join(` `)}`),h.log(`Research depth and breadth: ${this.config.max_depth} ${this.config.max_breadth}`),h.log(`[Step 2] Running initial web searches with ${this.queries.length} queries...`);const u=await this.webSearchProvider.searchAndGenerateContext({queries:this.queries,prompt:this.prompt,aiProvider:this.aiProvider,sources:this.sources});this.sources=u,h.log("[Step 3] Reasoning about the search results...");const d=await H({prompt:this.prompt,researchPlan:this.researchPlan,sources:this.sources,queries:this.queries,aiProvider:this.aiProvider});this.reasoning=d.reasoning,this.tokenUsage.reasoning_tokens=d.usage.totalTokens,h.log(`Reasoning: ${d}`),h.log("[Step 4] Decision making...");const{decision:w,usage:m}=await z({reasoning:this.reasoning,prompt:this.prompt,queries:this.queries,sources:this.sources,researchPlan:this.researchPlan,aiProvider:this.aiProvider});this.decision=w.object,this.tokenUsage.decision_tokens=m.totalTokens,h.log(`Decision making: ${this.decision.isComplete} 
${this.decision.reason}`)}while(!this.decision.isComplete&&s<this.config.max_depth);this.sources=ee({sources:this.sources}),h.log("[Step 5] Generating report...");const{report:a,bibliography:o,tokenUsage:i}=await Z({sources:this.sources,prompt:this.prompt,targetOutputTokens:this.config.target_output_tokens,maxOutputTokens:this.config.max_output_tokens,aiProvider:this.aiProvider,reasoning:this.reasoning,researchPlan:this.researchPlan,queries:this.queries});return this.tokenUsage.report_tokens=i,this.tokenUsage.total_tokens=this.tokenUsage.research_tokens+this.tokenUsage.reasoning_tokens+this.tokenUsage.decision_tokens+this.tokenUsage.report_tokens,{status:"success",data:{text:a,bibliography:o,metadata:{prompt:this.prompt,reasoning:this.reasoning,research_plan:this.researchPlan,queries:this.queries,sources:this.sources}},_usage:this.tokenUsage}}};g(C,"DeepResearch");let A=C;const te=g(t=>new A(t),"createDeepResearch");export{A as DeepResearch,te as createDeepResearch};