UNPKG

deepsearcher

Version:

DeepResearch agent built with LangGraph; use any LLM and web-search provider to build your own deep research agent.

90 lines (71 loc) 12.8 kB
// De-minified build of the DeepResearch LangGraph agent.
// NOTE(review): this module was recovered from a minified bundle whose original
// line breaks were lost; prompt-template newlines below are a reconstruction of
// the evident bullet-list structure — confirm against the package's source repo.
import { Send, START, END, StateGraph, addMessages, Annotation } from "@langchain/langgraph";
import { RunnableSequence } from "@langchain/core/runnables";
import { ChatOpenAI } from "@langchain/openai";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { StructuredOutputParser } from "@langchain/core/output_parsers";
import { HumanMessage, AIMessage } from "@langchain/core/messages";
import { z } from "zod";

/** Structured output expected from the query-generation step. */
const SearchQueryListSchema = z.object({
  query: z
    .array(z.string())
    .describe("A list of search queries to be used for research."),
  rationale: z
    .string()
    .describe("A brief explanation of why these queries are relevant to the research topic."),
});

/** Structured output expected from the reflection step. */
const ReflectionResultSchema = z.object({
  isSufficient: z
    .boolean()
    .describe("Whether the provided summaries are sufficient to answer the user's question."),
  knowledgeGap: z
    .string()
    .describe("A description of what information is missing or needs clarification."),
  followUpQueries: z
    .array(z.string())
    .describe("A list of follow-up queries to address the knowledge gap."),
});

/** Overall graph state shared by every node. List-valued channels accumulate via concat. */
const OverallState = Annotation.Root({
  messages: Annotation({ reducer: addMessages, default: () => [] }),
  generatedQueries: Annotation,
  searchedQueries: Annotation({ reducer: (left, right) => left.concat(right), default: () => [] }),
  researchResult: Annotation({ reducer: (left, right) => left.concat(right), default: () => [] }),
  sourcesGathered: Annotation({ reducer: (left, right) => left.concat(right), default: () => [] }),
  researchLoopCount: Annotation,
  reflectionState: Annotation,
});

// The following two annotations are unused in this bundle (tree-shake residue);
// kept for fidelity with the published artifact.
const FinalAnswerState = Annotation.Root({
  sourcesGathered: Annotation,
  messages: Annotation,
});
const ReflectionState = Annotation.Root({
  isSufficient: Annotation,
  knowledgeGap: Annotation,
  followUpQueries: Annotation,
  numberOfRanQueries: Annotation,
});

/** Per-branch input for the "research" node (one Send per query). */
const QueryState = Annotation.Root({
  query: Annotation,
  id: Annotation,
});

/** Runtime configuration accepted via `config.configurable`. */
const ConfigurationSchema = z.object({
  queryGeneratorModel: z
    .string()
    .describe("The name of the language model to use for the agent's query generation."),
  reflectionModel: z
    .string()
    .describe("The name of the language model to use for the agent's reflection."),
  answerModel: z
    .string()
    .describe("The name of the language model to use for the agent's answer."),
  numberOfInitialQueries: z
    .number()
    .describe("The number of initial search queries to generate."),
  maxResearchLoops: z
    .number()
    .describe("The maximum number of research loops to perform."),
});

const DEFAULT_CONFIGURATION = {
  numberOfInitialQueries: 3,
  maxResearchLoops: 3,
};

/**
 * Resolve and validate the run configuration.
 *
 * @param {object|undefined} config - LangGraph RunnableConfig (reads `config.configurable`).
 * @returns {{queryGeneratorModel: string, reflectionModel: string, answerModel: string,
 *            numberOfInitialQueries: number, maxResearchLoops: number}}
 * @throws {Error} when any of the three required model names is missing.
 */
function ensureConfiguration(config) {
  const configurable = config?.configurable ?? {};
  if (!configurable.answerModel || !configurable.queryGeneratorModel || !configurable.reflectionModel) {
    throw new Error("Missing required model configuration: answerModel, queryGeneratorModel, and reflectionModel must be provided");
  }
  const resolved = {
    queryGeneratorModel: configurable.queryGeneratorModel,
    reflectionModel: configurable.reflectionModel,
    answerModel: configurable.answerModel,
    numberOfInitialQueries: configurable.numberOfInitialQueries ?? DEFAULT_CONFIGURATION.numberOfInitialQueries,
    maxResearchLoops: configurable.maxResearchLoops ?? DEFAULT_CONFIGURATION.maxResearchLoops,
  };
  // Numeric settings may arrive as strings (e.g. from environment variables); coerce them.
  Object.entries(resolved).forEach(([key, value]) => {
    if (value !== undefined && (key === "numberOfInitialQueries" || key === "maxResearchLoops")) {
      resolved[key] = typeof value === "string" ? parseInt(value, 10) : value;
    }
  });
  return resolved;
}

/** Prompt for the initial query-generation step. */
const QUERY_WRITER_PROMPT = `Your goal is to generate sophisticated and diverse search queries. These queries are intended for an advanced automated research tool capable of analyzing complex results, following links, and synthesizing information.

Instructions:
- Always prefer a single search query, only add another query if the original question requests multiple aspects or elements and one query is not enough.
- Each query should focus on one specific aspect of the original question.
- Don't produce more than {number_queries} queries.
- Queries should be diverse, if the topic is broad, generate more than 1 query.
- Don't generate multiple similar queries, 1 is enough.
- Query should ensure that the most current information is gathered. The current date is {current_date}.
- Use specific keywords and technical terms rather than long descriptive sentences.
- Focus on core concepts, product names, versions, or specific features for better search results.
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

{format_instructions}

Context: {research_topic}`;

/** Prompt for the per-query research/synthesis step. */
const WEB_SEARCHER_PROMPT = `Conduct targeted searches to gather the most recent, credible information on "{research_topic}" and synthesize it into a verifiable text artifact.

Instructions:
- Query should ensure that the most current information is gathered. The current date is {current_date}.
- Conduct multiple, diverse searches to gather comprehensive information.
- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
- The output should be a well-written summary or report based on your search findings.
- Only include the information found in the search results, don't make up any information.
- For each key finding, use numbered citations in double square brackets [[1]], [[2]], etc., referring to the search result numbers below.
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

Citation Format:
- Use [[1]], [[2]], [[3]] etc. to cite specific search results
- Each important claim or data point must include a citation
- Multiple sources can be cited as [[1]][[2]]

Example output format:
"According to recent studies, XYZ technology has shown significant improvements [[1]]. Market adoption rates have increased by 25% in 2024 [[2]][[3]]."

Search Results:
{search_results}

Research Topic: {research_topic}
`;

/** Prompt for the reflection (knowledge-gap analysis) step. */
const REFLECTION_PROMPT = `You are an expert research assistant analyzing summaries about "{research_topic}".

Instructions:
- Identify knowledge gaps or areas that need deeper exploration and generate a follow-up query. (1 or multiple).
- If provided summaries are sufficient to answer the user's question, don't generate a follow-up query.
- If there is a knowledge gap, generate a follow-up query that would help expand your understanding.
- Don't produce more than {number_queries} follow-up queries.
- Focus on technical details, implementation specifics, or emerging trends that weren't fully covered.
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

Query Optimization Requirements:
- Ensure the follow-up query is self-contained and includes necessary context for search.
- Use specific keywords and technical terms rather than long descriptive sentences.
- Focus on core concepts, product names, versions, or specific features.
- Avoid overly complex or verbose phrasing that may reduce search effectiveness.

{format_instructions}

Reflect carefully on the Summaries to identify knowledge gaps and produce a follow-up query. Then, produce your output following this JSON format:

Summaries:
{summaries}
`;

/** Prompt for the final answer-synthesis step. */
const ANSWER_PROMPT = `Generate a high-quality answer to the user's question based on the provided summaries.

Instructions:
- The current date is {current_date}.
- You are the final step of a multi-step research process, don't mention that you are the final step.
- You have access to all the information gathered from the previous steps.
- You have access to the user's question.
- Generate a high-quality answer to the user's question based on the provided summaries and the user's question.
- you MUST include all the citations from the summaries in the answer correctly: [title](id/url).
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

User Context:
- {research_topic}

Summaries: {summaries}`;

/**
 * Flatten the conversation into a single research-topic string.
 *
 * Single message: returns its string content (joining text parts of a
 * multi-part content array); falls back to JSON for unknown shapes.
 * Multiple messages: renders a "User: ... / Assistant: ..." transcript.
 *
 * @param {Array} messages - LangChain messages (or raw strings).
 * @returns {string}
 */
function getResearchTopic(messages) {
  if (messages.length === 1) {
    const only = messages[messages.length - 1];
    if (typeof only === "string") return only;
    if ("content" in only) {
      if (typeof only.content === "string") return only.content;
      if (Array.isArray(only.content)) {
        return only.content.map((part) => (part.type === "text" ? part.text : "")).join("\n");
      }
    }
    return JSON.stringify(only);
  } else {
    let transcript = "";
    for (const message of messages) {
      if (message instanceof HumanMessage) {
        transcript += `User: ${message.content}\n`;
      } else if (message instanceof AIMessage) {
        transcript += `Assistant: ${message.content}\n`;
      }
    }
    return transcript;
  }
}

/**
 * Rewrite numbered citations ([[1]] or [1]) in a model response into
 * Markdown links pointing at the gathered sources.
 *
 * @param {{content: string}} message - model response whose content holds citations.
 * @param {Array<{title: string, id: string}>} sources - 1-indexed by citation number.
 * @returns {{content: string, segmentIndexes: string[]}} rewritten content plus the
 *   list of cited indexes (as strings) in order of appearance.
 */
function resolveCitations(message, sources) {
  const rawContent = message.content;
  // Normalize both [[n]] and [n] to an intermediate [citation](n) form.
  const normalize = (text) =>
    text.replace(/\[\[(\d+)]]/g, "[citation]($1)").replace(/\[(\d+)]/g, "[citation]($1)");
  // Collect every cited index, preserving order and duplicates.
  const collectIndexes = (text) => {
    const pattern = /\[citation\]\((\d+)\)/g;
    const indexes = [];
    let match;
    while ((match = pattern.exec(text)) !== null) {
      indexes.push(match[1]);
    }
    return indexes;
  };
  const normalized = normalize(rawContent);
  const citationPattern = /\[citation\]\((\d+)\)/g;
  const segmentIndexes = collectIndexes(normalized);
  return {
    content: normalized.replace(citationPattern, (token) => {
      const index = parseInt(token.match(/\((\d+)\)/)?.[1] ?? "0", 10);
      const source = sources[index - 1];
      if (!source) return token; // out-of-range citation: leave untouched
      const title = source.title.length > 32 ? source.title.slice(0, 32) + "..." : source.title;
      return `[${title}](${source.id})`;
    }),
    segmentIndexes,
  };
}

/** Current timestamp injected into prompts as {current_date}. */
const getCurrentDate = () => new Date().toISOString();

/** Graph node names. */
const NodeEnum = {
  GenerateQuery: "generate_query",
  Research: "research",
  Reflection: "reflection",
  FinalizeAnswer: "finalize_answer",
};

/** LangChain streaming event names consumers may filter on. */
const EventStreamEnum = {
  ChatModelStart: "on_chat_model_start",
  ChatModelStream: "on_chat_model_stream",
  ChatModelEnd: "on_chat_model_end",
};

/**
 * Deep-research agent: generate queries -> fan-out research -> reflect ->
 * loop or finalize, built on a LangGraph StateGraph.
 */
class DeepResearch {
  /**
   * @param {object} params
   * @param {(state: {query: string, id: string}) => Promise<Array>} params.searcher -
   *   web-search callback invoked once per research branch.
   * @param {object} [params.options] - ChatOpenAI client configuration (also read for `apiKey`).
   */
  constructor({ searcher, options }) {
    this.searcher = searcher;
    this.options = options;
  }

  /** Assemble and compile the research graph. */
  async compile() {
    const graph = new StateGraph(OverallState, ConfigurationSchema);
    graph.addNode("generate_query", this.generateQuery.bind(this));
    graph.addNode("research", this.research.bind(this), { input: QueryState });
    graph.addNode("reflection", this.reflection.bind(this));
    graph.addNode("finalize_answer", this.finalizeAnswer.bind(this));
    graph.addEdge(START, "generate_query");
    graph.addConditionalEdges("generate_query", this.continueToSearch.bind(this), ["research"]);
    graph.addEdge("research", "reflection");
    graph.addConditionalEdges("reflection", this.evaluateResearch.bind(this), ["research", "finalize_answer"]);
    graph.addEdge("finalize_answer", END);
    return graph.compile({ name: "DeepResearch" });
  }

  /**
   * Node: turn the conversation into a list of initial search queries.
   * Falls back to the raw research topic when the LLM fails or returns nothing.
   */
  async generateQuery(state, config) {
    const configuration = ensureConfiguration(config);
    const { numberOfInitialQueries } = configuration;
    const llm = new ChatOpenAI({
      model: configuration.queryGeneratorModel,
      temperature: 1,
      configuration: this.options,
      maxRetries: 2,
      apiKey: this.options?.apiKey,
    }).withConfig({ tags: ["generate_query"] });
    const researchTopic = getResearchTopic(state.messages);
    const currentDate = getCurrentDate();
    const prompt = ChatPromptTemplate.fromTemplate(QUERY_WRITER_PROMPT);
    const parser = StructuredOutputParser.fromZodSchema(SearchQueryListSchema);
    const chain = RunnableSequence.from([prompt, llm, parser]);
    try {
      const result = await chain.invoke({
        number_queries: numberOfInitialQueries,
        current_date: currentDate,
        research_topic: researchTopic,
        format_instructions: parser.getFormatInstructions(),
      });
      const queries = result.query || [];
      if (queries.length === 0) {
        console.warn("LLM returned empty query list, using original topic as fallback");
        return { generatedQueries: [researchTopic] };
      }
      return { generatedQueries: queries };
    } catch (error) {
      console.error("Failed to generate search queries:", error);
      console.warn("Using original topic as fallback due to LLM failure");
      return { generatedQueries: [researchTopic] };
    }
  }

  /** Conditional edge: fan out one Send("research") per generated query. */
  async continueToSearch(state) {
    const queries = state.generatedQueries || [];
    if (queries.length === 0) {
      // Defensive: generateQuery always emits at least one query, but keep a fallback.
      const topic = getResearchTopic(state.messages);
      return [new Send("research", { query: topic, id: "0" })];
    }
    return queries.map((query, index) => new Send("research", { query, id: index.toString() }));
  }

  /**
   * Node: run the searcher for one query, then have the LLM synthesize the
   * results into a cited summary. Only sources actually cited are gathered.
   */
  async research(state, config) {
    const configuration = ensureConfiguration(config);
    const { queryGeneratorModel } = configuration;
    const sources = await this.searcher(state);
    // Number the results so the model can cite them as [[n]].
    // NOTE(review): field separators were newlines in the published artifact;
    // exact blank-line layout is reconstructed.
    const searchResults = sources
      .map(
        ({ title, content, date, score }, index) =>
          `[[${index + 1}]].\nTitle: ${title}\nContent: ${content}\nDate: ${date ?? "N/A"}\nConfidence Score: ${score ?? "N/A"}`
      )
      .join("\n");
    const llm = new ChatOpenAI({
      model: queryGeneratorModel,
      temperature: 0.2,
      maxRetries: 2,
      configuration: this.options,
      apiKey: this.options?.apiKey,
    }).withConfig({ tags: ["research"] });
    const prompt = ChatPromptTemplate.fromTemplate(WEB_SEARCHER_PROMPT);
    const response = await RunnableSequence.from([prompt, llm]).invoke({
      current_date: getCurrentDate(),
      research_topic: state.query,
      search_results: searchResults,
    });
    const { content, segmentIndexes } = resolveCitations(response, sources);
    return {
      // Keep only sources the summary actually cited (indexes are 1-based strings).
      sourcesGathered: sources.filter((_, index) => segmentIndexes.includes(`${index + 1}`)),
      searchedQueries: [state.query],
      researchResult: [content],
    };
  }

  /**
   * Node: analyze accumulated summaries for knowledge gaps and propose
   * follow-up queries. On LLM failure, declares the research sufficient so
   * the graph can still finalize.
   */
  async reflection(state, config) {
    const configuration = ensureConfiguration(config);
    const { reflectionModel, numberOfInitialQueries } = configuration;
    const researchLoopCount = (state.researchLoopCount ?? 0) + 1;
    const researchTopic = getResearchTopic(state.messages);
    const summaries = state.researchResult.join("\n");
    const llm = new ChatOpenAI({
      model: reflectionModel,
      temperature: 0,
      maxRetries: 2,
      configuration: this.options,
      apiKey: this.options?.apiKey,
    }).withConfig({ tags: ["reflection"] });
    const prompt = ChatPromptTemplate.fromTemplate(REFLECTION_PROMPT);
    const parser = StructuredOutputParser.fromZodSchema(ReflectionResultSchema);
    const chain = RunnableSequence.from([prompt, llm, parser]);
    try {
      const result = await chain.invoke({
        research_topic: researchTopic,
        summaries,
        number_queries: numberOfInitialQueries,
        format_instructions: parser.getFormatInstructions(),
      });
      return {
        researchLoopCount,
        reflectionState: {
          isSufficient: result.isSufficient,
          knowledgeGap: result.knowledgeGap,
          followUpQueries: result.followUpQueries || [],
          numberOfRanQueries: state.searchedQueries.length,
        },
      };
    } catch (error) {
      console.error("Failed to generate reflection:", error);
      return {
        researchLoopCount,
        reflectionState: {
          isSufficient: true,
          knowledgeGap: "Unable to analyze knowledge gaps",
          followUpQueries: [],
          numberOfRanQueries: state.searchedQueries.length,
        },
      };
    }
  }

  /**
   * Conditional edge: finalize when the loop budget is exhausted, the
   * reflection judged the summaries sufficient, or no follow-ups exist;
   * otherwise fan out one Send("research") per follow-up query.
   */
  async evaluateResearch(state, config) {
    const { reflectionState, researchLoopCount } = state;
    const maxResearchLoops = ensureConfiguration(config).maxResearchLoops;
    const { followUpQueries = [], isSufficient, numberOfRanQueries } = reflectionState;
    if (researchLoopCount >= maxResearchLoops || isSufficient) {
      return "finalize_answer";
    }
    if (!followUpQueries || followUpQueries.length === 0) {
      console.warn("No follow-up queries generated, proceeding to finalize answer");
      return "finalize_answer";
    }
    // Continue ids where the previous round stopped so branch ids stay unique.
    return followUpQueries.map(
      (query, index) => new Send("research", { query, id: (numberOfRanQueries + index).toString() })
    );
  }

  /**
   * Node: synthesize the final answer from all research summaries and keep
   * only the sources whose id actually appears in the answer's citations.
   */
  async finalizeAnswer(state, config) {
    const configuration = ensureConfiguration(config);
    // BUG FIX: the published bundle destructured `reflectionModel` here, so the
    // required, validated `answerModel` setting was never used. The answer step
    // now uses the model configured (and documented) for answering.
    const { answerModel } = configuration;
    const currentDate = getCurrentDate();
    const researchTopic = getResearchTopic(state.messages);
    const summaries = state.researchResult.join("\n");
    if (!summaries.trim()) {
      return {
        messages: [new AIMessage("Sorry, no useful information was retrieved. Please try again later or ask a different question.")],
        sourcesGathered: [],
      };
    }
    const llm = new ChatOpenAI({
      model: answerModel,
      temperature: 0,
      maxRetries: 2,
      configuration: this.options,
      apiKey: this.options?.apiKey,
    }).withConfig({ tags: ["finalize_answer"] });
    const prompt = ChatPromptTemplate.fromTemplate(ANSWER_PROMPT);
    const response = await RunnableSequence.from([prompt, llm]).invoke({
      current_date: currentDate,
      research_topic: researchTopic,
      summaries,
    });
    const citedSources = [];
    for (const source of state.sourcesGathered) {
      const marker = `(${source.id})`;
      if (response.content.includes(marker)) {
        citedSources.push(source);
      }
    }
    return { messages: [new AIMessage(response.content)], sourcesGathered: citedSources };
  }
}

export { DeepResearch, EventStreamEnum, NodeEnum };