
deepsearcher


A DeepResearch agent built with LangGraph. Use any LLM and any web-search backend to build your own deep research agent.
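A minimal usage sketch, pieced together from the module source below. The searcher function, the result-field mapping, the model names, and myWebSearchApi are illustrative assumptions, not part of the package; only the constructor shape, options, and configurable keys come from the code.

// Hedged usage sketch; `myWebSearchApi` and the model names are assumptions.
import { DeepResearch, HumanMessage } from "deepsearcher";

// A searcher is any async function receiving { query, id, loopIndex } and
// returning search results. The research node reads title/content/date/score;
// the citation formatter reads id/url.
async function searcher({ query }) {
  const hits = await myWebSearchApi(query); // assumption: your own search client
  return hits.map((hit, index) => ({
    id: String(index + 1),
    title: hit.title,
    content: hit.snippet,
    url: hit.url,
    date: hit.publishedAt,
    score: hit.relevance,
  }));
}

const agent = new DeepResearch({
  searcher,
  options: {
    type: "openai", // "openai" | "anthropic" | "gemini" | "vertexai"
    apiKey: process.env.OPENAI_API_KEY,
    temperature: 0.1,
    enableCitationUrl: true,
  },
});

const graph = await agent.compile();

const result = await graph.invoke(
  { messages: [new HumanMessage("What is new in LangGraph 1.0?")] },
  {
    configurable: {
      // all three model names are required by the config validator
      queryGeneratorModel: "gpt-4o-mini",
      reflectionModel: "gpt-4o-mini",
      answerModel: "gpt-4o",
      numberOfInitialQueries: 3,
      maxResearchLoops: 2,
    },
  }
);

console.log(result.messages[result.messages.length - 1].content);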

import {
  Send,
  START,
  END,
  StateGraph,
  addMessages,
  Annotation,
} from "@langchain/langgraph";
import { ChatOpenAI } from "@langchain/openai";
import { ChatAnthropic } from "@langchain/anthropic";
import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
import { ChatVertexAI } from "@langchain/google-vertexai";
import { AIMessage, createAgent, HumanMessage, toolStrategy } from "langchain";
import {
  HumanMessage as CoreHumanMessage,
  AIMessage as CoreAIMessage,
} from "@langchain/core/messages";
import { z } from "zod";

// Structured output schema for the query-generation step.
const SearchQueriesSchema = z.object({
  query: z
    .array(z.string())
    .describe("A list of search queries to be used for research."),
  rationale: z
    .string()
    .describe("A brief explanation of why these queries are relevant to the research topic."),
});

// Structured output schema for the reflection step.
const ReflectionSchema = z.object({
  isSufficient: z
    .boolean()
    .describe("Whether the provided summaries are sufficient to answer the user's question."),
  knowledgeGap: z
    .string()
    .describe("A description of what information is missing or needs clarification."),
  followUpQueries: z
    .array(z.string())
    .describe("A list of follow-up queries to address the knowledge gap."),
});

// Overall graph state. Concat reducers accumulate results across parallel
// research branches; researchLoopCount keeps the highest loop index seen.
// Bare `Annotation` fields use LangGraph's default last-value channel.
const OverallState = Annotation.Root({
  messages: Annotation({ reducer: addMessages, default: () => [] }),
  rationale: Annotation,
  generatedQueries: Annotation,
  searchedQueries: Annotation({ reducer: (a, b) => a.concat(b), default: () => [] }),
  researchResult: Annotation({ reducer: (a, b) => a.concat(b), default: () => [] }),
  sourcesGathered: Annotation({ reducer: (a, b) => a.concat(b), default: () => [] }),
  researchLoopCount: Annotation({ reducer: (a, b) => Math.max(a, b ?? 0), default: () => 0 }),
  reflectionState: Annotation,
});

// Declared in the published bundle but not wired into the graph.
const OutputState = Annotation.Root({
  sourcesGathered: Annotation,
  messages: Annotation,
});
const ReflectionState = Annotation.Root({
  isSufficient: Annotation,
  knowledgeGap: Annotation,
  followUpQueries: Annotation,
  numberOfRanQueries: Annotation,
});

// Per-branch input for the research node (fanned out via Send).
const QueryState = Annotation.Root({
  query: Annotation,
  id: Annotation,
  loopIndex: Annotation,
});

// Runtime configuration, passed through `config.configurable`.
const ConfigurationSchema = z.object({
  queryGeneratorModel: z
    .string()
    .describe("The name of the language model to use for the agent's query generation."),
  reflectionModel: z
    .string()
    .describe("The name of the language model to use for the agent's reflection."),
  answerModel: z
    .string()
    .describe("The name of the language model to use for the agent's answer."),
  numberOfInitialQueries: z
    .number()
    .describe("The number of initial search queries to generate."),
  maxResearchLoops: z
    .number()
    .describe("The maximum number of research loops to perform."),
});

const DEFAULTS = { numberOfInitialQueries: 3, maxResearchLoops: 3 };

// Validates `config.configurable`, fills defaults, and coerces numeric
// fields that may arrive as strings.
function ensureConfiguration(config) {
  const configurable = config?.configurable ?? {};
  if (!configurable.answerModel || !configurable.queryGeneratorModel || !configurable.reflectionModel) {
    throw new Error(
      "Missing required model configuration: answerModel, queryGeneratorModel, and reflectionModel must be provided"
    );
  }
  const resolved = {
    queryGeneratorModel: configurable.queryGeneratorModel,
    reflectionModel: configurable.reflectionModel,
    answerModel: configurable.answerModel,
    numberOfInitialQueries: configurable.numberOfInitialQueries ?? DEFAULTS.numberOfInitialQueries,
    maxResearchLoops: configurable.maxResearchLoops ?? DEFAULTS.maxResearchLoops,
  };
  for (const [key, value] of Object.entries(resolved)) {
    if (value !== undefined && (key === "numberOfInitialQueries" || key === "maxResearchLoops")) {
      resolved[key] = typeof value === "string" ? parseInt(value, 10) : value;
    }
  }
  return resolved;
}

const QUERY_WRITER_PROMPT = `Your goal is to generate sophisticated and diverse search queries. These queries are intended for an advanced automated research tool capable of analyzing complex results, following links, and synthesizing information.

Instructions:
- Always prefer a single search query, only add another query if the original question requests multiple aspects or elements and one query is not enough.
- Each query should focus on one specific aspect of the original question.
- Don't produce more than {number_queries} queries.
- Queries should be diverse, if the topic is broad, generate more than 1 query.
- Don't generate multiple similar queries, 1 is enough.
- Query should ensure that the most current information is gathered. The current date is {current_date}.
- Use specific keywords and technical terms rather than long descriptive sentences.
- Focus on core concepts, product names, versions, or specific features for better search results.
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

Context: {research_topic}`;

const RESEARCH_PROMPT = `Conduct targeted searches to gather the most recent, credible information on "{research_topic}" and synthesize it into a verifiable text artifact.

Instructions:
- Query should ensure that the most current information is gathered. The current date is {current_date}.
- Conduct multiple, diverse searches to gather comprehensive information.
- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
- The output should be a well-written summary or report based on your search findings.
- Only include the information found in the search results, don't make up any information.
- For each key finding, use numbered citations in the format [[citation:1]], [[citation:2]], etc., referring to the search result numbers below.
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

Citation Format:
- Use [[citation:1]], [[citation:2]], [[citation:3]] etc. to cite specific search results
- Each important claim or data point must include a citation
- Multiple sources can be cited as [[citation:1]][[citation:2]]

Example output format:
"According to recent studies, XYZ technology has shown significant improvements [[citation:1]]. Market adoption rates have increased by 25% in 2024 [[citation:2]][[citation:3]]."

Search Results:
{search_results}

Research Topic: {research_topic}`;

const REFLECTION_PROMPT = `You are an expert research assistant analyzing summaries about "{research_topic}".

Instructions:
- Identify knowledge gaps or areas that need deeper exploration and generate a follow-up query. (1 or multiple).
- If provided summaries are sufficient to answer the user's question, don't generate a follow-up query.
- If there is a knowledge gap, generate a follow-up query that would help expand your understanding.
- Don't produce more than {number_queries} follow-up queries.
- Focus on technical details, implementation specifics, or emerging trends that weren't fully covered.
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

Query Optimization Requirements:
- Ensure the follow-up query is self-contained and includes necessary context for search.
- Use specific keywords and technical terms rather than long descriptive sentences.
- Focus on core concepts, product names, versions, or specific features.
- Avoid overly complex or verbose phrasing that may reduce search effectiveness.
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

Reflect carefully on the Summaries to identify knowledge gaps and produce a follow-up query.

Summaries:
{summaries}`;

const ANSWER_PROMPT = `Generate a high-quality answer to the user's question based on the provided summaries.

Instructions:
- The current date is {current_date}.
- You are the final step of a multi-step research process, don't mention that you are the final step.
- You have access to all the information gathered from the previous steps.
- You have access to the user's question.
- Generate a high-quality answer to the user's question based on the provided summaries and the user's question.
- you MUST include all the citations from the summaries in the answer correctly in the format [[citation:number]].
- **Language Constraint**: Always respond in the same language as the user's input. If the user asks in Chinese, respond in Chinese; if in English, respond in English, etc.

User Context:
- {research_topic}

Summaries:
{summaries}`;

// Derives the research topic from the conversation history: a single message
// is used directly, a longer history is flattened into a transcript.
function getResearchTopic(messages) {
  if (messages.length === 1) {
    const last = messages[messages.length - 1];
    if (typeof last === "string") return last;
    if ("content" in last) {
      if (typeof last.content === "string") return last.content;
      if (Array.isArray(last.content)) {
        return last.content.map((part) => (part.type === "text" ? part.text : "")).join("\n");
      }
    }
    return JSON.stringify(last);
  }
  let topic = "";
  for (const message of messages) {
    if (message instanceof CoreHumanMessage) topic += `User: ${message.content}\n`;
    else if (message instanceof CoreAIMessage) topic += `Assistant: ${message.content}\n`;
  }
  return topic;
}

// Normalizes the model's citation markers ([[citation:1]], [[1]], [1]) and
// replaces each with either a markdown superscript link or a plain
// [[citation:id]] tag, depending on `enableCitationUrl`. Also returns the
// list of search-result indexes the text actually cited.
function resolveCitations(message, sources, enableCitationUrl = true) {
  const raw = typeof message.content === "string" ? message.content : JSON.stringify(message.content);
  const normalize = (text) =>
    text
      .replace(/\[\[citation:(\d+)]]/g, "[citation]($1)")
      .replace(/\[\[(\d+)]]/g, "[citation]($1)")
      .replace(/(?<!\[)\[(\d+)](?!])/g, "[citation]($1)");
  const collectIndexes = (text) => {
    const pattern = /\[citation\]\((\d+)\)/g;
    const indexes = [];
    let match;
    while ((match = pattern.exec(text)) !== null) indexes.push(match[1]);
    return indexes;
  };
  const normalized = normalize(raw);
  const segmentIndexes = collectIndexes(normalized);
  const content = normalized.replace(/\[citation\]\((\d+)\)/g, (marker) => {
    const index = parseInt(marker.match(/\((\d+)\)/)?.[1] ?? "0", 10);
    const source = sources[index - 1];
    if (!source) return marker;
    if (!enableCitationUrl) return `[[citation:${source.id}]]`;
    return source.url
      ? `<sup>[[${source.id}](${source.url})]</sup>`
      : `<sup>[[${source.id}]]</sup>`;
  });
  return { content, segmentIndexes };
}

const getCurrentDate = () => new Date().toISOString();

// Fills {placeholders} in a prompt template with string/number values.
function formatPrompt(template, values) {
  if (typeof template !== "string") return template;
  for (const key in values) {
    const value = values[key];
    if (["string", "number"].includes(typeof value)) {
      template = template.replace(new RegExp(`{(${key})}`, "g"), String(value));
    }
  }
  return template || "";
}

const NodeEnum = {
  GenerateQuery: "generate_query",
  Research: "research",
  Reflection: "reflection",
  FinalizeAnswer: "finalize_answer",
};

const EventStreamEnum = {
  ChatModelStart: "on_chat_model_start",
  ChatModelStream: "on_chat_model_stream",
  ChatModelEnd: "on_chat_model_end",
};

class DeepResearch {
  constructor({ searcher, options }) {
    this.searcher = searcher;
    this.options = options;
  }

  // Builds the LangGraph workflow:
  // generate_query -> (fan out) research -> reflection -> research | finalize_answer
  async compile() {
    const graph = new StateGraph(OverallState, ConfigurationSchema);
    graph.addNode("generate_query", this.generateQuery.bind(this));
    graph.addNode("research", this.research.bind(this), { input: QueryState });
    graph.addNode("reflection", this.reflection.bind(this));
    graph.addNode("finalize_answer", this.finalizeAnswer.bind(this));
    graph.addEdge(START, "generate_query");
    graph.addConditionalEdges("generate_query", this.continueToSearch.bind(this), ["research"]);
    graph.addEdge("research", "reflection");
    graph.addConditionalEdges("reflection", this.evaluateResearch.bind(this), ["research", "finalize_answer"]);
    graph.addEdge("finalize_answer", END);
    return graph.compile({ name: "DeepResearch" });
  }

  // Generates the initial search queries; falls back to the raw topic on failure.
  async generateQuery(state, config) {
    const { numberOfInitialQueries, queryGeneratorModel } = ensureConfiguration(config);
    const { systemPrompt = "You are a helpful research assistant.", temperature = 0.1 } = this.options || {};
    const researchTopic = getResearchTopic(state.messages);
    const currentDate = getCurrentDate();
    const model = this.createClient(queryGeneratorModel, temperature);
    const agent = createAgent({
      model,
      tools: [],
      systemPrompt,
      responseFormat: toolStrategy(SearchQueriesSchema, {
        toolMessageContent: `I will generate ${numberOfInitialQueries} search queries based on your input.`,
      }),
    });
    const prompt = formatPrompt(QUERY_WRITER_PROMPT, {
      number_queries: numberOfInitialQueries,
      current_date: currentDate,
      research_topic: researchTopic,
    });
    try {
      const result = await agent.invoke({ messages: [new HumanMessage(prompt)] }, { tags: ["generate_query"] });
      const queries = result.structuredResponse?.query || [];
      if (queries.length === 0) {
        console.warn("LLM returned empty query list, using original topic as fallback");
        return { generatedQueries: [researchTopic] };
      }
      return { generatedQueries: queries, rationale: result.structuredResponse?.rationale };
    } catch (error) {
      console.error("Failed to generate search queries:", error);
      console.warn("Using original topic as fallback due to LLM failure");
      return { generatedQueries: [researchTopic] };
    }
  }

  // Fans each generated query out to a parallel research branch.
  async continueToSearch(state) {
    const queries = state.generatedQueries || [];
    if (queries.length === 0) {
      const topic = getResearchTopic(state.messages);
      return [new Send("research", { query: topic, id: "0" })];
    }
    return queries.map((query, index) => new Send("research", { query, id: index.toString(), loopIndex: 1 }));
  }

  // Runs the provided searcher for one query and summarizes the results with
  // citations. Note: this step reuses the query-generator model.
  async research(state, config) {
    const { queryGeneratorModel } = ensureConfiguration(config);
    const { temperature = 0.1, enableCitationUrl = true } = this.options || {};
    const sources = await this.searcher(state);
    const searchResults = sources
      .map(
        ({ title, content, date, score }, index) =>
          `[[${index + 1}]]. Title: ${title}\nContent: ${content}\nDate: ${date ?? "N/A"}\nConfidence Score: ${score ?? "N/A"}`
      )
      .join("\n");
    const model = this.createClient(queryGeneratorModel, temperature);
    const agent = createAgent({ model, tools: [] });
    const prompt = formatPrompt(RESEARCH_PROMPT, {
      current_date: getCurrentDate(),
      research_topic: state.query,
      search_results: searchResults,
    });
    const result = await agent.invoke({ messages: [new HumanMessage(prompt)] }, { tags: ["research"] });
    const lastMessage = result.messages[result.messages.length - 1];
    const { content, segmentIndexes } = resolveCitations(lastMessage, sources, enableCitationUrl);
    return {
      sourcesGathered: sources.filter((_source, index) => segmentIndexes.includes(`${index + 1}`)),
      searchedQueries: [state.query],
      researchResult: [content],
      researchLoopCount: state.loopIndex,
    };
  }

  // Decides whether the gathered summaries suffice or follow-up queries are needed.
  async reflection(state, config) {
    const { reflectionModel, numberOfInitialQueries } = ensureConfiguration(config);
    const { temperature = 0.1 } = this.options || {};
    const researchTopic = getResearchTopic(state.messages);
    const summaries = state.researchResult.join("\n");
    const model = this.createClient(reflectionModel, temperature);
    const agent = createAgent({
      model,
      tools: [],
      responseFormat: toolStrategy(ReflectionSchema, {
        toolMessageContent: "I will analyze the research summaries and determine if more information is needed.",
      }),
    });
    const prompt = formatPrompt(REFLECTION_PROMPT, {
      research_topic: researchTopic,
      summaries,
      number_queries: numberOfInitialQueries,
    });
    try {
      const response = (await agent.invoke({ messages: [new HumanMessage(prompt)] }, { tags: ["reflection"] }))
        .structuredResponse;
      return {
        reflectionState: {
          isSufficient: response?.isSufficient ?? true,
          knowledgeGap: response?.knowledgeGap ?? "",
          followUpQueries: response?.followUpQueries || [],
          numberOfRanQueries: state.searchedQueries.length,
        },
      };
    } catch (error) {
      console.error("Failed to generate reflection:", error);
      return {
        reflectionState: {
          isSufficient: true,
          knowledgeGap: "Unable to analyze knowledge gaps",
          followUpQueries: [],
          numberOfRanQueries: state.searchedQueries.length,
        },
      };
    }
  }

  // Routes back to research for follow-up queries or on to the final answer.
  async evaluateResearch(state, config) {
    const { reflectionState, researchLoopCount } = state;
    const { maxResearchLoops } = ensureConfiguration(config);
    const { followUpQueries = [], isSufficient, numberOfRanQueries } = reflectionState;
    if (researchLoopCount >= maxResearchLoops || isSufficient) return "finalize_answer";
    if (!followUpQueries || followUpQueries.length === 0) {
      console.warn("No follow-up queries generated, proceeding to finalize answer");
      return "finalize_answer";
    }
    return followUpQueries.map(
      (query, index) =>
        new Send("research", {
          query,
          id: (numberOfRanQueries + index).toString(),
          loopIndex: researchLoopCount + 1,
        })
    );
  }

  // Produces the final answer and keeps only the sources it actually cites.
  async finalizeAnswer(state, config) {
    // The published bundle selected `reflectionModel` here, leaving
    // `answerModel` unused; `answerModel` matches the configuration's stated
    // purpose, so it is used instead.
    const { answerModel } = ensureConfiguration(config);
    const {
      systemPrompt = "You are a helpful research assistant.",
      temperature = 0.1,
      enableCitationUrl = true,
    } = this.options || {};
    const currentDate = getCurrentDate();
    const researchTopic = getResearchTopic(state.messages);
    const summaries = state.researchResult.join("\n");
    if (!summaries.trim()) {
      return {
        messages: [
          new AIMessage("Sorry, no useful information was retrieved. Please try again later or ask a different question."),
        ],
        sourcesGathered: [],
      };
    }
    const model = this.createClient(answerModel, temperature);
    const agent = createAgent({ model, tools: [], systemPrompt });
    const prompt = formatPrompt(ANSWER_PROMPT, {
      research_topic: researchTopic,
      summaries,
      current_date: currentDate,
    });
    const result = await agent.invoke({ messages: [new HumanMessage(prompt)] }, { tags: ["finalize_answer"] });
    const lastMessage = result.messages[result.messages.length - 1];
    const content = typeof lastMessage.content === "string" ? lastMessage.content : JSON.stringify(lastMessage.content);
    const citedSources = [];
    for (const source of state.sourcesGathered) {
      let isCited = false;
      if (enableCitationUrl) {
        const withUrl = `<sup>[[${source.id}](${source.url})]</sup>`;
        const withoutUrl = `<sup>[[${source.id}]]</sup>`;
        isCited = content.includes(withUrl) || content.includes(withoutUrl);
      } else {
        isCited = content.includes(`[[citation:${source.id}]]`);
      }
      if (isCited) citedSources.push(source);
    }
    return { messages: [new AIMessage(content)], sourcesGathered: citedSources };
  }

  // Instantiates a chat model for the configured provider.
  createClient(model, temperature = 0.1) {
    const { apiKey, type = "openai", baseURL, ...rest } = this.options || {};
    switch (type) {
      case "anthropic": {
        const fields = { model, anthropicApiKey: apiKey, temperature, ...rest };
        if (baseURL) fields.anthropicApiUrl = baseURL;
        return new ChatAnthropic(fields);
      }
      case "gemini":
        return new ChatGoogleGenerativeAI({ model, apiKey, baseUrl: baseURL || undefined, temperature, ...rest });
      case "vertexai":
        return new ChatVertexAI({ model, apiKey, temperature, ...rest });
      case "openai":
      default:
        return new ChatOpenAI({
          model,
          openAIApiKey: apiKey,
          temperature,
          configuration: { apiKey, baseURL, ...rest },
        });
    }
  }
}

export { DeepResearch, NodeEnum, EventStreamEnum };
export { HumanMessage, AIMessage, SystemMessage, BaseMessage } from "langchain";
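
The exported EventStreamEnum values match LangChain's streamEvents v2 event names, and each node tags its model calls with the corresponding NodeEnum value, so token-level streaming of the final answer could look like the following sketch. It assumes a `graph` compiled as in the earlier example and that node tags propagate to child run events, which is standard LangChain behavior but untested here.

// Hedged sketch: stream only tokens produced inside the finalize_answer node,
// identified by the tag that node attaches to its model call.
import { DeepResearch, EventStreamEnum, NodeEnum, HumanMessage } from "deepsearcher";

const events = graph.streamEvents(
  { messages: [new HumanMessage("Compare LangGraph and plain LangChain agents")] },
  {
    version: "v2",
    configurable: {
      queryGeneratorModel: "gpt-4o-mini", // assumption: any chat model name works
      reflectionModel: "gpt-4o-mini",
      answerModel: "gpt-4o",
    },
  }
);

for await (const event of events) {
  if (
    event.event === EventStreamEnum.ChatModelStream &&
    event.tags?.includes(NodeEnum.FinalizeAnswer)
  ) {
    const chunk = event.data.chunk;
    if (typeof chunk?.content === "string") process.stdout.write(chunk.content);
  }
}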