UNPKG

@langchain/core

Version:
1 lines 7.65 kB
{"version":3,"file":"xml.cjs","names":["BaseCumulativeTransformOutputParser","fields?: XMLOutputParserFields","prev: unknown | undefined","next: unknown","compare","generations: ChatGeneration[] | Generation[]","text: string","input: ParsedResult","result: XMLResult","s: string","sax","parsedResult: ParsedResult","elementStack: ParsedResult[]","node: any","text: any","attr: any"],"sources":["../../src/output_parsers/xml.ts"],"sourcesContent":["import {\n BaseCumulativeTransformOutputParser,\n BaseCumulativeTransformOutputParserInput,\n} from \"./transform.js\";\nimport { Operation, compare } from \"../utils/json_patch.js\";\nimport { sax } from \"../utils/sax-js/sax.js\";\nimport { ChatGeneration, Generation } from \"../outputs.js\";\n\nexport const XML_FORMAT_INSTRUCTIONS = `The output should be formatted as a XML file.\n1. Output should conform to the tags below. \n2. If tags are not given, make them on your own.\n3. Remember to always open and close all the tags.\n\nAs an example, for the tags [\"foo\", \"bar\", \"baz\"]:\n1. String \"<foo>\\n <bar>\\n <baz></baz>\\n </bar>\\n</foo>\" is a well-formatted instance of the schema. \n2. String \"<foo>\\n <bar>\\n </foo>\" is a badly-formatted instance.\n3. String \"<foo>\\n <tag>\\n </tag>\\n</foo>\" is a badly-formatted instance.\n\nHere are the output tags:\n\\`\\`\\`\n{tags}\n\\`\\`\\``;\n\nexport interface XMLOutputParserFields\n extends BaseCumulativeTransformOutputParserInput {\n /**\n * Optional list of tags that the output should conform to.\n * Only used in formatting of the prompt.\n */\n tags?: string[];\n}\n\nexport type Content = string | undefined | Array<{ [key: string]: Content }>;\n\nexport type XMLResult = {\n [key: string]: Content;\n};\n\nexport class XMLOutputParser extends BaseCumulativeTransformOutputParser<XMLResult> {\n tags?: string[];\n\n constructor(fields?: XMLOutputParserFields) {\n super(fields);\n\n this.tags = fields?.tags;\n }\n\n static lc_name() {\n return \"XMLOutputParser\";\n }\n\n lc_namespace = [\"langchain_core\", \"output_parsers\"];\n\n lc_serializable = true;\n\n protected _diff(\n prev: unknown | undefined,\n next: unknown\n ): Operation[] | undefined {\n if (!next) {\n return undefined;\n }\n if (!prev) {\n return [{ op: \"replace\", path: \"\", value: next }];\n }\n return compare(prev, next);\n }\n\n async parsePartialResult(\n generations: ChatGeneration[] | Generation[]\n ): Promise<XMLResult | undefined> {\n return parseXMLMarkdown(generations[0].text);\n }\n\n async parse(text: string): Promise<XMLResult> {\n return parseXMLMarkdown(text);\n }\n\n getFormatInstructions(): string {\n const withTags = !!(this.tags && this.tags.length > 0);\n return withTags\n ? XML_FORMAT_INSTRUCTIONS.replace(\"{tags}\", this.tags?.join(\", \") ?? \"\")\n : XML_FORMAT_INSTRUCTIONS;\n }\n}\n\nconst strip = (text: string) =>\n text\n .split(\"\\n\")\n .map((line) => line.replace(/^\\s+/, \"\"))\n .join(\"\\n\")\n .trim();\n\ntype ParsedResult = {\n name: string;\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n attributes: Record<string, any>;\n children: Array<ParsedResult>;\n text?: string;\n isSelfClosing: boolean;\n};\n\nconst parseParsedResult = (input: ParsedResult): XMLResult => {\n if (Object.keys(input).length === 0) {\n return {};\n }\n const result: XMLResult = {};\n if (input.children.length > 0) {\n result[input.name] = input.children.map(parseParsedResult);\n return result;\n } else {\n result[input.name] = input.text ?? undefined;\n return result;\n }\n};\n\nexport function parseXMLMarkdown(s: string): XMLResult {\n const cleanedString = strip(s);\n const parser = sax.parser(true);\n let parsedResult: ParsedResult = {} as ParsedResult;\n const elementStack: ParsedResult[] = [];\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n parser.onopentag = (node: any) => {\n const element = {\n name: node.name,\n attributes: node.attributes,\n children: [],\n text: \"\",\n isSelfClosing: node.isSelfClosing,\n };\n\n if (elementStack.length > 0) {\n const parentElement = elementStack[elementStack.length - 1];\n parentElement.children.push(element);\n } else {\n parsedResult = element as ParsedResult;\n }\n\n if (!node.isSelfClosing) {\n elementStack.push(element);\n }\n };\n\n parser.onclosetag = () => {\n if (elementStack.length > 0) {\n const lastElement = elementStack.pop();\n if (elementStack.length === 0 && lastElement) {\n parsedResult = lastElement as ParsedResult;\n }\n }\n };\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n parser.ontext = (text: any) => {\n if (elementStack.length > 0) {\n const currentElement = elementStack[elementStack.length - 1];\n currentElement.text += text;\n }\n };\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n parser.onattribute = (attr: any) => {\n if (elementStack.length > 0) {\n const currentElement = elementStack[elementStack.length - 1];\n currentElement.attributes[attr.name] = attr.value;\n }\n };\n\n // Try to find XML string within triple backticks.\n const match = /```(xml)?(.*)```/s.exec(cleanedString);\n const xmlString = match ? match[2] : cleanedString;\n parser.write(xmlString).close();\n\n // Remove the XML declaration if present\n if (parsedResult && parsedResult.name === \"?xml\") {\n parsedResult = parsedResult.children[0] as ParsedResult;\n }\n\n return parseParsedResult(parsedResult);\n}\n"],"mappings":";;;;;;AAQA,MAAa,0BAA0B,CAAC;;;;;;;;;;;;;MAalC,CAAC;AAiBP,IAAa,kBAAb,cAAqCA,sDAA+C;CAClF;CAEA,YAAYC,QAAgC;EAC1C,MAAM,OAAO;EAEb,KAAK,OAAO,QAAQ;CACrB;CAED,OAAO,UAAU;AACf,SAAO;CACR;CAED,eAAe,CAAC,kBAAkB,gBAAiB;CAEnD,kBAAkB;CAElB,AAAU,MACRC,MACAC,MACyB;AACzB,MAAI,CAAC,KACH,QAAO;AAET,MAAI,CAAC,KACH,QAAO,CAAC;GAAE,IAAI;GAAW,MAAM;GAAI,OAAO;EAAM,CAAC;AAEnD,SAAOC,uBAAQ,MAAM,KAAK;CAC3B;CAED,MAAM,mBACJC,aACgC;AAChC,SAAO,iBAAiB,YAAY,GAAG,KAAK;CAC7C;CAED,MAAM,MAAMC,MAAkC;AAC5C,SAAO,iBAAiB,KAAK;CAC9B;CAED,wBAAgC;EAC9B,MAAM,WAAW,CAAC,EAAE,KAAK,QAAQ,KAAK,KAAK,SAAS;AACpD,SAAO,WACH,wBAAwB,QAAQ,UAAU,KAAK,MAAM,KAAK,KAAK,IAAI,GAAG,GACtE;CACL;AACF;AAED,MAAM,QAAQ,CAACA,SACb,KACG,MAAM,KAAK,CACX,IAAI,CAAC,SAAS,KAAK,QAAQ,QAAQ,GAAG,CAAC,CACvC,KAAK,KAAK,CACV,MAAM;AAWX,MAAM,oBAAoB,CAACC,UAAmC;AAC5D,KAAI,OAAO,KAAK,MAAM,CAAC,WAAW,EAChC,QAAO,CAAE;CAEX,MAAMC,SAAoB,CAAE;AAC5B,KAAI,MAAM,SAAS,SAAS,GAAG;EAC7B,OAAO,MAAM,QAAQ,MAAM,SAAS,IAAI,kBAAkB;AAC1D,SAAO;CACR,OAAM;EACL,OAAO,MAAM,QAAQ,MAAM,QAAQ;AACnC,SAAO;CACR;AACF;AAED,SAAgB,iBAAiBC,GAAsB;CACrD,MAAM,gBAAgB,MAAM,EAAE;CAC9B,MAAM,SAASC,gBAAI,OAAO,KAAK;CAC/B,IAAIC,eAA6B,CAAE;CACnC,MAAMC,eAA+B,CAAE;CAGvC,OAAO,YAAY,CAACC,SAAc;EAChC,MAAM,UAAU;GACd,MAAM,KAAK;GACX,YAAY,KAAK;GACjB,UAAU,CAAE;GACZ,MAAM;GACN,eAAe,KAAK;EACrB;AAED,MAAI,aAAa,SAAS,GAAG;GAC3B,MAAM,gBAAgB,aAAa,aAAa,SAAS;GACzD,cAAc,SAAS,KAAK,QAAQ;EACrC,OACC,eAAe;AAGjB,MAAI,CAAC,KAAK,eACR,aAAa,KAAK,QAAQ;CAE7B;CAED,OAAO,aAAa,MAAM;AACxB,MAAI,aAAa,SAAS,GAAG;GAC3B,MAAM,cAAc,aAAa,KAAK;AACtC,OAAI,aAAa,WAAW,KAAK,aAC/B,eAAe;EAElB;CACF;CAGD,OAAO,SAAS,CAACC,SAAc;AAC7B,MAAI,aAAa,SAAS,GAAG;GAC3B,MAAM,iBAAiB,aAAa,aAAa,SAAS;GAC1D,eAAe,QAAQ;EACxB;CACF;CAGD,OAAO,cAAc,CAACC,SAAc;AAClC,MAAI,aAAa,SAAS,GAAG;GAC3B,MAAM,iBAAiB,aAAa,aAAa,SAAS;GAC1D,eAAe,WAAW,KAAK,QAAQ,KAAK;EAC7C;CACF;CAGD,MAAM,QAAQ,oBAAoB,KAAK,cAAc;CACrD,MAAM,YAAY,QAAQ,MAAM,KAAK;CACrC,OAAO,MAAM,UAAU,CAAC,OAAO;AAG/B,KAAI,gBAAgB,aAAa,SAAS,QACxC,eAAe,aAAa,SAAS;AAGvC,QAAO,kBAAkB,aAAa;AACvC"}