UNPKG

type2docfx

Version:

A tool that converts the JSON output of TypeDoc into the universal reference model consumed by DocFx.

525 lines (442 loc) 21.2 kB
/**
 * @module botbuilder-ai
 */
/**
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
 */
import { Middleware, TurnContext, ActivityTypes, Activity } from 'botbuilder';
import * as request from 'request-promise-native';
import { DOMParser } from "xmldom";

/**
 * Configuration accepted by `LanguageTranslator`.
 */
export interface TranslatorSettings {
    /** Subscription key used to obtain an access token for the translator service. */
    translatorKey: string;
    /** Languages the bot handles without translation; the first entry is the fallback target language. */
    nativeLanguages: string[];
    /** Per source language, regular expressions whose (first captured) match is kept untranslated. */
    noTranslatePatterns?: { [id: string]: string[] };
    /** Source word -> replacement word; keys are matched case-insensitively. */
    wordDictionary?: { [id: string]: string };
    /** Returns the user's language for the turn; when omitted the language is detected from the message text. */
    getUserLanguage?: (context: TurnContext) => string;
    /** Resolves to `true` when it handled the turn itself, in which case the rest of the pipeline is skipped. */
    setUserLanguage?: (context: TurnContext) => Promise<boolean>;
    /** When truthy, outgoing message activities are translated back to the user's language. */
    translateBackToUserLanguage?: boolean;
}

/**
 * Middleware that translates the text of incoming message activities from the user's
 * language into one of the bot's native languages and, optionally, translates outgoing
 * message activities back into the user's language.
 */
export class LanguageTranslator implements Middleware {
    private translator: Translator;
    private getUserLanguage: ((context: TurnContext) => string) | undefined;
    private setUserLanguage: ((context: TurnContext) => Promise<boolean>) | undefined;
    private nativeLanguages: string[];
    private translateBackToUserLanguage: boolean;
    private noTranslatePatterns: { [id: string]: string[] } | undefined;
    private wordDictionary: { [id: string]: string } | undefined;

    /**
     * @param settings Translator key, native languages and optional hooks / post-processing rules.
     */
    public constructor(settings: TranslatorSettings) {
        this.translator = new MicrosoftTranslator(settings.translatorKey);
        this.nativeLanguages = settings.nativeLanguages;
        this.getUserLanguage = settings.getUserLanguage;
        this.setUserLanguage = settings.setUserLanguage;
        // Only ever used as a truthiness flag, so normalise `undefined` to `false`.
        this.translateBackToUserLanguage = !!settings.translateBackToUserLanguage;
        this.noTranslatePatterns = settings.noTranslatePatterns;
        this.wordDictionary = settings.wordDictionary;
    }

    /**
     * Handles an incoming activity: non-message activities pass straight through; message
     * activities have their text translated in place before the rest of the pipeline runs.
     *
     * @param context Context of the current turn.
     * @param next Continues the middleware pipeline.
     */
    public async onTurn(context: TurnContext, next: () => Promise<void>): Promise<void> {
        if (context.activity.type !== ActivityTypes.Message) {
            return next();
        }

        if (this.setUserLanguage !== undefined) {
            const changedLanguage = await this.setUserLanguage(context);
            if (changedLanguage) {
                // The hook consumed the turn; do not run the rest of the pipeline.
                return;
            }
        }

        // Determine the language we are using for this conversation.
        let sourceLanguage: string;
        if (this.getUserLanguage !== undefined) {
            sourceLanguage = this.getUserLanguage(context);
        } else {
            sourceLanguage = await this.translator.detect(context.activity.text);
        }

        // Keep the user's language when the bot speaks it, otherwise fall back to the first native language.
        const targetLanguage = (this.nativeLanguages.indexOf(sourceLanguage) >= 0)
            ? sourceLanguage
            : this.nativeLanguages[0];

        // Translate to the bot's language.
        await this.translateMessageAsync(context, context.activity, sourceLanguage, targetLanguage);

        if (this.translateBackToUserLanguage) {
            context.onSendActivities(async (newContext, activities, newNext) => {
                await Promise.all(activities.map(async (activity) => {
                    if (activity.type === ActivityTypes.Message) {
                        await this.translateMessageAsync(newContext, activity, targetLanguage, sourceLanguage);
                    }
                }));
                return newNext();
            });
        }

        return next();
    }

    /**
     * Translates the `text` field of a message in place, regardless of direction.
     *
     * @param context Context of the current turn (not used by the translation itself).
     * @param message Activity whose `text` is replaced by its translation.
     * @param sourceLanguage Language the text is currently in.
     * @param targetLanguage Language to translate the text into.
     * @returns The per-line translation results; empty when nothing had to be translated.
     */
    private async translateMessageAsync(
        context: TurnContext,
        message: Partial<Activity>,
        sourceLanguage: string,
        targetLanguage: string
    ): Promise<TranslationResult[]> {
        if (sourceLanguage === targetLanguage) {
            return [];
        }

        // Message activities without text (e.g. attachment-only) have nothing to translate;
        // calling split() on them would throw.
        if (!message.text) {
            return [];
        }
        const lines = message.text.split('\n');

        const patterns = this.noTranslatePatterns ? this.noTranslatePatterns[sourceLanguage] : undefined;
        if (patterns && patterns.length > 0) {
            this.translator.setPostProcessorTemplate(patterns, this.wordDictionary);
        } else if (this.wordDictionary) {
            this.translator.setPostProcessorTemplate([], this.wordDictionary);
        }

        const translateResult = await this.translator.translateArrayAsync({
            from: sourceLanguage,
            to: targetLanguage,
            texts: lines,
            contentType: 'text/plain'
        });

        // Re-assemble the lines; a separator is only added once some text has been accumulated.
        let text = '';
        translateResult.forEach(translatedSentence => {
            if (text.length > 0) {
                text += '\n';
            }
            text += translatedSentence.translatedText;
        });
        message.text = text;

        return translateResult;
    }
}

/** Options for a batch translation request. */
interface TranslateArrayOptions {
    texts: string[];
    from: string;
    to: string;
    contentType?: string;
    category?: string;
}

/** Result of translating one input text. */
interface TranslationResult {
    translatedText: string;
}

/** Contract the middleware needs from a translation backend. */
interface Translator {
    translateArrayAsync(options: TranslateArrayOptions): Promise<TranslationResult[]>;

    detect(text: string): Promise<string>;

    setPostProcessorTemplate(noTranslatePatterns: string[], wordDictionary?: { [id: string]: string }): void;
}

/**
 * `Translator` implementation that calls the Microsoft Translator V2 HTTP endpoints.
 */
class MicrosoftTranslator implements Translator {

    apiKey: string;
    postProcessor: PostProcessTranslator;

    /** Characters escaped before text is embedded in the XML request body. */
    entityMap: { [character: string]: string } = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': '&quot;',
        "'": '&#39;',
        "/": '&#x2F;'
    };

    /**
     * @param apiKey Subscription key exchanged for an access token on every request.
     */
    constructor(apiKey: string) {
        this.apiKey = apiKey;
        this.postProcessor = new PostProcessTranslator();
    }

    /**
     * Replaces the post processor applied to translations.
     *
     * @param noTranslatePatterns Patterns whose match must stay untranslated.
     * @param wordDictionary Optional source word -> replacement dictionary.
     */
    setPostProcessorTemplate(noTranslatePatterns: string[], wordDictionary?: { [id: string]: string }): void {
        this.postProcessor = new PostProcessTranslator(noTranslatePatterns, wordDictionary);
    }

    /**
     * Requests an access token from the token service using the subscription key.
     *
     * @returns The raw token returned by the service.
     */
    getAccessToken(): Promise<string> {
        return request({
            url: `https://api.cognitive.microsoft.com/sts/v1.0/issueToken?Subscription-Key=${this.apiKey}`,
            method: 'POST'
        });
    }

    /**
     * Escapes the characters listed in `entityMap`.
     *
     * @param source Text to escape.
     * @returns The escaped text.
     */
    escapeHtml(source: string) {
        return String(source).replace(/[&<>"'\/]/g, s => this.entityMap[s]);
    }

    /**
     * Detects the language of a text.
     *
     * @param text Text to analyse.
     * @returns The service response with all XML tags stripped.
     */
    detect(text: string): Promise<string> {
        // HTTPS so the bearer token is not sent in clear text.
        const uri = "https://api.microsofttranslator.com/v2/Http.svc/Detect";
        // encodeURIComponent (not encodeURI) so '&', '#', '+', '=' inside the text cannot corrupt the query string.
        const query = `?text=${encodeURIComponent(text)}`;
        return this.getAccessToken()
            .then(accessToken => {
                return request({
                    url: uri + query,
                    method: 'GET',
                    headers: {
                        'Authorization': 'Bearer ' + accessToken
                    }
                });
            })
            .then(lang => lang.replace(/<[^>]*>/g, ''));
    }

    /**
     * Translates a batch of texts and post-processes each translation using the word
     * alignment returned by the service.
     *
     * @param options Source language, target language and the texts to translate.
     * @returns One result per translated text found in the response.
     */
    translateArrayAsync(options: TranslateArrayOptions): Promise<TranslationResult[]> {
        const from = options.from;
        const to = options.to;
        // Keep the untouched originals for post-processing and build the XML fragments
        // in a new array so the caller's `texts` array is not modified.
        const orgTexts = options.texts.slice();
        const texts = orgTexts.map(text =>
            `<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/Arrays">${this.escapeHtml(text)}</string>`
        );

        const serviceNs = 'xmlns="http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2"';
        const uri = "https://api.microsofttranslator.com/v2/Http.svc/TranslateArray2";
        const body = "<TranslateArrayRequest>" +
            "<AppId />" +
            `<From>${from}</From>` +
            "<Options>" +
            ` <Category ${serviceNs} >generalnn</Category>` +
            `<ContentType ${serviceNs}>text/plain</ContentType>` +
            `<ReservedFlags ${serviceNs} />` +
            `<State ${serviceNs} />` +
            `<Uri ${serviceNs} />` +
            `<User ${serviceNs} />` +
            "</Options>" +
            "<Texts>" +
            texts.join('') +
            "</Texts>" +
            `<To>${to}</To>` +
            "</TranslateArrayRequest>";

        return this.getAccessToken()
            .then(accessToken => {
                return request({
                    url: uri,
                    method: 'POST',
                    headers: {
                        'Authorization': 'Bearer ' + accessToken,
                        'Content-Type': 'text/xml'
                    },
                    body: body,
                });
            })
            .then(response => {
                const results: TranslationResult[] = [];
                const parser = new DOMParser();
                const responseObj = parser.parseFromString(response);
                const elements = responseObj.getElementsByTagName("TranslateArray2Response");
                Array.from(elements).forEach((element, index) => {
                    let translation = element.getElementsByTagName('TranslatedText')[0].textContent as string;
                    const alignment = element.getElementsByTagName('Alignment')[0].textContent as string;
                    translation = this.postProcessor.fixTranslation(orgTexts[index], alignment, translation);
                    results.push({ translatedText: translation });
                });
                return results;
            });
    }
}

/**
 * Repairs a machine translation using the word alignment between source and target:
 * text matching a no-translate pattern and numbers are copied from the source, and
 * dictionary words are replaced by their configured translation.
 */
export class PostProcessTranslator {
    noTranslatePatterns: string[];
    wordDictionary: { [id: string]: string } | undefined;

    /**
     * @param noTranslatePatterns Regular expressions; a pattern containing no '(' is wrapped in a capture group.
     * @param wordDictionary Source word -> replacement; keys are normalised to lower case on a private copy.
     */
    constructor(noTranslatePatterns?: string[], wordDictionary?: { [id: string]: string }) {
        this.noTranslatePatterns = [];
        // Work on a copy so the caller's dictionary object is not rewritten.
        this.wordDictionary = wordDictionary ? Object.assign({}, wordDictionary) : wordDictionary;

        if (this.wordDictionary) {
            const dictionary = this.wordDictionary;
            Object.keys(dictionary).forEach(word => {
                const lowered = word.toLowerCase();
                if (word !== lowered) {
                    dictionary[lowered] = dictionary[word];
                    delete dictionary[word];
                }
            });
        }

        if (noTranslatePatterns) {
            noTranslatePatterns.forEach(pattern => {
                if (pattern.indexOf('(') === -1) {
                    pattern = `(${pattern})`;
                }
                this.noTranslatePatterns.push(pattern);
            });
        }
    }

    /**
     * Joins words, removes a single space on either side of an apostrophe and trims the result.
     */
    private join(delimiter: string, words: string[]): string {
        return words.join(delimiter).replace(/[ ]?'[ ]?/g, "'").trim();
    }

    /**
     * Splits a sentence into words, preferring the word boundaries described by the
     * alignment data and falling back to a plain split on spaces when the alignment
     * does not reproduce the sentence.
     *
     * @param sentence Sentence to split.
     * @param alignments Alignment entries of the form `srcStart:srcEnd-trgStart:trgEnd`;
     *                   sorted in place by target start index when `isSrcSentence` is false.
     * @param isSrcSentence Whether `sentence` is the source (true) or the target (false) side.
     */
    private splitSentence(sentence: string, alignments: string[], isSrcSentence = true): string[] {
        const wrds = sentence.split(' ');
        const lastWrd = wrds[wrds.length - 1];
        // Strip one trailing punctuation mark from the last word.
        if (lastWrd.length > 0 && ".,:;?!".indexOf(lastWrd[lastWrd.length - 1]) !== -1) {
            wrds[wrds.length - 1] = lastWrd.substr(0, lastWrd.length - 1);
        }

        const wrdIndexInAlignment = isSrcSentence ? 0 : 1;
        if (!isSrcSentence) {
            alignments.sort((a, b) => {
                const aIndex = parseInt(a.split('-')[wrdIndexInAlignment].split(':')[0]);
                const bIndex = parseInt(b.split('-')[wrdIndexInAlignment].split(':')[0]);
                if (aIndex <= bIndex) {
                    return -1;
                } else {
                    return 1;
                }
            });
        }

        const outWrds: string[] = [];
        const sentenceWithoutSpaces = sentence.replace(/\s/g, '');
        alignments.forEach(alignData => {
            const wordIndexes = alignData.split('-')[wrdIndexInAlignment];
            const startIndex = parseInt(wordIndexes.split(':')[0]);
            const length = parseInt(wordIndexes.split(':')[1]) - startIndex + 1;
            const wrd = sentence.substr(startIndex, length);
            // Accept the word only if the words collected so far plus this one still occur in the sentence.
            const subSentence = this.join("", outWrds.concat([wrd]));
            if (sentenceWithoutSpaces.indexOf(subSentence) !== -1) {
                outWrds.push(wrd);
            }
        });

        if (this.join("", outWrds) === this.join("", wrds)) {
            return outWrds;
        } else {
            return wrds;
        }
    }

    /**
     * Builds a map from source word index to target word index out of the alignment entries.
     * Entries whose source or target text is not found among the words are ignored.
     */
    private wordAlignmentParse(alignments: string[], srcWords: string[], trgWords: string[]): { [id: number]: number } {
        const alignMap: { [id: number]: number } = {};

        const sourceMessage = this.join(" ", srcWords);
        const trgMessage = this.join(" ", trgWords);

        alignments.forEach(alignData => {
            const wordIndexes = alignData.split('-');

            const srcStartIndex = parseInt(wordIndexes[0].split(':')[0]);
            const srcLength = parseInt(wordIndexes[0].split(':')[1]) - srcStartIndex + 1;
            const srcWrd = sourceMessage.substr(srcStartIndex, srcLength);
            const srcWrdIndex = srcWords.indexOf(srcWrd);

            const trgstartIndex = parseInt(wordIndexes[1].split(':')[0]);
            const trgLength = parseInt(wordIndexes[1].split(':')[1]) - trgstartIndex + 1;
            const trgWrd = trgMessage.substr(trgstartIndex, trgLength);
            const trgWrdIndex = trgWords.indexOf(trgWrd);

            if (srcWrdIndex !== -1 && trgWrdIndex !== -1) {
                alignMap[srcWrdIndex] = trgWrdIndex;
            }
        });
        return alignMap;
    }

    /**
     * Copies the source word at `srcWrdIndex` over the target word it is aligned with, if any.
     */
    private keepSrcWrdInTranslation(alignment: { [id: number]: number }, sourceWords: string[], targetWords: string[], srcWrdIndex: number) {
        if (typeof alignment[srcWrdIndex] !== "undefined") {
            targetWords[alignment[srcWrdIndex]] = sourceWords[srcWrdIndex];
        }
        return targetWords;
    }

    /**
     * Replaces the target word aligned with the source word at `srcWrdIndex` by the
     * dictionary entry of that source word. Nothing changes when the word is not aligned
     * or has no dictionary entry (so the target is never overwritten with `undefined`).
     */
    private replaceWordInDictionary(alignment: { [id: number]: number }, sourceWords: string[], targetWords: string[], srcWrdIndex: number) {
        if (typeof alignment[srcWrdIndex] !== "undefined" && this.wordDictionary) {
            const replacement = this.wordDictionary[sourceWords[srcWrdIndex].toLowerCase()];
            if (typeof replacement !== "undefined") {
                targetWords[alignment[srcWrdIndex]] = replacement;
            }
        }
        return targetWords;
    }

    /**
     * Returns the index of the word that starts at character offset `charIndex`, assuming
     * the words are separated by exactly one character, or -1 when no word starts there.
     */
    private findWordIndexAtChar(words: string[], charIndex: number): number {
        let offset = 0;
        for (let i = 0; i < words.length; i++) {
            if (offset === charIndex) {
                return i;
            }
            offset += words[i].length + 1;
        }
        return -1;
    }

    /**
     * Applies the no-translate patterns, the word dictionary and number preservation to a translation.
     *
     * @param sourceMessage Original text.
     * @param alignment Space separated alignment entries (`srcStart:srcEnd-trgStart:trgEnd`).
     * @param targetMessage Machine translation of `sourceMessage`.
     * @returns The repaired translation, or `targetMessage` unchanged when there is nothing
     *          to apply or no alignment is available.
     */
    public fixTranslation(sourceMessage: string, alignment: string, targetMessage: string): string {
        const numericMatches = sourceMessage.match(/[0-9]+/g);
        const containsNum = numericMatches != null;

        if ((!containsNum && this.noTranslatePatterns.length === 0 && !this.wordDictionary) || alignment.trim() === '') {
            return targetMessage;
        }

        // Patterns that actually occur in the source message.
        const toBeReplaced: string[] = [];
        this.noTranslatePatterns.forEach(pattern => {
            if (sourceMessage.match(new RegExp(pattern, "i")) != null) {
                toBeReplaced.push(pattern);
            }
        });

        // Dictionary words that actually occur in the source message (case-insensitive, literal match).
        const loweredSource = sourceMessage.toLowerCase();
        const toBeReplacedByDictionary: string[] = [];
        if (this.wordDictionary) {
            Object.keys(this.wordDictionary).forEach(word => {
                if (loweredSource.indexOf(word.toLowerCase()) !== -1) {
                    toBeReplacedByDictionary.push(word);
                }
            });
        }

        const alignments = alignment.trim().split(' ');

        const srcWords = this.splitSentence(sourceMessage, alignments);
        let trgWords = this.splitSentence(targetMessage, alignments, false);

        const alignMap = this.wordAlignmentParse(alignments, srcWords, trgWords);

        toBeReplaced.forEach(pattern => {
            const match = new RegExp(pattern, "i").exec(sourceMessage);
            if (match === null) {
                return;
            }
            // Use the first capture group; fall back to the whole match when the pattern
            // has no (participating) capture group instead of crashing on `undefined`.
            const captured = typeof match[1] !== "undefined" ? match[1] : match[0];
            const noTranslateStartChrIndex = match.index + match[0].indexOf(captured);
            const noTranslateMatchLength = captured.length;

            // Find the word starting at the match and count how many consecutive words the match spans.
            let srcIndx = -1;
            let chrIndx = 0;
            let newChrLengthFromMatch = 0;
            let newNoTranslateArrayLength = 1;
            for (let wrdIndx = 0; wrdIndx < srcWords.length; wrdIndx++) {
                if (chrIndx === noTranslateStartChrIndex) {
                    srcIndx = wrdIndx;
                }
                if (srcIndx !== -1) {
                    if (newChrLengthFromMatch + srcWords[wrdIndx].length >= noTranslateMatchLength) {
                        break;
                    }
                    newNoTranslateArrayLength++;
                    newChrLengthFromMatch += srcWords[wrdIndx].length + 1;
                }
                chrIndx += srcWords[wrdIndx].length + 1;
            }

            if (srcIndx === -1) {
                // The match does not start on a word boundary; nothing to keep.
                return;
            }
            const end = Math.min(srcIndx + newNoTranslateArrayLength, srcWords.length);
            for (let wrdIndx = srcIndx; wrdIndx < end; wrdIndx++) {
                trgWords = this.keepSrcWrdInTranslation(alignMap, srcWords, trgWords, wrdIndx);
            }
        });

        toBeReplacedByDictionary.forEach(word => {
            // Locate the word literally so dictionary keys containing regular-expression
            // metacharacters cannot throw or match unrelated text.
            const startChrIndex = loweredSource.indexOf(word.toLowerCase());
            const srcIndx = this.findWordIndexAtChar(srcWords, startChrIndex);
            if (srcIndx !== -1) {
                trgWords = this.replaceWordInDictionary(alignMap, srcWords, trgWords, srcIndx);
            }
        });

        if (numericMatches != null) {
            numericMatches.forEach(numericMatch => {
                // An index of -1 (number not a standalone word) has no alignment entry and is ignored.
                const srcIndx = srcWords.indexOf(numericMatch);
                trgWords = this.keepSrcWrdInTranslation(alignMap, srcWords, trgWords, srcIndx);
            });
        }

        return this.join(" ", trgWords);
    }
}