// botium-core
// Version:
// The Selenium for Chatbots
// 513 lines (461 loc) • 20.4 kB
// JavaScript
const util = require('util')
const XLSX = require('xlsx')
const _ = require('lodash')
const debug = require('debug')('botium-core-CompilerXlsx')
const Capabilities = require('../Capabilities')
const { E_SCRIPTING_MEMORY_COLUMN_MODE } = require('../Enums')
const CompilerBase = require('./CompilerBase')
const Constants = require('./Constants')
const Utterance = require('./Utterance')
const { Convo } = require('./Convo')
const { linesToConvoStep, convoStepToLines, validateConvo } = require('./helper')
module.exports = class CompilerXlsx extends CompilerBase {
constructor (context, caps = {}) {
super(context, caps)
this.colnames = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
}
_splitSheetnames (sheetnames) {
if (sheetnames) return sheetnames.split(/\s*[;,|]\s*/)
}
_filterSheetnames (sheetnames, selectors) {
const filteredSheetnames = sheetnames.filter(sheetname => !!selectors.find(selector => selector === '*' || sheetname === selector))
debug(`_filterSheetnames(sheetnames: ${JSON.stringify(sheetnames)}, selectors: ${JSON.stringify(selectors)}, filteredSheetnames: ${JSON.stringify(filteredSheetnames)})`)
return filteredSheetnames
}
Validate () {
super.Validate()
if (this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL] !== undefined) {
if (_.isString(this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL]) && this.colnames.findIndex((c) => c === this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL]) < 0) {
throw new Error(`SCRIPTING_XLSX_STARTCOL ${this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL]} invalid (A-Z)`)
} else if (this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL] < 1 || this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL] > this.colnames.length) {
throw new Error(`SCRIPTING_XLSX_STARTCOL ${this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL]} invalid (1-${this.colnames.length})`)
}
}
}
Compile (scriptBuffer, scriptType = Constants.SCRIPTING_TYPE_CONVO) {
const workbook = XLSX.read(scriptBuffer, { type: 'buffer' })
if (!workbook) throw new Error('Workbook not readable')
debug(`Loaded XLSX with Props: ${workbook.Props ? JSON.stringify(workbook.Props) : '<none>'}`)
const eol = this.caps[Capabilities.SCRIPTING_XLSX_EOL_WRITE]
const maxEmptyRowCount = 10
let sheetnames = []
if (scriptType === Constants.SCRIPTING_TYPE_CONVO) {
if (this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES]) {
sheetnames = this._filterSheetnames(workbook.SheetNames, this._splitSheetnames(this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES]))
} else {
sheetnames = workbook.SheetNames.filter(s => (s.toLowerCase().indexOf('convo') >= 0 || s.toLowerCase().indexOf('dialog') >= 0) && s.toLowerCase().indexOf('partial') < 0) || []
}
} else if (scriptType === Constants.SCRIPTING_TYPE_PCONVO) {
if (this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES_PCONVOS]) {
sheetnames = this._filterSheetnames(workbook.SheetNames, this._splitSheetnames(this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES_PCONVOS]))
} else {
sheetnames = workbook.SheetNames.filter(s => s.toLowerCase().indexOf('partial') >= 0) || []
}
} else if (scriptType === Constants.SCRIPTING_TYPE_UTTERANCES) {
if (this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES_UTTERANCES]) {
sheetnames = this._filterSheetnames(workbook.SheetNames, this._splitSheetnames(this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES_UTTERANCES]))
} else {
sheetnames = workbook.SheetNames.filter(s => s.toLowerCase().indexOf('utter') >= 0) || []
}
} else if (scriptType === Constants.SCRIPTING_TYPE_SCRIPTING_MEMORY) {
if (this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES_SCRIPTING_MEMORY]) {
sheetnames = this._filterSheetnames(workbook.SheetNames, this._splitSheetnames(this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES_SCRIPTING_MEMORY]))
} else {
sheetnames = workbook.SheetNames.filter(s => s.toLowerCase().indexOf('memory') >= 0 || s.toLowerCase().indexOf('scripting') >= 0) || []
}
} else {
throw Error(`Invalid script type ${scriptType}`)
}
debug(`sheet names for ${scriptType}: ${util.inspect(sheetnames)}`)
const scriptResults = []
sheetnames.forEach((sheetname) => {
const sheet = workbook.Sheets[sheetname]
if (!sheet) return
let { rowindex, colindex, hasNameCol } = this._findOrigin(sheet, scriptType)
debug(`evaluating sheet name for ${scriptType}: ${util.inspect(sheetname)}, rowindex ${rowindex}, colindex ${colindex}`)
if (scriptType === Constants.SCRIPTING_TYPE_CONVO || scriptType === Constants.SCRIPTING_TYPE_PCONVO) {
const parseCell = (sender, content) => {
if (!content) return { messageText: '' }
if (!_.isString(content)) content = '' + content
let eolSplit = null
let lines = []
if (content.indexOf('\n') >= 0) {
eolSplit = '\n'
} else if (content.indexOf('\r') >= 0) {
eolSplit = '\r'
}
if (eolSplit) {
lines = content.split(eolSplit).map(l => l.trim()).filter(l => l)
} else {
lines = [content.trim()]
}
return linesToConvoStep(lines, sender, this.context, eol)
}
const _extractRow = (rowindex) => {
const cell1 = this.colnames[colindex] + rowindex
const cell1Value = (sheet[cell1] && sheet[cell1].v) || null
const cell2 = this.colnames[colindex + 1] + rowindex
const cell2Value = (sheet[cell2] && sheet[cell2].v) || null
const cell3 = this.colnames[colindex + 2] + rowindex
const cell3Value = (sheet[cell3] && sheet[cell3].v) || null
if (hasNameCol) {
return {
nameCell: cell1,
nameCellValue: cell1Value,
meCell: cell2,
meCellValue: cell2Value,
botCell: cell3,
botCellValue: cell3Value
}
} else {
return {
nameCell: null,
nameCellValue: null,
meCell: cell1,
meCellValue: cell1Value,
botCell: cell2,
botCellValue: cell2Value
}
}
}
let questionAnswerMode = this._GetOptionalCapability(Capabilities.SCRIPTING_XLSX_MODE)
if (questionAnswerMode !== null) {
questionAnswerMode = questionAnswerMode === 'QUESTION_ANSWER'
debug(`questionAnswerMode to ${questionAnswerMode} (capability)`)
} else {
let emptyRowCount = 0
let index = 0
const foundQARows = []
const foundConvoRows = []
while (emptyRowCount <= maxEmptyRowCount) {
const { meCell, meCellValue, botCell, botCellValue } = _extractRow(rowindex + index)
if (!meCellValue && !botCellValue) {
emptyRowCount++
} else if (meCellValue && botCellValue) {
foundQARows.push(meCell)
} else if (meCellValue && !botCellValue) {
foundConvoRows.push(meCell)
} else if (!meCellValue && botCellValue) {
foundConvoRows.push(botCell)
}
index++
}
if (foundQARows.length > 0 && foundConvoRows.length > 0) {
throw new Error(`Excel sheet "${sheetname}" invalid. Detected intermixed Q&A sections (for instance ${foundQARows.slice(0, 3).join(',')}) and convo sections (for instance ${foundConvoRows.slice(0, 3).join(',')})`)
} else if (foundQARows.length > 0 && foundConvoRows.length === 0) {
questionAnswerMode = true
debug('questionAnswerMode to true (question-answer row found)')
} else {
questionAnswerMode = false
debug('questionAnswerMode to false (no question-answer row found)')
}
}
const convoResults = []
let currentConvo = []
let currentConvoName = null
let emptyRowCount = 0
let startrowindex = -1
while (true) {
const { nameCellValue, meCell, meCellValue, botCell, botCellValue } = _extractRow(rowindex)
if (questionAnswerMode) {
if (meCellValue || botCellValue) {
currentConvo = []
currentConvo.push(Object.assign(
{ sender: 'me', stepTag: 'Cell ' + meCell },
parseCell('me', meCellValue)
))
startrowindex = rowindex
currentConvo.push(Object.assign(
{ sender: 'bot', stepTag: 'Cell ' + botCell },
parseCell('bot', botCellValue)
))
convoResults.push(new Convo(this.context, {
header: {
name: nameCellValue || null,
sheetname,
colindex,
rowindex: startrowindex
},
conversation: currentConvo
}))
} else {
emptyRowCount++
}
} else {
if (currentConvo.length === 0) {
currentConvoName = nameCellValue || null
}
if (meCellValue) {
currentConvo.push(Object.assign(
{ sender: 'me', stepTag: 'Cell ' + meCell },
parseCell('me', meCellValue)
))
if (startrowindex < 0) startrowindex = rowindex
emptyRowCount = 0
} else if (botCellValue) {
currentConvo.push(Object.assign(
{ sender: 'bot', stepTag: 'Cell ' + botCell },
parseCell('bot', botCellValue)
))
if (startrowindex < 0) startrowindex = rowindex
emptyRowCount = 0
} else {
if (currentConvo.length > 0) {
convoResults.push(new Convo(this.context, {
header: {
name: currentConvoName,
sheetname,
colindex,
rowindex: startrowindex
},
conversation: currentConvo
}))
}
currentConvo = []
currentConvoName = null
startrowindex = -1
emptyRowCount++
}
}
rowindex++
if (emptyRowCount > maxEmptyRowCount) break
}
if (convoResults.length > 0) {
const formatLength = Math.max(3, `${convoResults[convoResults.length - 1].header.rowindex}`.length)
const formatBase = '0'.repeat(formatLength)
const formatRowIndex = (rowindex) => (formatBase + `${rowindex}`).slice(-1 * formatLength)
convoResults.forEach(convo => {
if (!convo.header.name) {
convo.header.name = `${convo.header.sheetname}-${this.colnames[convo.header.colindex]}${formatRowIndex(convo.header.rowindex)}`
}
// it is not used anymore?
convo.header.sort = convo.header.name
scriptResults.push(convo)
})
}
}
if (scriptType === Constants.SCRIPTING_TYPE_UTTERANCES) {
let currentUtterance = null
let emptylines = 0
while (true) {
const nameCell = this.colnames[colindex] + rowindex
const uttCell = this.colnames[colindex + 1] + rowindex
if (sheet[nameCell] && sheet[nameCell].v && sheet[uttCell] && sheet[uttCell].v) {
currentUtterance = new Utterance({ name: sheet[nameCell].v, utterances: [`${sheet[uttCell].v}`] })
scriptResults.push(currentUtterance)
emptylines = 0
} else if (sheet[uttCell] && sheet[uttCell].v) {
if (currentUtterance) currentUtterance.utterances.push(`${sheet[uttCell].v}`)
emptylines = 0
} else {
currentUtterance = null
emptylines++
}
rowindex++
if (emptylines > maxEmptyRowCount) break
}
}
if (scriptType === Constants.SCRIPTING_TYPE_SCRIPTING_MEMORY) {
const guessScriptingMemoryColumnMode = () => {
const line1Cell = this.colnames[colindex] + (rowindex + 1)
if (sheet[line1Cell] && sheet[line1Cell].v) {
if (sheet[line1Cell].v.startsWith('$')) return E_SCRIPTING_MEMORY_COLUMN_MODE.TESTCASENAMES
}
return E_SCRIPTING_MEMORY_COLUMN_MODE.VARNAMES
}
const columnMode = this.caps[Capabilities.SCRIPTING_MEMORY_COLUMN_MODE] || guessScriptingMemoryColumnMode()
if (columnMode === E_SCRIPTING_MEMORY_COLUMN_MODE.TESTCASENAMES) {
const caseNames = []
let colindexTemp = colindex + 1
while (true) {
const caseNameCell = this.colnames[colindexTemp] + rowindex
if (sheet[caseNameCell] && sheet[caseNameCell].v) {
caseNames.push(sheet[caseNameCell].v)
} else {
break
}
colindexTemp++
}
const varNames = []
const varValues = []
rowindex += 1
while (true) {
const varNameCell = this.colnames[colindex] + rowindex
if (sheet[varNameCell] && sheet[varNameCell].v) {
varNames.push(sheet[varNameCell].v)
const values = []
for (let i = 0; i < caseNames.length; i++) {
const variableValueCell = this.colnames[colindex + 1 + i] + rowindex
if (sheet[variableValueCell] && sheet[variableValueCell].v) {
values.push(sheet[variableValueCell].v.toString())
} else {
values.push(null)
}
}
varValues.push(values)
rowindex += 1
} else {
break
}
}
for (let caseIndex = 0; caseIndex < caseNames.length; caseIndex++) {
const caseName = caseNames[caseIndex]
const values = varNames.reduce((agg, varName, varIndex) => {
agg[varName] = varValues[varIndex][caseIndex] || null
return agg
}, {})
scriptResults.push({ header: { name: caseName }, values })
}
} else {
const variableNames = []
let colindexTemp = colindex + 1
while (true) {
const variableNameCell = this.colnames[colindexTemp] + rowindex
if (sheet[variableNameCell] && sheet[variableNameCell].v) {
variableNames.push(sheet[variableNameCell].v)
} else {
break
}
colindexTemp++
}
rowindex += 1
while (true) {
const caseNameCell = this.colnames[colindex] + rowindex
if (sheet[caseNameCell] && sheet[caseNameCell].v) {
const caseName = sheet[caseNameCell].v
const values = {}
for (let i = 0; i < variableNames.length; i++) {
const variableValueCell = this.colnames[colindex + 1 + i] + rowindex
if (sheet[variableValueCell] && sheet[variableValueCell].v) {
values[variableNames[i]] = sheet[variableValueCell].v.toString()
} else {
values[variableNames[i]] = null
}
}
rowindex += 1
scriptResults.push({ header: { name: caseName }, values })
} else {
break
}
}
}
}
})
if (scriptResults && scriptResults.length > 0) {
if (scriptType === Constants.SCRIPTING_TYPE_CONVO) {
this.context.AddConvos(scriptResults)
} else if (scriptType === Constants.SCRIPTING_TYPE_PCONVO) {
this.context.AddPartialConvos(scriptResults)
} else if (scriptType === Constants.SCRIPTING_TYPE_UTTERANCES) {
this.context.AddUtterances(scriptResults)
} else if (scriptType === Constants.SCRIPTING_TYPE_SCRIPTING_MEMORY) {
this.context.AddScriptingMemories(scriptResults)
}
return scriptResults
}
}
Decompile (convos) {
const eol = this.caps[Capabilities.SCRIPTING_XLSX_EOL_WRITE]
let sheetname = 'Botium'
if (this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES]) {
sheetname = this._splitSheetnames(this.caps[Capabilities.SCRIPTING_XLSX_SHEETNAMES])[0]
}
const data = []
const errors = []
if (convos) {
for (let i = 0; i < convos.length; i++) {
const convo = convos[i]
if (!convo.conversation) return
const validationResult = validateConvo(convo)
if (validationResult.errors.length > 0) {
errors.push(...validationResult.errors.map(e => new Error(`Convo ${i + 1} ${e.message}`)))
}
convo.conversation.forEach((step) => {
let cellContent = ''
const stepLines = convoStepToLines(step)
if (stepLines && stepLines.length > 0) cellContent = stepLines.join(eol)
data.push({ [step.sender]: cellContent })
})
data.push({})
}
}
if (errors.length > 0) {
throw new Error(errors.map(e => e.message).join(' - '))
}
const wb = XLSX.utils.book_new()
const ws = XLSX.utils.json_to_sheet(data, { header: ['me', 'bot'] })
XLSX.utils.book_append_sheet(wb, ws, sheetname)
const xlsxOutput = XLSX.write(wb, { type: 'buffer' })
return xlsxOutput
}
_get (sheet, rowindex, colindex) {
const cell = this.colnames[colindex] + rowindex
const cellValue = sheet[cell] && sheet[cell].v
return cellValue
}
_findOrigin (sheet, scriptType) {
let rowindex = this.caps[Capabilities.SCRIPTING_XLSX_STARTROW]
let colindex = this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL]
let hasNameCol = _.has(this.caps, Capabilities.SCRIPTING_XLSX_HASNAMECOL) ? !!this.caps[Capabilities.SCRIPTING_XLSX_HASNAMECOL] : null
if (_.isString(this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL])) {
colindex = this.colnames.findIndex((c) => c === this.caps[Capabilities.SCRIPTING_XLSX_STARTCOL])
} else if (colindex !== undefined) {
colindex = colindex - 1
}
if (rowindex === undefined && colindex === undefined) {
// eslint-disable-next-line no-labels
NestedLoop:
for (let cr = 1; cr < 1000; cr++) {
for (let cc = 0; cc < this.colnames.length; cc++) {
if (this._get(sheet, cr, cc)) {
if (scriptType === Constants.SCRIPTING_TYPE_SCRIPTING_MEMORY) {
if (cc > 0 && this._get(sheet, cr + 1, cc - 1)) {
rowindex = cr
colindex = cc - 1
// eslint-disable-next-line no-labels
break NestedLoop
}
} else {
rowindex = cr
colindex = cc
// eslint-disable-next-line no-labels
break NestedLoop
}
}
}
}
if (scriptType !== Constants.SCRIPTING_TYPE_SCRIPTING_MEMORY) {
if (rowindex !== undefined && this.caps[Capabilities.SCRIPTING_XLSX_HASHEADERS]) {
rowindex++
}
}
} else if (rowindex === undefined && colindex !== undefined) {
for (let i = 1; i < 1000; i++) {
if (this._get(sheet, i, colindex)) {
rowindex = i
break
}
}
if (this.caps[Capabilities.SCRIPTING_XLSX_HASHEADERS]) {
rowindex++
}
} else if (rowindex !== undefined && colindex === undefined) {
for (let i = 0; i < this.colnames.length; i++) {
if (this._get(sheet, rowindex, i)) {
colindex = i
break
}
}
}
if (_.isNull(hasNameCol)) {
if (scriptType === Constants.SCRIPTING_TYPE_CONVO || scriptType === Constants.SCRIPTING_TYPE_PCONVO) {
if (this.caps[Capabilities.SCRIPTING_XLSX_HASHEADERS]) {
if (this._get(sheet, rowindex - 1, colindex) && this._get(sheet, rowindex - 1, colindex + 1) && this._get(sheet, rowindex - 1, colindex + 2)) {
hasNameCol = true
}
}
}
}
return { rowindex, colindex, hasNameCol }
}
}