apg-unicode
Version:
JavaScript APG parser of Unicode code point arrays
315 lines (297 loc) • 9.26 kB
JavaScript
// ## Stats Class Overview
// The `Stats` class provides basic profiling of the parser's behavior
// for a given SABNF grammar and input string.
// It tracks how frequently each parse tree node type is invoked during parsing,
// offering insight into parser behavior and grammar optimization.
// ### Key Features
// - **Node Type Profiling**
// Counts the number of times each parse tree node type is triggered during parsing.
// - **Rule-Level Granularity**
// Provides individual hit counts for each named rule and user-defined terminal (UDT) node.
// - **Grammar Diagnostics**
// Useful for identifying hotspots, optimizing grammar structure, and understanding parser performance.
// ### Use Case
// Ideal for developers seeking lightweight diagnostics and performance insights
// during grammar development or parser tuning.
import { identifiers as id } from '../src/identifiers.js';
export { Stats };
class Stats {
#FILENAME = 'stats.js: ';
#rules;
#udts;
#totals;
#stats = [];
#ruleStats = [];
#udtStats = [];
// Called by the user after the parser has completed.
// Displays hit counts and totals for each and all node types.
displayStats = () => {
let out = '';
const displayRow = (op, m, e, n, t) => {
this.#totals.match += m;
this.#totals.empty += e;
this.#totals.nomatch += n;
this.#totals.total += t;
const mm = this.#normalize(m);
const ee = this.#normalize(e);
const nn = this.#normalize(n);
const tt = this.#normalize(t);
return `${op} | ${mm} | ${ee} | ${nn} | ${tt} |\n`;
};
out += ' OPERATOR STATS\n';
out += ' | MATCH | EMPTY | NOMATCH | TOTAL |\n';
out += displayRow(
' ALT',
this.#stats[id.ALT].match,
this.#stats[id.ALT].empty,
this.#stats[id.ALT].nomatch,
this.#stats[id.ALT].total
);
out += displayRow(
' CAT',
this.#stats[id.CAT].match,
this.#stats[id.CAT].empty,
this.#stats[id.CAT].nomatch,
this.#stats[id.CAT].total
);
out += displayRow(
' REP',
this.#stats[id.REP].match,
this.#stats[id.REP].empty,
this.#stats[id.REP].nomatch,
this.#stats[id.REP].total
);
out += displayRow(
' RNM',
this.#stats[id.RNM].match,
this.#stats[id.RNM].empty,
this.#stats[id.RNM].nomatch,
this.#stats[id.RNM].total
);
out += displayRow(
' TRG',
this.#stats[id.TRG].match,
this.#stats[id.TRG].empty,
this.#stats[id.TRG].nomatch,
this.#stats[id.TRG].total
);
out += displayRow(
' TBS',
this.#stats[id.TBS].match,
this.#stats[id.TBS].empty,
this.#stats[id.TBS].nomatch,
this.#stats[id.TBS].total
);
out += displayRow(
' TLS',
this.#stats[id.TLS].match,
this.#stats[id.TLS].empty,
this.#stats[id.TLS].nomatch,
this.#stats[id.TLS].total
);
out += displayRow(
' UDT',
this.#stats[id.UDT].match,
this.#stats[id.UDT].empty,
this.#stats[id.UDT].nomatch,
this.#stats[id.UDT].total
);
out += displayRow(
' AND',
this.#stats[id.AND].match,
this.#stats[id.AND].empty,
this.#stats[id.AND].nomatch,
this.#stats[id.AND].total
);
out += displayRow(
' NOT',
this.#stats[id.NOT].match,
this.#stats[id.NOT].empty,
this.#stats[id.NOT].nomatch,
this.#stats[id.NOT].total
);
out += displayRow('TOTAL', this.#totals.match, this.#totals.empty, this.#totals.nomatch, this.#totals.total);
return out;
};
// Called by the user after parser completion.
// * @param {string | undefined} type
// * - 'hits'(default) - rule/UDT names ordered by hit count descending
// * - 'alpha' - rule/UDT names ordered alphabetically
// * - 'index' - rule/UDT names ordered by index (the order they appear in the SABNF grammar)
displayHits = (type) => {
let out = '';
const displayRow = (m, e, n, t, name) => {
this.#totals.match += m;
this.#totals.empty += e;
this.#totals.nomatch += n;
this.#totals.total += t;
const mm = this.#normalize(m);
const ee = this.#normalize(e);
const nn = this.#normalize(n);
const tt = this.#normalize(t);
return `| ${mm} | ${ee} | ${nn} | ${tt} | ${name}\n`;
};
if (typeof type === 'string' && type.toLowerCase()[0] === 'a') {
this.#ruleStats.sort(this.#sortAlpha);
this.#udtStats.sort(this.#sortAlpha);
out += ' RULES/UDTS ALPHABETICALLY\n';
} else if (typeof type === 'string' && type.toLowerCase()[0] === 'i') {
this.#ruleStats.sort(this.#sortIndex);
this.#udtStats.sort(this.#sortIndex);
out += ' RULES/UDTS BY INDEX\n';
} else {
this.#ruleStats.sort(this.#sortHits);
this.#udtStats.sort(this.#sortHits);
out += ' RULES/UDTS BY HIT COUNT\n';
}
out += '| MATCH | EMPTY | NOMATCH | TOTAL | NAME\n';
for (let i = 0; i < this.#ruleStats.length; i += 1) {
let r = this.#ruleStats[i];
if (r.total) {
out += displayRow(r.match, r.empty, r.nomatch, r.total, r.name);
}
}
for (let i = 0; i < this.#udtStats.length; i += 1) {
let r = this.#udtStats[i];
if (r.total) {
out += displayRow(r.match, r.empty, r.nomatch, r.total, r.name);
}
}
return out;
};
// Called by parser to initialize the Stats object.
init = (r, u) => {
this.#rules = r;
this.#udts = u;
this.#clear();
};
// Called by the parser after each node has been traversed.
collect = (op, sys) => {
this.#incStat(this.#totals, sys.state, sys.phraseLength);
this.#incStat(this.#stats[op.type], sys.state, sys.phraseLength);
if (op.type === id.RNM) {
this.#incStat(this.#ruleStats[op.index], sys.state, sys.phraseLength);
}
if (op.type === id.UDT) {
this.#incStat(this.#udtStats[op.index], sys.state, sys.phraseLength);
}
};
// Zero out all stats.
#clear = () => {
class EmptyStat {
constructor() {
this.empty = 0;
this.match = 0;
this.nomatch = 0;
this.total = 0;
}
}
this.#stats.length = 0;
this.#totals = new EmptyStat();
this.#stats[id.ALT] = new EmptyStat();
this.#stats[id.CAT] = new EmptyStat();
this.#stats[id.REP] = new EmptyStat();
this.#stats[id.RNM] = new EmptyStat();
this.#stats[id.TRG] = new EmptyStat();
this.#stats[id.TBS] = new EmptyStat();
this.#stats[id.TLS] = new EmptyStat();
this.#stats[id.UDT] = new EmptyStat();
this.#stats[id.AND] = new EmptyStat();
this.#stats[id.NOT] = new EmptyStat();
this.#ruleStats.length = 0;
for (let i = 0; i < this.#rules.length; i += 1) {
this.#ruleStats.push({
empty: 0,
match: 0,
nomatch: 0,
total: 0,
name: this.#rules[i].name,
lower: this.#rules[i].lower,
index: this.#rules[i].index,
});
}
if (this.#udts.length > 0) {
this.#udtStats.length = 0;
for (let i = 0; i < this.#udts.length; i += 1) {
this.#udtStats.push({
empty: 0,
match: 0,
nomatch: 0,
total: 0,
name: this.#udts[i].name,
lower: this.#udts[i].lower,
index: this.#udts[i].index,
});
}
}
};
// Set leading spaces to keep the count display at a fixed number of characters.
#normalize = (n) => {
if (n < 10) {
return ` ${n}`;
}
if (n < 100) {
return ` ${n}`;
}
if (n < 1000) {
return ` ${n}`;
}
if (n < 10000) {
return ` ${n}`;
}
if (n < 100000) {
return ` ${n}`;
}
if (n < 1000000) {
return ` ${n}`;
}
return `${n}`;
};
// The sort callback for alphabetical sorting.
#sortAlpha = (lhs, rhs) => {
if (lhs.lower < rhs.lower) {
return -1;
}
if (lhs.lower > rhs.lower) {
return 1;
}
return 0;
};
// The sort callback for hit count sorting.
#sortHits = (lhs, rhs) => {
if (lhs.total < rhs.total) {
return 1;
}
if (lhs.total > rhs.total) {
return -1;
}
return this.#sortAlpha(lhs, rhs);
};
// The sort callback for index sorting.
#sortIndex = (lhs, rhs) => {
if (lhs.index < rhs.index) {
return -1;
}
if (lhs.index > rhs.index) {
return 1;
}
return 0;
};
// Increment the designated operator hit count by one.
#incStat = (stat, state) => {
stat.total += 1;
switch (state) {
case id.EMPTY:
stat.empty += 1;
break;
case id.MATCH:
stat.match += 1;
break;
case id.NOMATCH:
stat.nomatch += 1;
break;
default:
throw new Error(`${this.#FILENAME}collect(): this.#incStat(): unrecognized state: ${state}`);
}
};
}