kokopu
Version:
A JavaScript/TypeScript library implementing the chess game rules and providing tools to read/write the standard chess file formats.
491 lines (400 loc) • 17.2 kB
text/typescript
/*!
* -------------------------------------------------------------------------- *
* *
* Kokopu - A JavaScript/TypeScript chess library. *
* <https://www.npmjs.com/package/kokopu> *
* Copyright (C) 2018-2025 Yoann Le Montagner <yo35 -at- melix.net> *
* *
* Kokopu is free software: you can redistribute it and/or *
* modify it under the terms of the GNU Lesser General Public License *
* as published by the Free Software Foundation, either version 3 of *
* the License, or (at your option) any later version. *
* *
* Kokopu is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General *
* Public License along with this program. If not, see *
* <http://www.gnu.org/licenses/>. *
* *
* -------------------------------------------------------------------------- */
import { InvalidPGN } from '../exception';
import { i18n } from '../i18n';
import { trimAndCollapseSpaces } from '../private_game/common';
/**
 * Regular expression decorated with the bookkeeping fields that the token stream attaches to each of its matchers.
 */
interface RegExpWrapper extends RegExp {

	/** Whether the line counter of the stream must be updated with the line breaks found in a successful match. */
	needIncrementLineIndex: boolean,

	/** Index recorded for the last search performed with this regular expression (`-1` until a search has been performed). */
	matchedIndex: number,

	/** Result of the last search performed with this regular expression (`null` until a search has been performed). */
	matched: RegExpExecArray | null,
}


/**
 * Turn the given regular expression into a {@link RegExpWrapper}.
 *
 * The object passed as argument is decorated in place and returned: no copy is made.
 *
 * @param re - Regular expression to decorate.
 * @param needIncrementLineIndex - Value of the corresponding flag on the returned object (`false` if omitted).
 * @returns The object `re` itself, with its bookkeeping fields set to their initial state.
 */
function regExpWrapper(re: RegExp, needIncrementLineIndex?: boolean): RegExpWrapper {
	const wrapped: RegExpWrapper = Object.assign(re, {
		needIncrementLineIndex: needIncrementLineIndex ?? false,
		matchedIndex: -1,
		matched: null,
	});
	return wrapped;
}
/**
 * Types of tokens that could be encountered in a PGN.
 *
 * The numeric values are significant: the movetext-related tokens (from `MOVE_NUMBER` to `END_OF_GAME`) form
 * a contiguous range of values, and `TokenStream.isMoveTextSection()` relies on this to classify the current token
 * with a simple interval comparison. Keep this range contiguous when adding or reordering members.
 */
export const enum TokenType {
/* eslint-disable @stylistic/no-multi-spaces */
INVALID = 0, // State of a stream in which no token has been consumed yet (also set when entering `skipGame()`)
BEGIN_HEADER = 1, // [
END_HEADER = 2, // ]
HEADER_ID = 3, // Identifier of a header (e.g. `White` in header `[White "Kasparov, G."]`)
HEADER_VALUE = 4, // Value of a header (e.g. `Kasparov, G.` in header `[White "Kasparov, G."]`)
MOVE_NUMBER = 5, // 42. or 23...
MOVE = 6, // SAN notation
NAG = 7, // $[1-9][0-9]* or a key from table SPECIAL_NAGS_LOOKUP (!!, +-, etc..)
COMMENT = 8, // {some text}
BEGIN_VARIATION = 9, // (
END_VARIATION = 10, // )
END_OF_GAME = 11, // 1-0, 0-1, 1/2-1/2 or *
/* eslint-enable */
}
// Movetext-related tokens are found within this interval (both bounds included): a token belongs to the movetext section
// if and only if its type lies between `FIRST_MOVE_TEXT_TOKEN` and `LAST_MOVE_TEXT_TOKEN`. This relies on the numeric
// values assigned to the members of `TokenType`.
const FIRST_MOVE_TEXT_TOKEN = TokenType.MOVE_NUMBER;
const LAST_MOVE_TEXT_TOKEN = TokenType.END_OF_GAME;
/**
 * Conversion table from the textual NAG shortcuts (`!!`, `+/-`, `N`, etc.) to the corresponding numeric NAG codes.
 * Several shortcuts may be associated to the same code.
 */
const SPECIAL_NAGS_LOOKUP = new Map<string, number>([

	// Move assessments
	[ '!!', 3 ], // very good move
	[ '!', 1 ], // good move
	[ '!?', 5 ], // interesting move
	[ '?!', 6 ], // questionable move
	[ '?', 2 ], // bad move
	[ '??', 4 ], // very bad move

	// Position assessments
	[ '+-', 18 ], // White has a decisive advantage
	[ '+/-', 16 ], // White has a moderate advantage
	[ '+/=', 14 ], // White has a slight advantage
	[ '+=', 14 ], // (same)
	[ '=', 10 ], // equal position
	[ '~', 13 ], // unclear position
	[ 'inf', 13 ], // (same)
	[ '=/+', 15 ], // Black has a slight advantage
	[ '=+', 15 ], // (same)
	[ '-/+', 17 ], // Black has a moderate advantage
	[ '-+', 19 ], // Black has a decisive advantage

	// Other annotations
	[ 'RR', 145 ], // Editorial comment
	[ 'N', 146 ], // Novelty
]);
/**
 * Location within a PGN text.
 *
 * Objects of this type are returned by `TokenStream.currentLocation()`, and can be passed to the constructor
 * of `TokenStream` to start the parsing somewhere else than at the beginning of the text.
 */
export interface StreamPosition {
/** Index of the character within the string returned by `TokenStream.text()`. */
pos: number,
/** Index of the line containing the character (a `TokenStream` built without initial location starts at line 1). */
lineIndex: number,
}
/**
 * Stream of PGN tokens.
 *
 * The stream reads the PGN text passed to the constructor from left to right. Each successful call to `consumeToken()`
 * recognizes one token, whose type, value and location are then available through the accessor methods.
 * Alternatively, `skipGame()` moves the cursor just after the next end-of-game marker without decoding the tokens.
 */
export class TokenStream {

	/** What is being parsed. */
	private readonly _text: string;

	/** Current position in the string. */
	private _pos = 0;

	/** Current line index in the string. */
	private _lineIndex = 1;

	/** Current token. */
	private _token: TokenType = TokenType.INVALID;

	/** Current token value (if any). */
	private _tokenValue: unknown = null;

	/** Position of the current token in the string. */
	private _tokenCharacterIndex = -1;

	/** Line index of the current token in the string. */
	private _tokenLineIndex = -1;

	/** Whether an empty line has been encountered before the current token. */
	private _emptyLineBeforeToken = false;

	/** Whether an empty line will be encountered after the current token. */
	private _emptyLineAfterToken = false;

	// Space-like matchers
	private readonly _matchSpaces = regExpWrapper(/[ \f\t\v]+/g);
	private readonly _matchLineBreak = regExpWrapper(/\r?\n|\r/g, true);
	private readonly _matchFastAdvance = regExpWrapper(/[^ \f\t\v\r\n"{][^ \f\t\v\r\n"{10*]*/g);

	// Token matchers
	private readonly _matchBeginHeader = regExpWrapper(/\[/g);
	private readonly _matchEndHeader = regExpWrapper(/\]/g);
	private readonly _matchHeaderId = regExpWrapper(/(\w+)/g);
	private readonly _matchEnterHeaderValue = regExpWrapper(/"/g);
	private readonly _matchMoveNumber = regExpWrapper(/[0-9]+\.(?:\.\.)?/g);
	private readonly _matchMove = regExpWrapper(/(?:O-O(?:-O)?|0-0(?:-0)?|[KQRBN][a-h]?[1-8]?x?[a-h][1-8]|(?:[a-h]x?)?[a-h][1-8](?:=?[KQRBNP])?)[+#]?|--/g);
	private readonly _matchNag = regExpWrapper(/([!?][!?]?|\+\/?[-=]|[-=]\/?\+|=|inf|~|RR|N)|\$([1-9][0-9]*)/g);
	private readonly _matchEnterComment = regExpWrapper(/\{/g);
	private readonly _matchBeginVariation = regExpWrapper(/\(/g);
	private readonly _matchEndVariation = regExpWrapper(/\)/g);
	private readonly _matchEndOfGame = regExpWrapper(/1-0|0-1|1\/2-1\/2|\*/g);

	// Special modes (each of them is tried just after the corresponding opening delimiter has been consumed)
	private readonly _headerValueMode = regExpWrapper(/((?:[^\\"\f\t\v\r\n]|\\[^\f\t\v\r\n])*)"/g);
	private readonly _headerValueDegradedMode = regExpWrapper(/[^\r\n]*/g);
	private readonly _commentMode = regExpWrapper(/((?:[^\\}]|\\(?:.|[\r\n]))*)\}/g, true);

	/**
	 * @param text - PGN text to parse. A leading BOM (byte order mark), if any, is removed.
	 * @param initialLocation - Location at which the parsing starts. If omitted, the parsing starts at the beginning
	 *        of the text, on line 1. The position is interpreted within the text as returned by `text()`,
	 *        i.e. once the BOM has been removed.
	 */
	constructor(text: string, initialLocation?: StreamPosition) {

		// Remove the BOM (byte order mark) if any.
		if (text.codePointAt(0) === 0xFEFF) {
			text = text.substring(1);
		}
		this._text = text;

		// Skip the beginning of the text if requested.
		if (initialLocation !== undefined) {
			this._pos = initialLocation.pos;
			this._lineIndex = initialLocation.lineIndex;
		}
	}

	/**
	 * PGN string being parsed.
	 */
	text(): string {
		return this._text;
	}

	/**
	 * Current location within the stream.
	 */
	currentLocation(): StreamPosition {
		return { pos: this._pos, lineIndex: this._lineIndex };
	}

	/**
	 * Whether there is an empty line just before the current token. WARNING: valid only after a call to `consumeToken()`.
	 */
	emptyLineBeforeToken(): boolean {
		return this._emptyLineBeforeToken;
	}

	/**
	 * Whether there is an empty line just after the current token. WARNING: valid only after a call to `consumeToken()`.
	 */
	emptyLineAfterToken(): boolean {
		return this._emptyLineAfterToken;
	}

	/**
	 * Current token type. WARNING: valid only after a call to `consumeToken()`.
	 */
	token(): TokenType {
		return this._token;
	}

	/**
	 * Value associated to the current token, if any. WARNING: valid only after a call to `consumeToken()`.
	 *
	 * The value is the matched text for the tokens `MOVE`, `END_OF_GAME` and `HEADER_ID`, the unescaped text for `HEADER_VALUE`,
	 * the numeric code for `NAG`, an object `TokenCommentData` for `COMMENT`, and `null` for the other types of token.
	 * The type parameter `T` is not checked: it is up to the caller to request the type corresponding to the current token.
	 */
	tokenValue<T>(): T {
		return this._tokenValue as T;
	}

	/**
	 * Character index of the current token. WARNING: valid only after a call to `consumeToken()`.
	 */
	tokenCharacterIndex(): number {
		return this._tokenCharacterIndex;
	}

	/**
	 * Line index of the current token. WARNING: valid only after a call to `consumeToken()`.
	 */
	tokenLineIndex(): number {
		return this._tokenLineIndex;
	}

	/**
	 * Whether the current token is a token of the move-text section. WARNING: valid only after a call to `consumeToken()`.
	 */
	isMoveTextSection(): boolean {
		return this._token >= FIRST_MOVE_TEXT_TOKEN && this._token <= LAST_MOVE_TEXT_TOKEN;
	}

	/**
	 * Try to consume 1 token.
	 *
	 * @returns `true` if a token could have been read, `false` if the end of the text has been reached.
	 * @throws {@link exception.InvalidPGN} if the text cannot be interpreted as a valid token.
	 */
	consumeToken(): boolean {

		// Consume blank (i.e. meaning-less) characters. The blanks located after a regular token have already been skipped
		// when this token was consumed: only the initial state and the end-of-game markers leave pending blanks behind them.
		const pendingBlanks = this._token === TokenType.INVALID || this._token === TokenType.END_OF_GAME;
		this._emptyLineBeforeToken = pendingBlanks ? this.skipBlanks() : this._emptyLineAfterToken;
		if (this._pos >= this._text.length) {
			this._tokenCharacterIndex = this._text.length;
			this._tokenLineIndex = this._lineIndex;
			return false;
		}

		// Save the location of the token.
		this._tokenCharacterIndex = this._pos;
		this._tokenLineIndex = this._lineIndex;

		// The matchers below are tried in sequence, and the first one that matches at the current position wins.

		// Match a move number
		if (this.testAtPos(this._matchMoveNumber)) {
			this._token = TokenType.MOVE_NUMBER;
			this._tokenValue = null;
		}

		// Match a move or a null-move
		else if (this.testAtPos(this._matchMove)) {
			this._token = TokenType.MOVE;
			this._tokenValue = this._matchMove.matched![0];
		}

		// Match a NAG (capture group 2 is defined for the form `$<number>`, capture group 1 for the textual shortcuts)
		else if (this.testAtPos(this._matchNag)) {
			this._token = TokenType.NAG;
			this._tokenValue = this._matchNag.matched![2] === undefined ?
				SPECIAL_NAGS_LOOKUP.get(this._matchNag.matched![1]) :
				parseInt(this._matchNag.matched![2], 10);
		}

		// Match a comment
		else if (this.testAtPos(this._matchEnterComment)) {
			if (!this.testAtPos(this._commentMode)) {
				throw new InvalidPGN(this._text, this._pos, this._lineIndex, i18n.INVALID_PGN_TOKEN);
			}
			this._token = TokenType.COMMENT;
			this._tokenValue = parseCommentValue(this._commentMode.matched![1]);
		}

		// Match the beginning of a variation
		else if (this.testAtPos(this._matchBeginVariation)) {
			this._token = TokenType.BEGIN_VARIATION;
			this._tokenValue = null;
		}

		// Match the end of a variation
		else if (this.testAtPos(this._matchEndVariation)) {
			this._token = TokenType.END_VARIATION;
			this._tokenValue = null;
		}

		// Match a end-of-game marker
		else if (this.testAtPos(this._matchEndOfGame)) {
			this._token = TokenType.END_OF_GAME;
			this._tokenValue = this._matchEndOfGame.matched![0];
		}

		// Match the beginning of a game header
		else if (this.testAtPos(this._matchBeginHeader)) {
			this._token = TokenType.BEGIN_HEADER;
			this._tokenValue = null;
		}

		// Match the end of a game header
		else if (this.testAtPos(this._matchEndHeader)) {
			this._token = TokenType.END_HEADER;
			this._tokenValue = null;
		}

		// Match the ID of a game header
		else if (this.testAtPos(this._matchHeaderId)) {
			this._token = TokenType.HEADER_ID;
			this._tokenValue = this._matchHeaderId.matched![1];
		}

		// Match the value of a game header
		else if (this.testAtPos(this._matchEnterHeaderValue)) {
			if (!this.testAtPos(this._headerValueMode)) {
				throw new InvalidPGN(this._text, this._pos, this._lineIndex, i18n.INVALID_PGN_TOKEN);
			}
			this._token = TokenType.HEADER_VALUE;
			this._tokenValue = parseHeaderValue(this._headerValueMode.matched![1]);
		}

		// Otherwise, the string is badly formatted with respect to the PGN syntax
		else {
			throw new InvalidPGN(this._text, this._pos, this._lineIndex, i18n.INVALID_PGN_TOKEN);
		}

		// The blanks following an end-of-game marker are deliberately left in the stream: they are skipped by the next call
		// to `consumeToken()`, so that the cursor remains just after the marker in the meantime.
		this._emptyLineAfterToken = this._token === TokenType.END_OF_GAME ? false : this.skipBlanks();
		return true;
	}

	/**
	 * Try to skip all the tokens until a END_OF_GAME token or the end of the file is encountered.
	 *
	 * This method never throws on ill-formed input: an unterminated comment is skipped up to the end of the text,
	 * and an unterminated header value is skipped up to the end of its line.
	 *
	 * @returns `true` if any token have been found, `false` if the end of the file has been reached without finding any token.
	 */
	skipGame(): boolean {
		let atLeastOneTokenFound = false;
		this._token = TokenType.INVALID;
		while (true) {

			// Consume blank (i.e. meaning-less) characters
			this.skipBlanks();
			if (this._pos >= this._text.length) {
				return atLeastOneTokenFound;
			}
			atLeastOneTokenFound = true;

			// Skip comments.
			if (this.testAtPos(this._matchEnterComment)) {
				if (!this.testAtPos(this._commentMode)) {
					this._pos = this._text.length;
					return true;
				}
			}

			// Skip header values.
			else if (this.testAtPos(this._matchEnterHeaderValue)) {
				if (!this.testAtPos(this._headerValueMode)) {
					this.testAtPos(this._headerValueDegradedMode); // Always true as `_headerValueDegradedMode` matches the empty string.
				}
			}

			// Match a end-of-game marker.
			else if (this.testAtPos(this._matchEndOfGame)) {
				return true;
			}

			// Skip everything else until the next space or comment/header-value beginning.
			else {
				this.testAtPos(this._matchFastAdvance); // Always true given the other regexes `_matchEnterComment` and `_matchEnterHeaderValue`.
			}
		}
	}

	/**
	 * Advance until the first non-blank character.
	 *
	 * @returns `true` if an empty line has been encountered.
	 */
	private skipBlanks(): boolean {
		let newLineCount = 0;
		while (this._pos < this._text.length) {
			if (this.testAtPos(this._matchSpaces)) {
				// Nothing to do...
			}
			else if (this.testAtPos(this._matchLineBreak)) {
				++newLineCount;
			}
			else {
				break;
			}
		}

		// An empty line was encountered if and only if at least 2 line breaks were found.
		return newLineCount >= 2;
	}

	/**
	 * Try to match the given regular expression at the current position, and increment the stream cursor `this._pos`
	 * and the line counter `this._lineIndex` in case of a match.
	 *
	 * The result of the last search is cached on the regular expression object: as long as the cursor has not moved beyond
	 * the index of this result, no new search is performed. This relies on the cursor never moving backward.
	 *
	 * @returns `true` if the regular expression matches exactly at the current position, `false` otherwise
	 *          (in which case neither the cursor nor the line counter are modified).
	 */
	private testAtPos(re: RegExpWrapper): boolean {

		// Refresh the cached search result if it is located before the current position.
		if (re.matchedIndex < this._pos) {
			re.lastIndex = this._pos;
			re.matched = re.exec(this._text);
			re.matchedIndex = re.matched === null ? this._text.length : re.matched.index;
		}

		// A failed search is cached with the index `this._text.length`, which is equal to the current position when the cursor
		// has reached the end of the text (e.g. if the text ends just after a `{` or a `"`). Such a cached failure must not be
		// mistaken for a match: `re.lastIndex` has been reset to 0 by the failed search, hence the cursor would be moved back
		// to the beginning of the text, and the callers would dereference a null match result.
		const matched = re.matched;
		if (matched === null || re.matchedIndex !== this._pos) {
			return false;
		}

		// Move the cursor just after the match.
		this._pos = re.lastIndex;

		// Count the line breaks within the matched text if necessary.
		if (re.needIncrementLineIndex) {
			const reLineBreak = /\r?\n|\r/g;
			while (reLineBreak.exec(matched[0])) {
				++this._lineIndex;
			}
		}
		return true;
	}
}
/**
 * Value associated to a token of type `COMMENT`, as built by `parseCommentValue()`.
 */
export interface TokenCommentData {
/** Text of the comment once the `[%key value]` tags have been removed, or `undefined` if there is no text left. */
comment: string | undefined,
/** Tags found in the comment: for each `[%key value]` with a non-empty value, `key` is associated to `value`. */
tags: Map<string, string>,
}
/**
 * Decode the raw text found between the double-quotes of a header value.
 *
 * The escape sequences `\\` and `\"` are replaced by the character they represent (respectively `\` and `"`),
 * then the result is passed through `trimAndCollapseSpaces()`.
 *
 * @param rawHeaderValue - Text of the header value, as written in the PGN (delimiters excluded).
 */
function parseHeaderValue(rawHeaderValue: string): string {
	const unescaped = rawHeaderValue.replace(/\\([\\"])/g, '$1');
	return trimAndCollapseSpaces(unescaped);
}
/**
 * Decode the raw text found between the braces of a comment.
 *
 * The processing consists in 3 steps:
 * 1. the escape sequences `\\` and `\}` are replaced by the character they represent,
 * 2. the tags `[%key value]` are extracted from the text (each of them being replaced by a space character),
 * 3. the remaining text is trimmed, and its sequences of space characters are collapsed.
 *
 * @param rawComment - Text of the comment, as written in the PGN (delimiters excluded).
 * @returns The remaining text (`undefined` if empty), together with the tags that have a non-empty value.
 *          If a key appears several times, the last non-empty value wins.
 */
function parseCommentValue(rawComment: string): TokenCommentData {

	// Step 1: unescape the special characters.
	const unescaped = rawComment.replace(/\\([\\}])/g, '$1');

	// Step 2: collect the tags, and remove them from the text. The tags with an empty value are removed from the text too,
	// but they are not registered.
	const tags = new Map<string, string>();
	const withoutTags = unescaped.replace(/\[%(\w+)\s([^[\]]*)\]/g, (_tag: string, key: string, rawValue: string) => {
		const value = trimAndCollapseSpaces(rawValue);
		if (value !== '') {
			tags.set(key, value);
		}
		return ' ';
	});

	// Step 3: normalize the remaining text.
	const text = trimAndCollapseSpaces(withoutTags);
	return { comment: text === '' ? undefined : text, tags: tags };
}