UNPKG

qasm-ts

Version:

QASM, the low-level programming language for quantum circuit specification, implemented in TypeScript.

490 lines (489 loc) 19.6 kB
"use strict";
/**
 * OpenQASM 2.0 Lexical Analyzer
 *
 * This module implements the lexer for OpenQASM 2.0, which provides a simpler
 * token set compared to OpenQASM 3.0. The lexer focuses on basic quantum circuit
 * constructs without the advanced classical programming features of version 3.0.
 *
 * Key characteristics of OpenQASM 2.0 lexing:
 * - **Limited token set**: Basic quantum and classical registers only
 * - **Simple operators**: Basic arithmetic and comparison operators
 * - **No control flow**: No tokens for loops, conditionals, or functions
 * - **Gate-focused**: Emphasis on gate definitions and applications
 * - **Mathematical functions**: Built-in math functions (sin, cos, etc.)
 *
 * Supported constructs:
 * - Quantum registers (`qreg`) and classical registers (`creg`)
 * - Gate definitions and applications
 * - Measurements with arrow notation (`->`)
 * - Basic arithmetic expressions for gate parameters
 * - Include statements for library files
 *
 * @module
 *
 * @example OpenQASM 2.0 lexing
 * ```typescript
 * const lexer = new Lexer('qreg q[2]; h q[0]; measure q -> c;');
 * const tokens = lexer.lex();
 * // Produces tokens for register declaration, gate, and measurement
 * ```
 */
Object.defineProperty(exports, "__esModule", { value: true });
var token_1 = require("./token");
var errors_1 = require("../errors");
/**
 * Line-terminator pattern shared by the newline test and the line-splitting
 * helpers. It is intentionally not global so that `.test()` stays stateless.
 */
var NEWLINE_PATTERN = /\n|\r(?!\n)|\u2028|\u2029|\r\n/;
/**
 * Handles throwing lexer errors with basic stack trace.
 * @param error - The error class to instantiate and throw.
 * @param line - The line number in the source code.
 * @param code - The source code that the error is about.
 */
function throwLexerError(error, line, code) {
    throw new error("Line ".concat(line, ": ").concat(code));
}
/**
 * Returns whether a given character could be an element of a numeric value.
 * @param c - The character.
 * @return Whether the character is a decimal digit or a decimal point.
 */
function isNumeric(c) {
    return c === "." || !isNaN(parseInt(c, 10));
}
/**
 * Returns whether a given character is a letter.
 * @param c - The character.
 * @param matchCase - Whether to check for a letter that is upper case, lower case, or either. (optional)
 * @return Whether the character is a letter.
 */
function isLetter(c, matchCase) {
    switch (matchCase) {
        case "upper":
            return /^[A-Z]$/.test(c);
        case "lower":
            return /^[a-z]$/.test(c);
        default:
            return /^[A-Za-z]$/.test(c);
    }
}
/**
 * Returns whether a given character is unicode.
 *
 * NOTE(review): the pattern below has no character class, so it only matches
 * text that starts with the three-character sequence U+0000, "-", U+00FF and
 * can never match a single character. It is left unchanged on purpose:
 * widening it to `[\u0000-\u00ff]` would make `readIdentifier` consume spaces
 * and punctuation. Decide on the intended character set before touching it.
 *
 * @param c - The character.
 * @param excludePi - Whether to exclude the Pi symbol from consideration.
 * @return - Whether the given character is valid unicode.
 */
function isUnicode(c, excludePi) {
    var isBasicUnicode = /^\u0000-\u00ff/.test(c);
    if (excludePi === true) {
        return isBasicUnicode && c !== "\u03C0";
    }
    return isBasicUnicode;
}
/**
 * Returns whether a given character is alphanumeric.
 * @param c - The character. Non-string values (e.g. the `undefined` produced
 *   by reading past the end of the input) are never alphanumeric.
 * @return Whether the character is alphanumeric.
 */
function isAlpha(c) {
    // Without the type guard `undefined` is coerced to the text "undefined",
    // which the pattern accepts.
    return typeof c === "string" && /^[0-9a-zA-Z]+$/.test(c);
}
/**
 * Returns whether a given character is a newline character.
 * @param c - The character.
 * @return Whether the character is a newline.
 */
function isNewline(c) {
    return NEWLINE_PATTERN.test(c);
}
/**
 * Returns whether a character may continue an identifier.
 * @param c - The character, or `undefined` at the end of the input.
 * @return Whether the character is alphanumeric, an underscore, or accepted by `isUnicode`.
 */
function isIdentifierPart(c) {
    return c !== undefined && (isAlpha(c) || c === "_" || isUnicode(c));
}
/**
 * Returns whether the remainder of a keyword is found at a position and ends
 * on a word boundary, so that e.g. `gates` is not mistaken for `gate` + `s`.
 * @param input - The full source text.
 * @param cursor - The position just after the keyword's first character.
 * @param tail - The keyword without its first character.
 * @return Whether the keyword tail matches and is not followed by an identifier character.
 */
function matchesKeywordTail(input, cursor, tail) {
    return input.startsWith(tail, cursor) &&
        !isIdentifierPart(input[cursor + tail.length]);
}
/**
 * OpenQASM 2.0 Lexical Analyzer
 *
 * A simpler lexer implementation focused on the core quantum circuit description
 * features of OpenQASM 2.0. This lexer handles the essential constructs needed
 * for basic quantum programming without the complexity of classical programming
 * language features.
 *
 * @example Basic OpenQASM 2.0 tokenization
 * ```typescript
 * const source = `
 * OPENQASM 2.0;
 * include "qelib1.inc";
 * qreg q[2];
 * creg c[2];
 * h q[0];
 * cx q[0],q[1];
 * measure q -> c;
 * `;
 *
 * const lexer = new Lexer(source);
 * const tokens = lexer.lex();
 * ```
 */
var Lexer = /** @class */ (function () {
    /**
     * Creates a lexer.
     * @param input - The string to lex.
     * @param cursor - The starting cursor position.
     */
    function Lexer(input, cursor) {
        if (cursor === void 0) { cursor = 0; }
        var _this = this;
        /**
         * Lexes either a keyword or, when the keyword does not match on a
         * word boundary, an ordinary identifier resolved through `lookup`.
         * @param first - The already-consumed first character.
         * @param tail - The keyword without its first character.
         * @param keywordToken - The token to emit when the keyword matches.
         * @return The keyword token, or the looked-up token with its literal.
         */
        var keywordOrIdentifier = function (first, tail, keywordToken) {
            if (matchesKeywordTail(_this.input, _this.cursor, tail)) {
                _this.readChar(tail.length);
                return [keywordToken];
            }
            var literal = first + _this.readIdentifier();
            return [(0, token_1.lookup)(literal), literal];
        };
        /**
         * Inspects the version that follows an `OPENQASM` header without
         * advancing the cursor, and rejects anything but major version 2.
         * @throws UnsupportedOpenQASMVersionError when the major version is not 2.
         */
        var checkOpenQasmVersion = function () {
            var end = _this.input.length;
            var pos = _this.cursor;
            var isDigitAt = function (index) {
                return index < end && !isNaN(parseInt(_this.input[index], 10));
            };
            while (pos < end && " \t".indexOf(_this.input[pos]) > -1) {
                pos++;
            }
            // Read the major version
            var majorVersion = "";
            while (isDigitAt(pos)) {
                majorVersion += _this.input[pos];
                pos++;
            }
            // Attempt to read the minor version
            var minorVersion = undefined;
            if (_this.input[pos] === ".") {
                pos++;
                minorVersion = "";
                while (isDigitAt(pos)) {
                    minorVersion += _this.input[pos];
                    pos++;
                }
            }
            // Parse major and minor versions
            var major = parseInt(majorVersion, 10);
            var minor = minorVersion ? parseInt(minorVersion, 10) : undefined;
            if (major !== 2) {
                throw new errors_1.UnsupportedOpenQASMVersionError("Unsupported OpenQASM version detected: ".concat(majorVersion, ".").concat(minor !== null && minor !== void 0 ? minor : 0));
            }
        };
        /**
         * Verifies that all appropriate lines end with a semicolon.
         *
         * Blank lines, comment lines, lone braces, lines mentioning `gate` and
         * lines containing a semicolon are accepted. Leading and trailing
         * whitespace is ignored when classifying a line, so indented comments
         * and whitespace-only lines are not reported.
         * @return A tuple of the status and if False, returns the problematic line.
         */
        this.verifyInput = function () {
            var lines = _this.input.split(NEWLINE_PATTERN);
            for (var i = 0; i < lines.length; i++) {
                var raw = lines[i];
                var text = raw.trim();
                var accepted = text.length === 0 ||
                    text.startsWith("//") ||
                    text === "{" ||
                    text === "}" ||
                    raw.includes("gate") ||
                    raw.includes(";");
                if (!accepted) {
                    return [false, i + 1, raw];
                }
            }
            return [true, null, null];
        };
        /**
         * Calling this method lexes the code represented by the provided string.
         * @return An array of tokens and their corresponding values.
         * @throws MissingSemicolonError when `verifyInput` rejects a line.
         */
        this.lex = function () {
            var tokens = [];
            var verification = _this.verifyInput();
            if (!verification[0]) {
                throwLexerError(errors_1.MissingSemicolonError, verification[1], verification[2]);
            }
            while (_this.cursor < _this.input.length) {
                var token = _this.nextToken();
                // Comments yield no token.
                if (token) {
                    tokens.push(token);
                }
            }
            return tokens;
        };
        /**
         * Reads a character and advances the cursor.
         * @param num - Optional cursor position modifier.
         * @return The character at the position the cursor had before advancing.
         */
        this.readChar = function (num) {
            if (num === void 0) { num = 1; }
            _this.cursor += num;
            return _this.input[_this.cursor - num];
        };
        /**
         * Advances the cursor past the next comment, stopping after the first
         * newline character or at the end of the input, whichever comes first.
         */
        this.skipComment = function () {
            var char = "";
            // The length guard matters for a comment on the final line with no
            // trailing newline; without it the loop would never end.
            while (_this.cursor < _this.input.length && !isNewline(char)) {
                char = _this.readChar();
            }
        };
        /**
         * Determines whether the next character to process equals a given character.
         * @param c - The given character.
         * @return Whether the next character equals the given character.
         */
        this.peekEq = function (c) { return _this.peek() === c; };
        /**
         * Reads a character without advancing the cursor.
         * @return The character under the cursor, or `undefined` at the end of the input.
         */
        this.peek = function () { return _this.input[_this.cursor]; };
        /**
         * Reads a numeric value.
         * @return The numeric value as a string.
         */
        this.readNumeric = function () {
            var num = "";
            while (isNumeric(_this.peek())) {
                num += _this.readChar();
            }
            return num;
        };
        /**
         * Reads an identifier.
         * @return The identifier as a string (empty if the cursor is not on an identifier character).
         */
        this.readIdentifier = function () {
            var id = "";
            while (_this.cursor < _this.input.length && isIdentifierPart(_this.peek())) {
                id += _this.readChar();
            }
            return id;
        };
        /**
         * Reads a string literal.
         *
         * Reading stops after the terminator, or at the end of the input when
         * the literal is unterminated (the partial text is then returned
         * without a terminator).
         * @param terminator - The literal's termination character.
         * @return The literal as a string, including the terminator when present.
         */
        this.readStringLiteral = function (terminator) {
            var lit = "";
            var char = "";
            while (char !== terminator && _this.cursor < _this.input.length) {
                char = _this.readChar();
                lit += char;
            }
            return lit;
        };
        /**
         * Advances the cursor past the next block of whitespace.
         * @return Always `null`.
         */
        this.skipWhitespace = function () {
            while (_this.cursor < _this.input.length &&
                " \t\n\r\v".indexOf(_this.peek()) > -1) {
                _this.cursor += 1;
            }
            return null;
        };
        /**
         * Lexes the next token.
         * @return The next token and its corresponding value, or `undefined`
         *   when a comment was skipped or after a bad `=` was reported.
         */
        this.nextToken = function () {
            _this.skipWhitespace();
            if (_this.cursor >= _this.input.length) {
                return [token_1.Token.EndOfFile];
            }
            var char = _this.readChar();
            switch (char) {
                case "=":
                    if (_this.peekEq("=")) {
                        _this.readChar();
                        return [token_1.Token.Equals];
                    }
                    throwLexerError(errors_1.BadEqualsError, _this.getLineNumber(_this.cursor), _this.getCurrentLine(_this.cursor));
                    break;
                case "-":
                    if (_this.peekEq(">")) {
                        _this.readChar();
                        return [token_1.Token.Arrow];
                    }
                    return [token_1.Token.Minus];
                case "+":
                    return [token_1.Token.Plus];
                case "*":
                    return [token_1.Token.Times];
                case "^":
                    return [token_1.Token.Power];
                case ";":
                    return [token_1.Token.Semicolon];
                case ",":
                    return [token_1.Token.Comma];
                case "(":
                    return [token_1.Token.LParen];
                case "[":
                    return [token_1.Token.LSParen];
                case "{":
                    return [token_1.Token.LCParen];
                case ")":
                    return [token_1.Token.RParen];
                case "]":
                    return [token_1.Token.RSParen];
                case "}":
                    return [token_1.Token.RCParen];
                case "/":
                    if (_this.peekEq("/")) {
                        _this.skipComment();
                        return;
                    }
                    return [token_1.Token.Divide];
                case "g":
                    return keywordOrIdentifier(char, "ate", token_1.Token.Gate);
                case "q":
                    return keywordOrIdentifier(char, "reg", token_1.Token.QReg);
                case "c":
                    return keywordOrIdentifier(char, "reg", token_1.Token.CReg);
                case "b":
                    return keywordOrIdentifier(char, "arrier", token_1.Token.Barrier);
                case "m":
                    return keywordOrIdentifier(char, "easure", token_1.Token.Measure);
                case "i":
                    return keywordOrIdentifier(char, "nclude", token_1.Token.Include);
                case "o":
                    return keywordOrIdentifier(char, "paque", token_1.Token.Opaque);
                case "O": {
                    // "pen" is matched case-insensitively, "QASM" exactly.
                    // Slicing keeps the comparison in bounds on short input.
                    var rest = _this.input.slice(_this.cursor, _this.cursor + 7);
                    if (rest.length === 7 &&
                        rest.slice(0, 3).toLowerCase() === "pen" &&
                        rest.slice(3) === "QASM") {
                        _this.readChar(7);
                        // The version text stays in the input; it is lexed
                        // as an ordinary number by the following call.
                        checkOpenQasmVersion();
                        return [token_1.Token.OpenQASM];
                    }
                    var openQasmLit = char + _this.readIdentifier();
                    return [(0, token_1.lookup)(openQasmLit), openQasmLit];
                }
                case '"': {
                    var stringLiteral = char + _this.readStringLiteral('"');
                    return [token_1.Token.String, stringLiteral];
                }
                case "’": {
                    var singleStringLiteral = char + _this.readStringLiteral("’");
                    return [token_1.Token.String, singleStringLiteral];
                }
                default:
                    if (isLetter(char)) {
                        var literal = char + _this.readIdentifier();
                        return [(0, token_1.lookup)(literal), literal];
                    }
                    if (isNumeric(char)) {
                        var num = char + _this.readNumeric();
                        var numericToken = num.indexOf(".") !== -1
                            ? token_1.Token.Real
                            : token_1.Token.NNInteger;
                        return [numericToken, parseFloat(num)];
                    }
                    return [token_1.Token.Illegal];
            }
        };
        /**
         * Returns the line number where the current cursor is located.
         * @param cursor - The current cursor position in the input string.
         * @return The 1-based line number.
         */
        this.getLineNumber = function (cursor) {
            return _this.input.substring(0, cursor).split(NEWLINE_PATTERN).length;
        };
        /**
         * Returns the current line of code where the cursor is located.
         * @param cursor - The current cursor position in the input string.
         * @return The specific line where the cursor is located.
         */
        this.getCurrentLine = function (cursor) {
            var lines = _this.input.split(NEWLINE_PATTERN);
            return lines[_this.getLineNumber(cursor) - 1];
        };
        this.input = input;
        this.cursor = cursor;
    }
    return Lexer;
}());
exports.default = Lexer;