qasm-ts
Version:
QASM, the low-level programming language for quantum circuit specification, implemented in TypeScript.
490 lines (489 loc) • 19.6 kB
JavaScript
"use strict";
/**
* OpenQASM 2.0 Lexical Analyzer
*
* This module implements the lexer for OpenQASM 2.0, which provides a simpler
* token set compared to OpenQASM 3.0. The lexer focuses on basic quantum circuit
* constructs without the advanced classical programming features of version 3.0.
*
* Key characteristics of OpenQASM 2.0 lexing:
* - **Limited token set**: Basic quantum and classical registers only
* - **Simple operators**: Basic arithmetic and comparison operators
* - **No control flow**: No tokens for loops, conditionals, or functions
* - **Gate-focused**: Emphasis on gate definitions and applications
* - **Mathematical functions**: Built-in math functions (sin, cos, etc.)
*
* Supported constructs:
* - Quantum registers (`qreg`) and classical registers (`creg`)
* - Gate definitions and applications
* - Measurements with arrow notation (`->`)
* - Basic arithmetic expressions for gate parameters
* - Include statements for library files
*
* @module
*
* @example OpenQASM 2.0 lexing
* ```typescript
* const lexer = new Lexer('qreg q[2]; h q[0]; measure q -> c;');
* const tokens = lexer.lex();
* // Produces tokens for register declaration, gate, and measurement
* ```
*/
Object.defineProperty(exports, "__esModule", { value: true });
var token_1 = require("./token");
var errors_1 = require("../errors");
/**
 * Raises a lexer error whose message pairs a line number with the offending code.
 *
 * The message has the form `Line <line>: <code>`.
 *
 * @param error - Constructor of the error to raise; it is invoked with `new` and the message.
 * @param line - The line number in the source code.
 * @param code - The source code that the error is about.
 * @throws Always throws an instance of `error`.
 */
function throwLexerError(error, line, code) {
    var message = "Line ".concat(line, ": ", code);
    throw new error(message);
}
/**
 * Tells whether a character may appear inside a numeric literal.
 *
 * A decimal point counts as numeric; anything else is numeric when `parseInt`
 * can read a number from it.
 *
 * @param c - The character.
 * @return Whether the character is numeric.
 */
function isNumeric(c) {
    if (c == ".") {
        return true;
    }
    var parsed = parseInt(c);
    return !isNaN(parsed);
}
/**
 * Tells whether a character is a single ASCII letter.
 *
 * @param c - The character.
 * @param matchCase - `"upper"` to accept only A-Z, `"lower"` to accept only a-z;
 * any other value (including omitted) accepts either case. (optional)
 * @return Whether the character is a letter of the requested case.
 */
function isLetter(c, matchCase) {
    if (matchCase === "upper") {
        return /^[A-Z]$/.test(c);
    }
    if (matchCase === "lower") {
        return /^[a-z]$/.test(c);
    }
    return /^[A-Za-z]$/.test(c);
}
/**
 * Tests a character against the lexer's "basic unicode" pattern.
 *
 * NOTE(review): the pattern is written without a character class, so it
 * matches only input that begins with the literal sequence U+0000, "-",
 * U+00FF. For any single-character input the result is therefore `false`.
 * This looks unintended, but callers currently rely on the resulting
 * behaviour, so confirm the intended character set before changing it.
 *
 * @param c - The character.
 * @param excludePi - When exactly `true`, the Pi symbol (U+03C0) is additionally rejected.
 * @return Whether the given character matches the pattern (and is not Pi when excluded).
 */
function isUnicode(c, excludePi) {
    var matchesPattern = /^\u0000-\u00ff/.test(c);
    if (excludePi === true) {
        return matchesPattern && c !== "\u03C0";
    }
    return matchesPattern;
}
/**
 * Returns whether a given character is alphanumeric (ASCII letters and digits).
 *
 * Only strings are considered. Without the type guard, `RegExp.prototype.test`
 * coerces its argument to a string, so `undefined` (what a lexer sees when it
 * peeks past the end of its input) would be tested as the text "undefined"
 * and wrongly reported as alphanumeric.
 *
 * @param c - The character.
 * @return Whether the character is alphanumeric.
 */
function isAlpha(c) {
    return typeof c === "string" && /^[0-9a-zA-Z]+$/.test(c);
}
/**
 * Tells whether the given text contains a line terminator: LF, CR, CRLF,
 * U+2028 (line separator) or U+2029 (paragraph separator).
 *
 * @param c - The character.
 * @return Whether the character is a newline.
 */
function isNewline(c) {
    var lineTerminator = /\n|\r(?!\n)|\u2028|\u2029|\r\n/;
    return lineTerminator.test(c);
}
/**
 * OpenQASM 2.0 Lexical Analyzer
 *
 * A simpler lexer implementation focused on the core quantum circuit description
 * features of OpenQASM 2.0. This lexer handles the essential constructs needed
 * for basic quantum programming without the complexity of classical programming
 * language features.
 *
 * Tokens are emitted as arrays: `[type]` for punctuation and keywords, and
 * `[type, value]` for identifiers, strings and numbers.
 *
 * @example Basic OpenQASM 2.0 tokenization
 * ```typescript
 * const source = `
 * OPENQASM 2.0;
 * include "qelib1.inc";
 * qreg q[2];
 * creg c[2];
 * h q[0];
 * cx q[0],q[1];
 * measure q -> c;
 * `;
 *
 * const lexer = new Lexer(source);
 * const tokens = lexer.lex();
 * ```
 */
var Lexer = /** @class */ (function () {
    /** Matches any single line terminator (LF, lone CR, CRLF, U+2028, U+2029). */
    var LINE_BREAK = /\n|\r(?!\n)|\u2028|\u2029|\r\n/;
    /** Characters that are a complete token on their own, mapped to `Token` member names. */
    var SINGLE_CHAR_TOKENS = {
        "+": "Plus",
        "*": "Times",
        "^": "Power",
        ";": "Semicolon",
        ",": "Comma",
        "(": "LParen",
        "[": "LSParen",
        "{": "LCParen",
        ")": "RParen",
        "]": "RSParen",
        "}": "RCParen"
    };
    /**
     * Lower-case keywords, keyed by first character. `tail` is the rest of the
     * keyword and `token` the matching `Token` member name.
     */
    var KEYWORDS = {
        g: { tail: "ate", token: "Gate" },
        q: { tail: "reg", token: "QReg" },
        c: { tail: "reg", token: "CReg" },
        b: { tail: "arrier", token: "Barrier" },
        m: { tail: "easure", token: "Measure" },
        i: { tail: "nclude", token: "Include" },
        o: { tail: "paque", token: "Opaque" }
    };
    /** Own-property check that is safe for arbitrary keys. */
    function hasOwn(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key);
    }
    /** Whether `c` can continue an identifier; `undefined` (end of input) cannot. */
    function isIdentifierPart(c) {
        return c !== undefined && (isAlpha(c) || c == "_" || isUnicode(c));
    }
    /** Whether `c` is a single decimal digit. */
    function isDecimalDigit(c) {
        return c !== undefined && !isNaN(parseInt(c, 10));
    }
    /**
     * Creates a lexer.
     * @param input - The string to lex.
     * @param cursor - The starting cursor position (defaults to 0).
     */
    function Lexer(input, cursor) {
        if (cursor === void 0) { cursor = 0; }
        var _this = this;
        this.input = input;
        this.cursor = cursor;
        /**
         * Verifies that all appropriate lines end with a semicolon.
         *
         * Lines are trimmed before inspection, so blank or whitespace-only lines
         * and indented `//` comments are accepted. Lines containing "gate" and
         * lines consisting solely of `{` or `}` are exempt as well.
         *
         * @return A tuple `[ok, lineNumber, line]`; when `ok` is true the other
         * two entries are null, otherwise they identify the problematic line
         * (1-based number and its untrimmed text).
         */
        this.verifyInput = function () {
            var lines = _this.input.split(LINE_BREAK);
            for (var i = 0; i < lines.length; i++) {
                var text = lines[i].trim();
                var exempt = text.length === 0 ||
                    text.startsWith("//") ||
                    text.includes("gate") ||
                    text === "{" ||
                    text === "}";
                if (!exempt && !text.includes(";")) {
                    return [false, i + 1, lines[i]];
                }
            }
            return [true, null, null];
        };
        /**
         * Calling this method lexes the code represented by the provided string.
         * @return An array of tokens and their corresponding values.
         * @throws MissingSemicolonError (via throwLexerError) when verifyInput rejects a line.
         */
        this.lex = function () {
            var tokens = [];
            var verdict = _this.verifyInput();
            if (!verdict[0]) {
                throwLexerError(errors_1.MissingSemicolonError, verdict[1], verdict[2]);
            }
            while (_this.cursor < _this.input.length) {
                var token = _this.nextToken();
                // Comments produce no token.
                if (token) {
                    tokens.push(token);
                }
            }
            return tokens;
        };
        /**
         * Advances the cursor and returns the character it started on.
         * @param num - How far to advance the cursor (defaults to 1).
         * @return The character at the position the cursor had before advancing.
         */
        this.readChar = function (num) {
            if (num === void 0) { num = 1; }
            _this.cursor += num;
            return _this.input[_this.cursor - num];
        };
        /**
         * Advances the cursor past the next comment, i.e. up to and including
         * the next line terminator, or to the end of the input if the comment
         * is on the last line.
         */
        this.skipComment = function () {
            while (_this.cursor < _this.input.length) {
                if (isNewline(_this.readChar())) {
                    break;
                }
            }
        };
        /**
         * Determines whether the next character to process equals a given character.
         * @param c - The given character.
         * @return Whether the next character equals the given character.
         */
        this.peekEq = function (c) { return _this.peek() == c; };
        /**
         * Reads the character under the cursor without advancing.
         * @return The character, or undefined when the cursor is past the end of the input.
         */
        this.peek = function () { return _this.input[_this.cursor]; };
        /**
         * Reads a numeric value (a run of digits and decimal points).
         * @return The numeric value as a string.
         */
        this.readNumeric = function () {
            var num = "";
            while (isNumeric(_this.peek())) {
                num += _this.readChar();
            }
            return num;
        };
        /**
         * Reads an identifier, stopping at the first character that cannot be
         * part of one or at the end of the input.
         * @return The identifier as a string.
         */
        this.readIdentifier = function () {
            var id = "";
            // The explicit bound keeps the loop from running past the end of input.
            while (_this.cursor < _this.input.length &&
                isIdentifierPart(_this.peek())) {
                id += _this.readChar();
            }
            return id;
        };
        /**
         * Reads a string literal up to and including its terminator.
         * @param terminator - The literal's termination character.
         * @return The literal as a string (terminator included).
         * @throws Error when the input ends before the terminator is found.
         */
        this.readStringLiteral = function (terminator) {
            var start = _this.cursor;
            var lit = "";
            var char = "";
            while (!(terminator == char)) {
                if (_this.cursor >= _this.input.length) {
                    throw new Error("Line ".concat(_this.getLineNumber(start), ": unterminated string literal: ").concat(_this.getCurrentLine(start)));
                }
                char = _this.readChar();
                lit += char;
            }
            return lit;
        };
        /**
         * Advances the cursor past the next block of whitespace.
         * @return Always null.
         */
        this.skipWhitespace = function () {
            while (_this.cursor < _this.input.length &&
                " \t\n\r\v".indexOf(_this.peek()) > -1) {
                _this.cursor += 1;
            }
            return null;
        };
        /**
         * Whether the text at the cursor is exactly `tail` followed by a
         * non-identifier character (or the end of input). The boundary check
         * stops identifiers such as `gateway` from being split into a keyword
         * plus a remainder.
         */
        var keywordFollows = function (tail) {
            var end = _this.cursor + tail.length;
            return _this.input.substring(_this.cursor, end) === tail &&
                !isIdentifierPart(_this.input[end]);
        };
        /**
         * Inspects (without consuming) the version number that follows the
         * OPENQASM keyword and rejects anything whose major version is not 2.
         * The version itself is left in the input and lexed as a number afterwards.
         */
        var assertSupportedVersion = function () {
            var end = _this.input.length;
            var position = _this.cursor;
            while (position < end && " \t".indexOf(_this.input[position]) > -1) {
                position++;
            }
            var readDigits = function () {
                var digits = "";
                while (position < end && isDecimalDigit(_this.input[position])) {
                    digits += _this.input[position];
                    position++;
                }
                return digits;
            };
            // Read the major version
            var majorVersion = readDigits();
            // Attempt to read the minor version
            var minorVersion = undefined;
            if (_this.input[position] == ".") {
                position++;
                minorVersion = readDigits();
            }
            // Parse major and minor versions
            var major = parseInt(majorVersion, 10);
            var minor = minorVersion ? parseInt(minorVersion, 10) : undefined;
            if (major !== 2) {
                throw new errors_1.UnsupportedOpenQASMVersionError("Unsupported OpenQASM version detected: ".concat(majorVersion, ".").concat(minor !== null && minor !== void 0 ? minor : 0));
            }
        };
        /**
         * Lexes the next token.
         * @return The next token and its corresponding value, or undefined when
         * a comment was skipped.
         * @throws BadEqualsError (via throwLexerError) for a lone `=`.
         * @throws UnsupportedOpenQASMVersionError when the OPENQASM header does
         * not declare major version 2.
         */
        this.nextToken = function () {
            _this.skipWhitespace();
            if (_this.cursor == _this.input.length) {
                return [token_1.Token.EndOfFile];
            }
            var char = _this.readChar();
            if (hasOwn(SINGLE_CHAR_TOKENS, char)) {
                return [token_1.Token[SINGLE_CHAR_TOKENS[char]]];
            }
            switch (char) {
                case "=":
                    if (_this.peekEq("=")) {
                        _this.readChar();
                        return [token_1.Token.Equals];
                    }
                    throwLexerError(errors_1.BadEqualsError, _this.getLineNumber(_this.cursor), _this.getCurrentLine(_this.cursor));
                    return;
                case "-":
                    if (_this.peekEq(">")) {
                        _this.readChar();
                        return [token_1.Token.Arrow];
                    }
                    return [token_1.Token.Minus];
                case "/":
                    if (_this.peekEq("/")) {
                        _this.skipComment();
                        return;
                    }
                    return [token_1.Token.Divide];
                case "O": {
                    // "OPENQASM": the "pen" part is matched case-insensitively,
                    // "QASM" exactly. The length guard avoids indexing past the
                    // end of the input.
                    var header = _this.input.substring(_this.cursor, _this.cursor + 7);
                    if (header.length === 7 &&
                        header.charAt(0).toLowerCase() == "p" &&
                        header.charAt(1).toLowerCase() == "e" &&
                        header.charAt(2).toLowerCase() == "n" &&
                        header.substring(3) == "QASM") {
                        _this.readChar(7);
                        assertSupportedVersion();
                        return [token_1.Token.OpenQASM];
                    }
                    // Not the header keyword: handled as an identifier below.
                    break;
                }
                case '"': {
                    var stringLiteral = char + _this.readStringLiteral('"');
                    return [token_1.Token.String, stringLiteral];
                }
                case "’": {
                    var singleStringLiteral = char + _this.readStringLiteral("’");
                    return [token_1.Token.String, singleStringLiteral];
                }
            }
            if (hasOwn(KEYWORDS, char) && keywordFollows(KEYWORDS[char].tail)) {
                _this.readChar(KEYWORDS[char].tail.length);
                return [token_1.Token[KEYWORDS[char].token]];
            }
            if (isLetter(char)) {
                var literal = char + _this.readIdentifier();
                return [(0, token_1.lookup)(literal), literal];
            }
            if (isNumeric(char)) {
                var num = char + _this.readNumeric();
                if (num.indexOf(".") != -1) {
                    return [token_1.Token.Real, parseFloat(num)];
                }
                return [token_1.Token.NNInteger, parseFloat(num)];
            }
            return [token_1.Token.Illegal];
        };
        /**
         * Returns the line number where the current cursor is located.
         * @param cursor - The current cursor position in the input string.
         * @return The 1-based line number.
         */
        this.getLineNumber = function (cursor) {
            return _this.input.substring(0, cursor).split(LINE_BREAK).length;
        };
        /**
         * Returns the current line of code where the cursor is located.
         * @param cursor - The current cursor position in the input string.
         * @return The specific line where the cursor is located.
         */
        this.getCurrentLine = function (cursor) {
            var lines = _this.input.split(LINE_BREAK);
            return lines[_this.getLineNumber(cursor) - 1];
        };
    }
    return Lexer;
}());
// Expose the Lexer class as this module's default export.
exports.default = Lexer;