UNPKG

antlr4ts

Version:

ANTLR 4 runtime for JavaScript written in TypeScript

133 lines 6.33 kB
"use strict"; /*! * Copyright 2016 The ANTLR Project. All rights reserved. * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.CharStreams = void 0; const CodePointBuffer_1 = require("./CodePointBuffer"); const CodePointCharStream_1 = require("./CodePointCharStream"); const IntStream_1 = require("./IntStream"); // const DEFAULT_BUFFER_SIZE: number = 4096; /** This class represents the primary interface for creating {@link CharStream}s * from a variety of sources as of 4.7. The motivation was to support * Unicode code points > U+FFFF. {@link ANTLRInputStream} and * {@link ANTLRFileStream} are now deprecated in favor of the streams created * by this interface. * * DEPRECATED: {@code new ANTLRFileStream("myinputfile")} * NEW: {@code CharStreams.fromFileName("myinputfile")} * * WARNING: If you use both the deprecated and the new streams, you will see * a nontrivial performance degradation. This speed hit is because the * {@link Lexer}'s internal code goes from a monomorphic to megamorphic * dynamic dispatch to get characters from the input stream. Java's * on-the-fly compiler (JIT) is unable to perform the same optimizations * so stick with either the old or the new streams, if performance is * a primary concern. See the extreme debugging and spelunking * needed to identify this issue in our timing rig: * * https://github.com/antlr/antlr4/pull/1781 * * The ANTLR character streams still buffer all the input when you create * the stream, as they have done for ~20 years. If you need unbuffered * access, please note that it becomes challenging to create * parse trees. The parse tree has to point to tokens which will either * point into a stale location in an unbuffered stream or you have to copy * the characters out of the buffer into the token. That defeats the purpose * of unbuffered input. 
Per the ANTLR book, unbuffered streams are primarily * useful for processing infinite streams *during the parse.* * * The new streams also use 8-bit buffers when possible so this new * interface supports character streams that use half as much memory * as the old {@link ANTLRFileStream}, which assumed 16-bit characters. * * A big shout out to Ben Hamilton (github bhamiltoncx) for his superhuman * efforts across all targets to get true Unicode 3.1 support for U+10FFFF. * * @since 4.7 */ var CharStreams; (function (CharStreams) { // /** // * Creates a {@link CharStream} given a path to a UTF-8 // * encoded file on disk. // * // * Reads the entire contents of the file into the result before returning. // */ // export function fromFile(file: File): CharStream; // export function fromFile(file: File, charset: Charset): CharStream; // export function fromFile(file: File, charset?: Charset): CharStream { // if (charset === undefined) { // charset = Charset.forName("UTF-8"); // } function fromString(s, sourceName) { if (sourceName === undefined || sourceName.length === 0) { sourceName = IntStream_1.IntStream.UNKNOWN_SOURCE_NAME; } // Initial guess assumes no code points > U+FFFF: one code // point for each code unit in the string let codePointBufferBuilder = CodePointBuffer_1.CodePointBuffer.builder(s.length); // TODO: CharBuffer.wrap(String) rightfully returns a read-only buffer // which doesn't expose its array, so we make a copy. 
let cb = new Uint16Array(s.length); for (let i = 0; i < s.length; i++) { cb[i] = s.charCodeAt(i); } codePointBufferBuilder.append(cb); return CodePointCharStream_1.CodePointCharStream.fromBuffer(codePointBufferBuilder.build(), sourceName); } CharStreams.fromString = fromString; // export function bufferFromChannel( // channel: ReadableByteChannel, // charset: Charset, // bufferSize: number, // decodingErrorAction: CodingErrorAction, // inputSize: number): CodePointBuffer { // try { // let utf8BytesIn: Uint8Array = new Uint8Array(bufferSize); // let utf16CodeUnitsOut: Uint16Array = new Uint16Array(bufferSize); // if (inputSize === -1) { // inputSize = bufferSize; // } else if (inputSize > Integer.MAX_VALUE) { // // ByteBuffer et al don't support long sizes // throw new RangeError(`inputSize ${inputSize} larger than max ${Integer.MAX_VALUE}`); // } // let codePointBufferBuilder: CodePointBuffer.Builder = CodePointBuffer.builder(inputSize); // let decoder: CharsetDecoder = charset // .newDecoder() // .onMalformedInput(decodingErrorAction) // .onUnmappableCharacter(decodingErrorAction); // let endOfInput: boolean = false; // while (!endOfInput) { // let bytesRead: number = channel.read(utf8BytesIn); // endOfInput = (bytesRead === -1); // utf8BytesIn.flip(); // let result: CoderResult = decoder.decode( // utf8BytesIn, // utf16CodeUnitsOut, // endOfInput); // if (result.isError() && decodingErrorAction === CodingErrorAction.REPORT) { // result.throwException(); // } // utf16CodeUnitsOut.flip(); // codePointBufferBuilder.append(utf16CodeUnitsOut); // utf8BytesIn.compact(); // utf16CodeUnitsOut.compact(); // } // // Handle any bytes at the end of the file which need to // // be represented as errors or substitution characters. 
// let flushResult: CoderResult = decoder.flush(utf16CodeUnitsOut); // if (flushResult.isError() && decodingErrorAction === CodingErrorAction.REPORT) { // flushResult.throwException(); // } // utf16CodeUnitsOut.flip(); // codePointBufferBuilder.append(utf16CodeUnitsOut); // return codePointBufferBuilder.build(); // } // finally { // channel.close(); // } // } })(CharStreams = exports.CharStreams || (exports.CharStreams = {})); //# sourceMappingURL=CharStreams.js.map