UNPKG

rhombic

Version:

SQL parsing, lineage extraction and manipulation

889 lines 29.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.parseFilter = exports.parseSql = exports.parser = exports.SqlLexer = void 0; const chevrotain_1 = require("chevrotain"); const matchFunctionName_1 = require("./utils/matchFunctionName"); const Identifier = chevrotain_1.createToken({ name: "Identifier", pattern: /[a-zA-Z][\w]*|"[^"]*"/ }); const FunctionIdentifier = chevrotain_1.createToken({ name: "FunctionIdentifier", pattern: matchFunctionName_1.matchFunctionName, line_breaks: false }); // We specify the "longer_alt" property to resolve keywords vs identifiers ambiguity. // See: https://github.com/SAP/chevrotain/blob/master/examples/lexer/keywords_vs_identifiers/keywords_vs_identifiers.js const Select = chevrotain_1.createToken({ name: "Select", pattern: /SELECT/i, longer_alt: Identifier }); const Cast = chevrotain_1.createToken({ name: "Cast", pattern: /CAST/i, longer_alt: Identifier }); const SqlTypeName = chevrotain_1.createToken({ name: "SqlTypeName", pattern: /CHAR(ACTER)?( VARYING)?|VARCHAR|DATE|TIME(STAMP)?|CHARACTER SET|GEOMETRY|DEC(IMAL)?|NUMERIC|INT(EGER)?|BOOLEAN|BINARY( VARYING)?|VARBINARY|TINYINT|SMALLINT|BIGINT|REAL|DOUBLE|FLOAT|ANY/i, longer_alt: Identifier }); const CollectionTypeName = chevrotain_1.createToken({ name: "CollectionTypeName", pattern: /ARRAY|MULTISET/i, longer_alt: Identifier }); const OrderBy = chevrotain_1.createToken({ name: "OrderBy", pattern: /ORDER BY/i, longer_alt: Identifier }); const Asc = chevrotain_1.createToken({ name: "Asc", pattern: /ASC/i, longer_alt: Identifier }); const Desc = chevrotain_1.createToken({ name: "Desc", pattern: /DESC/i, longer_alt: Identifier }); const Nulls = chevrotain_1.createToken({ name: "Nulls", pattern: /NULLS/i, longer_alt: Identifier }); const First = chevrotain_1.createToken({ name: "First", pattern: /FIRST/i, longer_alt: Identifier }); const Last = chevrotain_1.createToken({ name: "Last", pattern: /LAST/i, longer_alt: Identifier }); const From = chevrotain_1.createToken({ name: "From", pattern: /FROM/i, longer_alt: Identifier }); const Where = chevrotain_1.createToken({ name: "Where", pattern: /WHERE/i, longer_alt: Identifier }); const Group = chevrotain_1.createToken({ name: "Group", pattern: /GROUP/i, longer_alt: Identifier }); const Cube = chevrotain_1.createToken({ name: "Cube", pattern: /CUBE/i, longer_alt: Identifier }); const Rollup = chevrotain_1.createToken({ name: "Rollup", pattern: /ROLLUP/i, longer_alt: Identifier }); const By = chevrotain_1.createToken({ name: "By", pattern: /BY/i, longer_alt: Identifier }); const Natural = chevrotain_1.createToken({ name: "Natural", pattern: /NATURAL/i, longer_alt: Identifier }); const Left = chevrotain_1.createToken({ name: "Left", pattern: /LEFT/i, longer_alt: Identifier }); const Right = chevrotain_1.createToken({ name: "Right", pattern: /RIGHT/i, longer_alt: Identifier }); const Full = chevrotain_1.createToken({ name: "Full", pattern: /FULL/i, longer_alt: Identifier }); const Outer = chevrotain_1.createToken({ name: "Outer", pattern: /OUTER/i, longer_alt: Identifier }); const Inner = chevrotain_1.createToken({ name: "Inner", pattern: /INNER/i, longer_alt: Identifier }); const Cross = chevrotain_1.createToken({ name: "Cross", pattern: /CROSS/i, longer_alt: Identifier }); const Apply = chevrotain_1.createToken({ name: "Apply", pattern: /APPLY/i, longer_alt: Identifier }); const Join = chevrotain_1.createToken({ name: "Join", pattern: /JOIN/i, longer_alt: Identifier }); const On = chevrotain_1.createToken({ name: "On", pattern: /ON/i, longer_alt: Identifier }); const Using = chevrotain_1.createToken({ name: "Using", pattern: /USING/i, longer_alt: Identifier }); const Values = chevrotain_1.createToken({ name: "Values", pattern: /VALUES/i, longer_alt: Identifier }); const All = chevrotain_1.createToken({ name: "All", pattern: /ALL/i, longer_alt: Identifier }); const Distinct = chevrotain_1.createToken({ name: "Distinct", pattern: /DISTINCT/i, longer_alt: Identifier }); const Stream = chevrotain_1.createToken({ name: "Stream", pattern: /STREAM/i, longer_alt: Identifier }); const And = chevrotain_1.createToken({ name: "And", pattern: /AND/i, longer_alt: Identifier }); const Or = chevrotain_1.createToken({ name: "Or", pattern: /OR/i, longer_alt: Identifier }); const As = chevrotain_1.createToken({ name: "As", pattern: /AS/i, longer_alt: Identifier }); const IsNull = chevrotain_1.createToken({ name: "IsNull", pattern: /IS NULL/i, longer_alt: Identifier }); const IsNotNull = chevrotain_1.createToken({ name: "IsNotNull", pattern: /IS NOT NULL/i, longer_alt: Identifier }); const Limit = chevrotain_1.createToken({ name: "Limit", pattern: /LIMIT/i, longer_alt: Identifier }); const Null = chevrotain_1.createToken({ name: "Null", pattern: /null/ }); const Asterisk = chevrotain_1.createToken({ name: "Asterisk", pattern: /\*/ }); const Comma = chevrotain_1.createToken({ name: "Comma", pattern: /,/ }); const Period = chevrotain_1.createToken({ name: "Period", pattern: /\./ }); const LSquare = chevrotain_1.createToken({ name: "LSquare", pattern: /\[/ }); const RSquare = chevrotain_1.createToken({ name: "RSquare", pattern: /]/ }); const LParen = chevrotain_1.createToken({ name: "LParen", pattern: /\(/ }); const RParen = chevrotain_1.createToken({ name: "RParen", pattern: /\)/ }); const Colon = chevrotain_1.createToken({ name: "Colon", pattern: /:/ }); const SemiColon = chevrotain_1.createToken({ name: "SemiColon", pattern: /;/ }); const BinaryOperator = chevrotain_1.createToken({ name: "BinaryOperator", pattern: /=|>=?|<=?|\!=|LIKE/i, longer_alt: Identifier }); const MultivalOperator = chevrotain_1.createToken({ name: "MultivalOperator", pattern: /NOT IN|IN/i, longer_alt: Identifier }); const BooleanValue = chevrotain_1.createToken({ name: "BooleanValue", pattern: /TRUE|FALSE/i, longer_alt: Identifier }); const IntegerValue = chevrotain_1.createToken({ name: "IntegerValue", pattern: /0|[1-9]\d*/ }); const StringValue = chevrotain_1.createToken({ name: "StringValue", pattern: /((`[^`]*(`))+)|((\[[^\]]*(\]))(\][^\]]*(\]))*)|(("[^"\\]*(?:\\.[^"\\]*)*("))+)|(('[^'\\]*(?:\\.[^'\\]*)*('))+)|((N'[^N'\\]*(?:\\.[^N'\\]*)*('))+)/ }); const DateValue = chevrotain_1.createToken({ name: "DateValue", pattern: /DATE '\d{4}-((0[1-9])|(1[0-2]))-((0[1-9])|([1-2][0-9])|3[0-1])'/i }); const WhiteSpace = chevrotain_1.createToken({ name: "WhiteSpace", pattern: /\s+/, group: chevrotain_1.Lexer.SKIPPED }); const Comment = chevrotain_1.createToken({ name: "Comment", pattern: /--.*/, group: chevrotain_1.Lexer.SKIPPED }); // note we are placing WhiteSpace first as it is very common thus it will speed up the lexer. const allTokens = [ WhiteSpace, Comment, // "keywords" appear before the Identifier Select, From, Where, Group, Cube, Rollup, By, Natural, Left, Right, Full, Outer, Inner, Cross, Apply, Join, On, Using, Values, And, OrderBy, Or, IsNotNull, IsNull, Nulls, Null, Asc, As, Distinct, All, Stream, FunctionIdentifier, DateValue, SqlTypeName, CollectionTypeName, Cast, Desc, Last, First, Limit, MultivalOperator, BinaryOperator, BooleanValue, // The Identifier must appear after the keywords because all keywords are valid identifiers. Identifier, IntegerValue, StringValue, Asterisk, Colon, SemiColon, LSquare, RSquare, LParen, RParen, Comma, Period ]; // reuse the same lexer instance exports.SqlLexer = new chevrotain_1.Lexer(allTokens); class SqlParser extends chevrotain_1.CstParser { constructor(serializedGrammar) { super(allTokens, { serializedGrammar }); /** * statement: * query */ this.statement = this.RULE("statement", () => { this.OR([{ ALT: () => this.SUBRULE(this.query) }]); }); /** * query: * values * | WITH withItem [ , withItem ]* query * | { * select * | selectWithoutFrom * | query UNION [ ALL | DISTINCT ] query * | query EXCEPT [ ALL | DISTINCT ] query * | query MINUS [ ALL | DISTINCT ] query * | query INTERSECT [ ALL | DISTINCT ] query * } * [ ORDER BY orderItem [, orderItem ]* ] * [ LIMIT [ start, ] { count | ALL } ] * [ OFFSET start { ROW | ROWS } ] * [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ONLY ] */ this.query = this.RULE("query", () => { this.OR([ { ALT: () => this.SUBRULE(this.values) }, { ALT: () => { this.SUBRULE(this.select); this.OPTION(() => { this.SUBRULE(this.orderBy); }); this.OPTION1(() => { this.CONSUME(Limit); this.OR1([{ ALT: () => this.CONSUME(IntegerValue) }, { ALT: () => this.CONSUME(All) }]); }); } } ]); this.OPTION2(() => this.CONSUME(SemiColon)); }); /** * expression: * valueExpression * | null */ this.expression = this.RULE("expression", () => { this.OR([ { ALT: () => this.CONSUME(IntegerValue) }, { ALT: () => this.CONSUME(StringValue) }, { ALT: () => this.CONSUME(Null) }, { ALT: () => { this.CONSUME(LParen); this.MANY_SEP({ SEP: Comma, DEF: () => this.SUBRULE(this.expression) }); this.CONSUME(RParen); } }, { ALT: () => this.SUBRULE(this.columnPrimary) }, { ALT: () => { this.CONSUME(FunctionIdentifier), this.CONSUME1(LParen); this.MANY_SEP1({ SEP: Comma, DEF: () => this.SUBRULE1(this.expression) }); this.CONSUME1(RParen); } }, { ALT: () => this.SUBRULE(this.cast) } ]); }); this.cast = this.RULE("cast", () => { this.CONSUME(Cast); this.CONSUME(LParen); this.SUBRULE(this.expression); this.CONSUME(As); this.SUBRULE(this.type); this.OPTION(() => { this.CONSUME1(LParen); this.CONSUME(IntegerValue); // precision this.OPTION1(() => { this.CONSUME(Comma); this.CONSUME1(IntegerValue); // scale }); this.CONSUME1(RParen); }); this.CONSUME(RParen); }); this.orderBy = this.RULE("orderBy", () => { this.CONSUME(OrderBy); this.AT_LEAST_ONE_SEP({ SEP: Comma, DEF: () => this.SUBRULE(this.orderItem) }); }); /** * type: * typeName * [ collectionsTypeName ]* * * typeName: * sqlTypeName */ this.type = this.RULE("type", () => { this.OR([{ ALT: () => this.CONSUME(SqlTypeName) }]); this.OPTION(() => { this.MANY(() => { this.CONSUME(CollectionTypeName); }); }); }); /** * <value expression> ::= * <numeric value expression> * | <string value expression> * | <datetime value expression> * | <interval value expression> * | <boolean value expression> * | <user-defined type value expression> * | <row value expression> * | <reference value expression> * | <collection value expression> * * https://github.com/ronsavage/SQL/blob/master/sql-2003-2.bnf */ this.valueExpression = this.RULE("valueExpression", () => { this.OR([ { ALT: () => this.CONSUME(IntegerValue) }, { ALT: () => this.CONSUME(StringValue) }, { ALT: () => this.CONSUME(BooleanValue) }, { ALT: () => this.CONSUME(DateValue) } ]); }); this.booleanExpression = this.RULE("booleanExpression", () => { this.OR([ { ALT: () => { this.CONSUME(LParen); this.SUBRULE(this.booleanExpression); this.CONSUME(RParen); } }, { ALT: () => this.SUBRULE1(this.booleanExpressionValue) } ]); this.OPTION(() => { this.OR1([{ ALT: () => this.CONSUME(Or) }, { ALT: () => this.CONSUME(And) }]); this.SUBRULE2(this.booleanExpression); }); }); this.booleanExpressionValue = this.RULE("booleanExpressionValue", () => { this.SUBRULE(this.columnPrimary); this.OR([ { ALT: () => { // Binary operation this.CONSUME(BinaryOperator); this.OR1([ { ALT: () => this.SUBRULE1(this.valueExpression) }, { ALT: () => this.SUBRULE2(this.columnPrimary) } ]); } }, { ALT: () => { // Multival operation this.CONSUME(MultivalOperator); this.CONSUME1(LParen); this.AT_LEAST_ONE_SEP({ SEP: Comma, DEF: () => { this.OR2([ { ALT: () => this.SUBRULE3(this.valueExpression) }, { ALT: () => this.SUBRULE4(this.columnPrimary) } ]); } }); this.CONSUME1(RParen); } }, { ALT: () => { // Unary operation this.OR3([{ ALT: () => this.CONSUME(IsNull) }, { ALT: () => this.CONSUME(IsNotNull) }]); } } ]); }); /** * orderItem: * expression [ ASC | DESC ] [ NULLS FIRST | NULLS LAST ] */ this.orderItem = this.RULE("orderItem", () => { this.SUBRULE(this.expression); this.OPTION(() => { this.OR([{ ALT: () => this.CONSUME(Asc) }, { ALT: () => this.CONSUME(Desc) }]); }); this.OPTION1(() => { this.OR1([ { ALT: () => { this.CONSUME(Nulls); this.CONSUME(First); } }, { ALT: () => { this.CONSUME1(Nulls); this.CONSUME(Last); } } ]); }); }); /** * select: * SELECT [ STREAM ] [ ALL | DISTINCT ] * { projectionItem [, projectionItem ]* } * FROM tableExpression [ AS tableAlias ] * [ WHERE booleanExpression ] * [ GROUP BY { groupItem [, groupItem ]* } ] * [ HAVING booleanExpression ] * [ WINDOW windowName AS windowSpec [, windowName AS windowSpec ]* ] * */ this.select = this.RULE("select", () => { this.CONSUME(Select); this.OPTION(() => this.CONSUME(Stream)); this.OPTION1(() => { this.OR([{ ALT: () => this.CONSUME(All) }, { ALT: () => this.CONSUME(Distinct) }]); }); this.SUBRULE(this.projectionItems); // Everything is wrap into `OPTION` to deal with selectWithoutFrom case this.OPTION3(() => { this.CONSUME(From); this.SUBRULE(this.tableExpression); }); this.OPTION4(() => { this.SUBRULE(this.where); }); this.OPTION5(() => { this.SUBRULE(this.groupBy); }); }); /** * Group by statement */ this.groupBy = this.RULE("groupBy", () => { this.CONSUME(Group); this.CONSUME(By); this.AT_LEAST_ONE_SEP({ SEP: Comma, DEF: () => this.SUBRULE(this.groupItem) }); }); /** * Where statement */ this.where = this.RULE("where", () => { this.CONSUME(Where); this.SUBRULE(this.booleanExpression); }); /** * projectionItems: * projectionItem [, projectionItem ]* */ this.projectionItems = this.RULE("projectionItems", () => { this.AT_LEAST_ONE_SEP({ SEP: Comma, DEF: () => this.SUBRULE(this.projectionItem) }); }); /** * projectionItem: * expression [ [ AS ] columnAlias ] * | tableAlias . * * | * */ this.projectionItem = this.RULE("projectionItem", () => { this.OR([ { ALT: () => { this.OPTION1(() => { this.CONSUME1(Identifier); this.CONSUME(Period); }); this.CONSUME(Asterisk); } }, { ALT: () => { this.SUBRULE(this.expression); this.OPTION(() => { this.CONSUME(As); this.CONSUME(Identifier); }); } } ]); }); /** * tableExpression: * tableReference [, tableReference ]* * | tableExpression [ NATURAL ] [ INNER | (( LEFT | RIGHT | FULL ) [ OUTER ]) ] JOIN tableExpression [ joinCondition ] * | tableExpression CROSS JOIN tableExpression * | tableExpression [ CROSS | OUTER ] APPLY tableExpression */ this.tableExpression = this.RULE("tableExpression", () => { // tableReference [, tableReference ]* this.MANY_SEP({ SEP: Comma, DEF: () => this.SUBRULE(this.tableReference) }); this.OPTION(() => { this.OR([ { // [ NATURAL ] [ INNER | (( LEFT | RIGHT | FULL ) [ OUTER ]) ] JOIN tableExpression [ joinCondition ] ALT: () => { this.OPTION1(() => this.CONSUME(Natural)); this.OPTION2(() => { this.OR1([ { ALT: () => { this.CONSUME(Inner); } }, { ALT: () => { this.OR2([ { ALT: () => this.CONSUME(Left) }, { ALT: () => this.CONSUME(Right) }, { ALT: () => this.CONSUME(Full) } ]); this.OPTION3(() => this.CONSUME(Outer)); } } ]); }); this.CONSUME(Join); this.SUBRULE1(this.tableExpression); this.OPTION4(() => this.SUBRULE2(this.joinCondition)); } }, { // CROSS JOIN tableExpression ALT: () => { this.CONSUME(Cross); this.CONSUME2(Join); this.SUBRULE2(this.tableExpression); } }, { // [ CROSS | OUTER ] APPLY tableExpression ALT: () => { this.OR3([{ ALT: () => this.CONSUME1(Cross) }, { ALT: () => this.CONSUME1(Outer) }]); this.CONSUME(Apply); this.SUBRULE3(this.tableExpression); } } ]); }); }); /** * joinCondition: * ON booleanExpression * | USING '(' column [, column ]* ')' */ this.joinCondition = this.RULE("joinCondition", () => { this.OR([ { ALT: () => { this.CONSUME(On); this.SUBRULE(this.booleanExpression); } }, { ALT: () => { this.CONSUME(Using); this.CONSUME(LParen); this.SUBRULE(this.projectionItems); this.CONSUME(RParen); } } ]); }); /** * tableReference: * tablePrimary * [ FOR SYSTEM_TIME AS OF expression ] * [ matchRecognize ] * [ [ AS ] alias [ '(' columnAlias [, columnAlias ]* ')' ] ] * */ this.tableReference = this.RULE("tableReference", () => { this.SUBRULE(this.tablePrimary); this.OPTION(() => { this.CONSUME(As); this.CONSUME(Identifier); // alias this.OPTION1(() => { this.CONSUME(LParen); this.AT_LEAST_ONE_SEP({ SEP: Comma, DEF: () => this.CONSUME1(Identifier) // columnAlias }); this.CONSUME(RParen); }); }); }); /** * tablePrimary: * [ [ catalogName . ] schemaName . ] tableName * '(' TABLE [ [ catalogName . ] schemaName . ] tableName ')' * | tablePrimary [ EXTEND ] '(' columnDecl [, columnDecl ]* ')' * | [ LATERAL ] '(' query ')' * | UNNEST '(' expression ')' [ WITH ORDINALITY ] * | [ LATERAL ] TABLE '(' [ SPECIFIC ] functionName '(' expression [, expression ]* ')' ')' */ this.tablePrimary = this.RULE("tablePrimary", () => { this.OR([ { ALT: () => { // CatalogName this.OPTION(() => { this.CONSUME1(Identifier); this.CONSUME(Period); }); // schemaName this.OPTION1(() => { this.CONSUME3(Identifier); this.CONSUME2(Period); }); // tableName this.CONSUME4(Identifier); } } ]); }); /** * columnPrimary: * [ [ [ catalogName . ] schemaName . ] tableName . ] columnName */ this.columnPrimary = this.RULE("columnPrimary", () => { // CatalogName this.OPTION(() => { this.CONSUME(Identifier); this.CONSUME(Period); }); // schemaName this.OPTION1(() => { this.CONSUME1(Identifier); this.CONSUME1(Period); }); // tableName this.OPTION2(() => { this.CONSUME2(Identifier); this.CONSUME2(Period); }); // columnName this.CONSUME3(Identifier); }); /** * columnDecl: * column type [ NOT NULL ] */ this.columnDecl = this.RULE("columnDecl", () => { }); /** * values: * VALUES expression [, expression ]* */ this.values = this.RULE("values", () => { this.CONSUME(Values); this.AT_LEAST_ONE_SEP({ SEP: Comma, DEF: () => this.SUBRULE(this.expression) }); }); /** * groupItem: * expression * | '(' ')' * | '(' expression [, expression ]* ')' * | CUBE '(' expression [, expression ]* ')' * | ROLLUP '(' expression [, expression ]* ')' * | GROUPING SETS '(' groupItem [, groupItem ]* ')' */ this.groupItem = this.RULE("groupItem", () => { this.OPTION(() => { this.OR1([{ ALT: () => this.CONSUME(Cube) }, { ALT: () => this.CONSUME1(Rollup) }]); }); this.OPTION1(() => { this.CONSUME2(LParen); }); this.MANY_SEP({ SEP: Comma, DEF: () => this.SUBRULE1(this.expression) }); this.OPTION2(() => { this.CONSUME3(RParen); }); // TODO: Deal with `GROUPING SETS` }); /** * window: * windowName * | windowSpec */ this.window = this.RULE("window", () => { }); /** * windowSpec: * '(' * [ windowName ] * [ ORDER BY orderItem [, orderItem ]* ] * [ PARTITION BY expression [, expression ]* ] * [ * RANGE numericOrIntervalExpression { PRECEDING | FOLLOWING } * | ROWS numericExpression { PRECEDING | FOLLOWING } * ] * ')' */ this.windowSpec = this.RULE("windowSpec", () => { }); this.performSelfAnalysis(); } } // Retrieve the serialized grammar in production (avoid minification issues) let serializedGrammar; if (process.env.NODE_ENV === "production") { try { serializedGrammar = require("./serializedGrammar").serializedGrammar; } catch (err) { throw new Error("The serialized grammar can't be loaded!"); } } // reuse the same parser instance. exports.parser = new SqlParser(serializedGrammar); function parseSql(statement) { const lexResult = exports.SqlLexer.tokenize(statement); // setting a new input will RESET the parser instance's state. exports.parser.input = lexResult.tokens; // ref: https://sap.github.io/chevrotain/docs/guide/concrete_syntax_tree.html#ast-vs-cst // `statement` is our top level rule as entry point const cst = exports.parser.statement(); return { cst, lexErrors: lexResult.errors, parseErrors: exports.parser.errors }; } exports.parseSql = parseSql; function parseFilter(filter) { const lexResult = exports.SqlLexer.tokenize(filter); // setting a new input will RESET the parser instance's state. exports.parser.input = lexResult.tokens; // ref: https://sap.github.io/chevrotain/docs/guide/concrete_syntax_tree.html#ast-vs-cst // `booleanExpression` is our top level rule for a filter const cst = exports.parser.booleanExpression(); return { cst, lexErrors: lexResult.errors, parseErrors: exports.parser.errors }; } exports.parseFilter = parseFilter; //# sourceMappingURL=SqlParser.js.map