grammar NanoSQL;

import Keywords, Lexer;


// A series of one or more commands.  (Not sure if I should allow this to
// produce 0 commands as well. -Donnie)
commands:
      command+ ;

// All commands are required to end with a semicolon.  If we don't have any
// terminator then we don't know when an interactive command is actually
// completely entered.  For example, "SELECT * FROM foo" would parse correctly
// even though the user meant to type "... WHERE b > 5".  By requiring the
// semicolon we avoid this issue.
command:
        commandNoSemicolon ';' ;

// Separate this rule out from the "command" rule so that the visitor
// implementation doesn't have to do anything fancy.
commandNoSemicolon:
        createTableStmt
      | createIndexStmt
      | dropTableStmt
      | dropIndexStmt
      | selectStmt
      | insertStmt
      | updateStmt
      | deleteStmt
      | explainStmt
      | beginTxnStmt
      | commitTxnStmt
      | rollbackTxnStmt
      | analyzeStmt
      | exitStmt
      | crashStmt
      | dumpTableStmt
      | dumpIndexStmt
      | flushStmt
      | verifyStmt
      | optimizeStmt
      | showTableStatsStmt
      | showTablesStmt
      | showPropsStmt
      | setPropStmt
      | showSystemStatsStmt
      ;


//============================================================================
// CREATE TABLE
//
// The rules for CREATE TABLE commands are somewhat involved since they have
// many components and different kinds of constraints that can be specified.
//

createTableStmt:
        CREATE (TEMPORARY)? TABLE (IF NOT EXISTS)? tableName=IDENT '('
        ( tableColDecl | tableConstraint )
        ( ',' ( tableColDecl | tableConstraint ) )*
        ')'
        cmdProperties?
        ;

tableColDecl:
        columnName=IDENT columnType columnConstraint* ;

columnType:
        ( TYPE_INT | TYPE_INTEGER )                 # ColTypeInt
      | TYPE_BIGINT                                 # ColTypeBigInt
      | (TYPE_DECIMAL | TYPE_NUMERIC)
            ('(' precision=INT_LITERAL (',' scale=INT_LITERAL)? ')')?               # ColTypeDecimal
      | TYPE_FLOAT                                  # ColTypeFloat
      | TYPE_DOUBLE                                 # ColTypeDouble
      | (TYPE_CHAR | TYPE_CHARACTER) '(' length=INT_LITERAL ')'                     # ColTypeChar
      | (TYPE_VARCHAR | (TYPE_CHARACTER TYPE_VARYING)) '(' length=INT_LITERAL ')'   # ColTypeVarChar
      | TYPE_DATE                                   # ColTypeDate
      | TYPE_DATETIME                               # ColTypeDateTime
      | TYPE_TIME                                   # ColTypeTime
      | TYPE_TIMESTAMP                              # ColTypeTimestamp
      ;

// Table columns can have a number of constraints, which may optionally be
// named.  Note that column-constraints and table-constraints can be quite
// different, even though they are represented with the same Java class in
// the implementation.
//
// The rule is written with the repeated components so that we can use the
// nifty alternative-naming feature of Antlr4.
columnConstraint:
          (CONSTRAINT constraintName=IDENT)? NOT NULL                   # ColConstraintNotNull
        | (CONSTRAINT constraintName=IDENT)? UNIQUE                     # ColConstraintUnique
        | (CONSTRAINT constraintName=IDENT)? PRIMARY KEY                # ColConstraintPrimaryKey
        | (CONSTRAINT constraintName=IDENT)?
              REFERENCES refTableName=IDENT ('(' refColumnName=IDENT ')')?
              (ON DELETE delOpt=cascadeOption)?
              (ON UPDATE updOpt=cascadeOption)?                         # ColConstraintForeignKey
        ;

// Table columns can have a number of constraints, which may optionally be
// named.  Note that column-constraints and table-constraints can be quite
// different, even though they are represented with the same Java class in
// the implementation.
tableConstraint:
          (CONSTRAINT constraintName=IDENT)? UNIQUE
              '(' columnName+=IDENT (',' columnName+=IDENT)* ')'        # TblConstraintUnique
        | (CONSTRAINT constraintName=IDENT)? PRIMARY KEY
              '(' columnName+=IDENT (',' columnName+=IDENT)* ')'        # TblConstraintPrimaryKey
        | (CONSTRAINT constraintName=IDENT)? FOREIGN KEY
              '(' columnName+=IDENT (',' columnName+=IDENT)* ')'
              REFERENCES refTableName=IDENT ('(' refColumnName+=IDENT (',' refColumnName+=IDENT)* ')')?
              (ON DELETE delOpt=cascadeOption)?
              (ON UPDATE updOpt=cascadeOption)?                         # TblConstraintForeignKey
        ;


cascadeOption:
          RESTRICT  # CascadeOptRestrict
        | CASCADE   # CascadeOptCascade
        | SET NULL  # CascadeOptSetNull
        ;

//============================================================================
// DROP TABLE ...
//

dropTableStmt:
        DROP TABLE (IF EXISTS)? tableName=IDENT ;


//============================================================================
// CREATE INDEX ...
//

createIndexStmt:
        CREATE UNIQUE? INDEX (IF NOT EXISTS)?
        indexName=IDENT ON tableName=IDENT
        '(' columnName+=IDENT (',' columnName+=IDENT)* ')'
        cmdProperties?
        ;


//============================================================================
// DROP INDEX ...
//

dropIndexStmt:
        DROP INDEX (IF EXISTS)? indexName=IDENT ON tableName=IDENT ;


//============================================================================
// Table utility operations:
//   SHOW TABLES
//   ANALYZE table [, table ...]
//   OPTIMIZE table [, table ...]
//   VERIFY table [, table ...]
//   DUMP TABLE table ...
//   DUMP INDEX table.index ...
//

showTablesStmt:
        SHOW TABLES ;

analyzeStmt:
        ANALYZE IDENT ( ',' IDENT )* ;

optimizeStmt:
        OPTIMIZE IDENT ( ',' IDENT )* ;

verifyStmt:
        VERIFY IDENT ( ',' IDENT )* ;

dumpTableStmt:
        DUMP TABLE tableName=IDENT
        ( TO FILE fileName=STRING_LITERAL )?
        ( FORMAT format=STRING_LITERAL )?
        ;

dumpIndexStmt:
        DUMP INDEX tableName=IDENT '.' indexName=IDENT
        ( TO FILE fileName=STRING_LITERAL )?
        ( FORMAT format=STRING_LITERAL )?
        ;

showTableStatsStmt:
        SHOW TABLE tableName=IDENT STATS ;


//============================================================================
// Transaction processing statements
//   START TRANSACTION / BEGIN WORK
//   COMMIT WORK
//   ROLLBACK WORK
//

beginTxnStmt:
        START TRANSACTION | BEGIN WORK? ;

commitTxnStmt:
        COMMIT WORK? ;

rollbackTxnStmt:
        ROLLBACK WORK? ;


//============================================================================
// Other utility statements
//   SHOW PROPERTIES
//   SET PROPERTY '...' = ...
//   SHOW '...' STATS
//   FLUSH
//   CRASH
//   EXIT (or QUIT)
//

showPropsStmt:
        SHOW PROPERTIES (LIKE pattern=STRING_LITERAL)? ;

setPropStmt:
        SET PROPERTY name=STRING_LITERAL '=' expression ;

showSystemStatsStmt:
        SHOW name=STRING_LITERAL STATS ;

flushStmt:
        FLUSH ;

crashStmt:
        CRASH INT_LITERAL? ;

exitStmt:
        ( EXIT | QUIT ) ;


//============================================================================
// SELECT
//

// NOTE:  This approach to supporting the WITH clause is completely broken.
//        The SELECT statements combined with set-operations need to have no
//        WITH or ORDER BY clauses.  The WITH and ORDER BY clauses should
//        be applied after set-operations.  Nested subqueries can use these
//        clauses if they are parenthesized.
//
//        Oh, and INSERT / UPDATE / DELETE clauses can also specify WITH
//        clauses...
//
// selectStmt:
//          (withClause? selectStmtNoWith)
//        | selectStmt (UNION | EXCEPT | INTERSECT) ALL? selectStmt
//             orderByClause? limitOffsetClause?
//         | '(' selectStmt ')'
//         ;
//
// withClause:
//         WITH RECURSIVE? commonTableExpression (',' commonTableExpression)* ;
//
// commonTableExpression:
//         IDENT AS selectStmtNoWith ;
//
// selectStmtNoWith:

selectStmt:
        SELECT (ALL | DISTINCT)? selectValue (',' selectValue)*
        ( FROM fromExpr )?
        ( WHERE wherePred=expression )?
        ( GROUP BY groupExpr+=expression (',' groupExpr+=expression)*
          (HAVING havingPred=expression)? )?
        ( ORDER BY orderByExpr (',' orderByExpr)* )?
        ( LIMIT limit=INT_LITERAL)?
        ( OFFSET offset=INT_LITERAL)?
        ;

selectValue:
          expression (AS? alias=IDENT)? ;

joinType:
          INNER         # JoinTypeInner
        | LEFT OUTER?   # JoinTypeLeftOuter
        | RIGHT OUTER?  # JoinTypeRightOuter
        | FULL OUTER?   # JoinTypeFullOuter
        ;

fromExpr:
          fromExpr CROSS JOIN fromExpr                                  # FromCrossJoin
        | fromExpr NATURAL joinType? JOIN fromExpr                      # FromNaturalJoin
        | fromExpr joinType? JOIN fromExpr ON expression                # FromJoinOn
        | fromExpr joinType? JOIN fromExpr
              USING '(' columnName+=IDENT (',' columnName+=IDENT)* ')'  # FromJoinUsing
        | fromExpr ',' fromExpr                                         # FromImplicitCrossJoin
        | tableName=IDENT (AS? alias=IDENT)?                            # FromTable
        | functionCall (AS? alias=IDENT)?                               # FromTableFunction
        | '(' selectStmt ')' AS? alias=IDENT                            # FromNestedSelect
        | '(' fromExpr ')'                                              # FromParens
        ;

orderByExpr:
        expression (ASC | ASCENDING | DESC | DESCENDING)? ;


insertStmt:
        INSERT INTO tableName=IDENT
        ( '(' (columnName+=IDENT (',' columnName+=IDENT)*)? ')' )?  // Optional column-list
        ( VALUES expressionList | selectStmt )
        ;


updateStmt:
        UPDATE tableName=IDENT
        SET columnName+=IDENT '=' expression (',' columnName+=IDENT '=' expression)*
        (WHERE predicate=expression)?
        ;


deleteStmt:
        DELETE FROM tableName=IDENT (WHERE expression)? ;


explainStmt:
          EXPLAIN selectStmt    # ExplainSelect
        | EXPLAIN insertStmt    # ExplainInsert
        | EXPLAIN updateStmt    # ExplainUpdate
        | EXPLAIN deleteStmt    # ExplainDelete
        ;

//============================================================================
// Some commands take properties to specify e.g. page-size for tables, type of
// storage format, etc.
//

cmdProperties:
        PROPERTIES '(' name+=IDENT '=' literalValue
                  (',' name+=IDENT '=' literalValue)* ')' ;

//============================================================================
// All kinds of expressions!
//

// The general expression parsing rule.  ANTLR4 doesn't explicitly specify
// operator precedence; the order implicitly specifies precedence order from
// high to low.
//
// Also, we would really like to be able to factor out different kinds of
// expressions into subrules (e.g. subquery operations, or compare operations)
// but ANTLR4 can only handle left-recursive rules if they are within a single
// rule, not spaning multiple mutually left-recursive rules.  Therefore, we
// can either have a simple grammar that throws everything into one bucket, or
// we can have a very large complicated grammar that separates things into
// different categories.  (The previous grammar had nearly 300 lines for
// expressions alone; this is well under 50 lines.)
expression:
        literalValue                                        # ExprLiteral
      | columnRef                                           # ExprColumnRef
      | functionCall                                        # ExprFunctionCall
      | op=('+' | '-') expression                           # ExprUnarySign
      | expression op=('*' | '/' | '%') expression          # ExprMul
      | expression op=('+' | '-') expression                # ExprAdd
      | expression op=( '<' | '<=' | '>' | '>=' | '=' | '==' | '!=' | '<>' ) expression     # ExprCompare
      | expression IS NOT? NULL                             # ExprIsNull
      | expression NOT? BETWEEN expression AND expression   # ExprBetween
      | expression LIKE expression                          # ExprLike
      | expression SIMILAR TO expression                    # ExprSimilarTo
      | expression NOT? IN expressionList                   # ExprOneColInValues
      | expression NOT? IN '(' selectStmt ')'               # ExprOneColInSubquery
      | expressionList NOT? IN '(' selectStmt ')'           # ExprMultiColInSubquery
      | EXISTS '(' selectStmt ')'                           # ExprExists
      | NOT expression                                      # ExprNot
      | expression AND expression                           # ExprAnd
      | expression OR expression                            # ExprOr
      | '(' selectStmt ')'                                  # ExprScalarSubquery
      | '(' expression ')'                                  # ExprParen
      ;

literalValue:
        NULL                        # LiteralNull
      | TRUE                        # LiteralTrue
      | FALSE                       # LiteralFalse
      | INT_LITERAL                 # LiteralInteger
      | DECIMAL_LITERAL             # LiteralDecimal
      | STRING_LITERAL              # LiteralString
      | INTERVAL STRING_LITERAL     # LiteralInterval
      ;

// To keep the parsing simple, we just have four separate subrules for column
// references.
columnRef:
        tableName=IDENT '.' columnName=IDENT    # ColRefTable
      | columnName=IDENT                        # ColRefNoTable
      | tableName=IDENT '.' '*'                 # ColRefWildcardTable
      | '*'                                     # ColRefWildcardNoTable
      ;

// A list of zero or more expressions, wrapped with parentheses.  This is
// useful for function calls, INSERT statements, IN clauses, etc.
//
expressionList: '(' (expression (',' expression)*)? ')' ;

// Note that we have a "DISTINCT" keyword in this parse rule since it is used
// to parse aggregate functions, and aggregates support this syntax.
// If a "DISTINCT" modifier is inappropriate for a given kind of function,
// the code should detect this and complain about it.
functionCall:
        functionName=IDENT '(' (DISTINCT? expression (',' expression)*)? ')' ;