SQL Grammar

Every production rule accepted by the recursive-descent parser

This page lists every production rule accepted by mskql’s hand-written recursive-descent parser. Implemented features are unmarked; features not yet supported are noted inline in italics. See the SQL Reference for a higher-level summary grouped by category.

1. Top-Level Statements

statement ::=
   select_stmt
  | insert_stmt
  | update_stmt
  | delete_stmt
  | create_table_stmt
  | drop_table_stmt
  | create_index_stmt
  | drop_index_stmt
  | create_type_stmt
  | drop_type_stmt
  | create_sequence_stmt
  | drop_sequence_stmt
  | create_view_stmt
  | drop_view_stmt
  | alter_table_stmt
  | truncate_stmt
  | copy_stmt
  | transaction_stmt

2. SELECT

select_stmt ::=
   [ with_clause ]
   SELECT [ DISTINCT [ ON ( column_list ) ] ]
   select_list
   [ FROM from_clause ]
   [ join_clause ... ]
   [ WHERE condition ]
   [ GROUP BY group_by_clause ]
   [ HAVING condition ]
   [ set_operation ]
   [ ORDER BY order_list ]
   [ LIMIT expr ]
   [ OFFSET expr ]
   [ FETCH FIRST n ROWS ONLY ]

WITH clause (CTEs)

with_clause ::= WITH [ RECURSIVE ] cte_def { , cte_def }

cte_def ::= name [ ( column_list ) ] AS ( select_stmt )

SELECT list

select_list ::= * | select_item { , select_item }

select_item ::=
   expr [ [ AS ] alias ]
  | agg_func ( [ DISTINCT ] expr ) [ [ AS ] alias ]
  | window_func ( args ) OVER ( window_spec ) [ [ AS ] alias ]
  | ( select_stmt ) [ [ AS ] alias ]  -- scalar subquery

FROM clause

from_clause ::=
   table_name [ [ AS ] alias ]
  | ( select_stmt ) [ AS ] alias  -- derived table
  | generate_series ( start , stop [ , step ] ) [ AS alias ( col_alias ) ]
  -- integer series: generate_series(1, 100)
  -- integer with step: generate_series(0, 100, 10)
  -- timestamp series: generate_series('2024-01-01'::date, '2024-12-31'::date, '1 month')

JOIN clause

join_clause ::=
   [ INNER ] JOIN table_ref ON condition
  | LEFT [ OUTER ] JOIN table_ref ON condition
  | RIGHT [ OUTER ] JOIN table_ref ON condition
  | FULL [ OUTER ] JOIN table_ref ON condition
  | CROSS JOIN table_ref
  | NATURAL [ LEFT | RIGHT | FULL ] JOIN table_ref
  | ... USING ( column_list )
  | LATERAL ( select_stmt ) [ AS ] alias
  | , table_ref  -- implicit cross join

GROUP BY clause

group_by_clause ::=
   column_ref { , column_ref }
  | ROLLUP ( column_ref { , column_ref } )
  | CUBE ( column_ref { , column_ref } )

Set operations

set_operation ::=
   UNION [ ALL ] select_stmt
  | INTERSECT select_stmt
  | EXCEPT select_stmt

3. Aggregate & Window Functions

Aggregate functions

agg_func ::= COUNT | SUM | AVG | MIN | MAX | STRING_AGG | ARRAY_AGG

-- COUNT(DISTINCT col) is supported
-- COUNT(*) is supported
-- STRING_AGG(expr, delimiter) is supported
-- ARRAY_AGG(expr) is supported

Window functions

window_func ::=
   ROW_NUMBER | RANK | DENSE_RANK
  | NTILE | PERCENT_RANK | CUME_DIST
  | LAG | LEAD
  | FIRST_VALUE | LAST_VALUE | NTH_VALUE
  | SUM | COUNT | AVG

window_spec ::=
   [ PARTITION BY column_ref ]
   [ ORDER BY column_ref [ ASC | DESC ] ]
   [ frame_clause ]

frame_clause ::= ROWS BETWEEN frame_bound AND frame_bound

frame_bound ::=
   UNBOUNDED PRECEDING
  | n PRECEDING
  | CURRENT ROW
  | n FOLLOWING
  | UNBOUNDED FOLLOWING

4. INSERT

insert_stmt ::=
   INSERT INTO table_name [ ( column_list ) ]
   VALUES value_tuple { , value_tuple }
   [ conflict_clause ]
   [ RETURNING select_list ]

  | [ with_clause ] INSERT INTO table_name [ ( column_list ) ]
    select_stmt  -- INSERT...SELECT, with optional CTE
   [ conflict_clause ]
   [ RETURNING select_list ]

conflict_clause ::=
   ON CONFLICT [ ( column_ref ) ] DO NOTHING
  | ON CONFLICT ( column_ref ) DO UPDATE SET set_list

5. UPDATE

update_stmt ::=
   UPDATE table_name [ [ AS ] alias ]
   SET set_list
   [ FROM table_name [ [ AS ] alias ] ]
   [ WHERE condition ]
   [ RETURNING select_list ]

set_list ::= set_item { , set_item }

set_item ::= column_name = expr

6. DELETE

delete_stmt ::=
   DELETE FROM table_name
   [ WHERE condition ]
   [ RETURNING select_list ]

7. Data Definition (DDL)

CREATE TABLE

create_table_stmt ::=
   CREATE TABLE [ IF NOT EXISTS ] table_name ( column_def { , column_def } [ , table_constraint ] )
  | CREATE TABLE [ IF NOT EXISTS ] table_name ( LIKE source_table )

table_constraint ::=
   PRIMARY KEY ( column_name { , column_name } )
  | UNIQUE ( column_name { , column_name } )

column_def ::=
   column_name data_type { column_constraint }

column_constraint ::=
   NOT NULL
  | UNIQUE
  | PRIMARY KEY
  | DEFAULT expr
  | CHECK ( condition )
  | REFERENCES table_name ( column_name ) [ ON DELETE CASCADE ] [ ON UPDATE CASCADE ]

ALTER TABLE

alter_table_stmt ::=
   ALTER TABLE table_name ADD COLUMN column_def
  | ALTER TABLE table_name DROP COLUMN column_name
  | ALTER TABLE table_name RENAME COLUMN old TO new
  | ALTER TABLE table_name ALTER COLUMN column_name TYPE data_type
  | ALTER TABLE table_name ALTER COLUMN column_name SET DEFAULT expr
  | ALTER TABLE table_name ALTER COLUMN column_name DROP DEFAULT
  | ALTER TABLE table_name ALTER COLUMN column_name SET NOT NULL
  | ALTER TABLE table_name ALTER COLUMN column_name DROP NOT NULL

Other DDL

drop_table_stmt ::= DROP TABLE table_name

create_index_stmt ::= CREATE INDEX [ IF NOT EXISTS ] name ON table_name ( column_name { , column_name } )

drop_index_stmt ::= DROP INDEX name

create_type_stmt ::= CREATE TYPE name AS ENUM ( string_literal { , string_literal } )

drop_type_stmt ::= DROP TYPE name

create_sequence_stmt ::= CREATE SEQUENCE name [ START WITH n ] [ INCREMENT BY n ]

drop_sequence_stmt ::= DROP SEQUENCE name

create_view_stmt ::= CREATE VIEW name AS select_stmt

drop_view_stmt ::= DROP VIEW name

truncate_stmt ::= TRUNCATE [ TABLE ] table_name

create_foreign_table_stmt ::=
   CREATE FOREIGN TABLE table_name ( column_def { , column_def } ) OPTIONS ( filename string_literal )

-- Maps a Parquet file to a SQL table. Query with standard SQL.
-- Supported Parquet types: INT32, INT64, FLOAT, DOUBLE, BYTE_ARRAY (→ TEXT), INT96 (→ TIMESTAMP).

drop_foreign_table_stmt ::= DROP FOREIGN TABLE table_name

COPY

copy_stmt ::=
   COPY table_name TO STDOUT [ [ WITH ] CSV [ HEADER ] ]
  | COPY table_name FROM STDIN [ [ WITH ] CSV [ HEADER ] ]

-- COPY TO STDOUT: tab-delimited (default), CSV, CSV HEADER
-- COPY FROM STDIN: tab-delimited (default), CSV
-- NULL values represented as \N

8. Data Types

data_type ::=
   INT | INTEGER
  | BIGINT
  | FLOAT | DOUBLE PRECISION | REAL
  | NUMERIC | DECIMAL
  | TEXT
  | VARCHAR ( n )
  | CHAR ( n ) (not yet supported)
  | BOOLEAN
  | DATE
  | TIME
  | TIMESTAMP
  | TIMESTAMPTZ
  | INTERVAL
  | UUID
  | VECTOR ( n )
  | SERIAL
  | BIGSERIAL
  | SMALLINT
  | BYTEA (not yet supported)
  | JSON | JSONB (not yet supported)
  | ARRAY (not yet supported)
  | enum_type_name

9. Expressions

expr ::=
   literal
  | column_ref
  | expr binary_op expr
  | unary_op expr
  | func_call
  | CASE WHEN condition THEN expr { WHEN ... } [ ELSE expr ] END
  | ( select_stmt )  -- scalar subquery
  | expr :: data_type  -- postfix cast
  | CAST ( expr AS data_type )
  | expr IS [ NOT ] NULL  -- postfix null test (expression-level)

binary_op ::= + | - | * | / | % | ||

unary_op ::= -

literal ::= integer | float | string | TRUE | FALSE | NULL

Built-in functions

func_call ::=
   COALESCE ( expr { , expr } )
  | NULLIF ( expr , expr )
  | GREATEST ( expr { , expr } )
  | LEAST ( expr { , expr } )
  | UPPER ( expr )
  | LOWER ( expr )
  | LENGTH ( expr )
  | TRIM ( expr )
  | CONCAT ( expr { , expr } )
  | SUBSTRING ( expr FROM n [ FOR n ] )
  | NEXTVAL ( string )
  | CURRVAL ( string )
  | GEN_RANDOM_UUID ()
  | NOW ()
  | CURRENT_TIMESTAMP
  | CURRENT_DATE
  | EXTRACT ( field FROM expr )
  | DATE_PART ( string , expr )
  | DATE_TRUNC ( string , expr )
  | AGE ( expr [ , expr ] )
  | TO_CHAR ( expr , string )
  | REPLACE ( expr , expr , expr )
  | LEFT ( expr , n ) / RIGHT ( expr , n )
  | LPAD ( expr , n [ , fill ] ) / RPAD ( ... )
  | CONCAT_WS ( sep , expr { , expr } )
  | POSITION ( expr IN expr )
  | SPLIT_PART ( expr , delim , n )
  | REPEAT ( expr , n )
  | REVERSE ( expr )
  | INITCAP ( expr )
  | LTRIM ( expr ) / RTRIM ( expr )
  | SUBSTR ( expr , start [ , length ] )
  | ABS ( expr )
  | CEIL ( expr ) / FLOOR ( expr )
  | ROUND ( expr [ , n ] )
  | POWER ( expr , expr ) / SQRT ( expr )
  | MOD ( expr , expr )
  | SIGN ( expr )
  | RANDOM ()
  | STRING_AGG ( expr , delimiter )
  | ARRAY_AGG ( expr )

10. Conditions (WHERE / ON / HAVING)

condition ::=
   condition AND condition
  | condition OR condition
  | NOT condition
  | expr cmp_op expr
  | expr BETWEEN expr AND expr
  | expr [ NOT ] IN ( value_list | select_stmt )
  | ( expr_list ) IN ( select_stmt )  -- multi-column IN
  | expr cmp_op ANY ( value_list | select_stmt )
  | expr cmp_op ALL ( value_list | select_stmt )
  | expr [ NOT ] LIKE pattern
  | expr ILIKE pattern
  | expr IS [ NOT ] NULL
  | expr IS [ NOT ] DISTINCT FROM expr
  | EXISTS ( select_stmt )
  | NOT EXISTS ( select_stmt )
  | expr SIMILAR TO pattern (not yet supported)
  | expr ~ pattern  -- POSIX regex (not yet supported)

cmp_op ::= = | != | <> | < | > | <= | >=

11. Transactions

transaction_stmt ::=
   BEGIN
  | COMMIT
  | ROLLBACK
  | SAVEPOINT name (not yet supported)
  | RELEASE SAVEPOINT name (not yet supported)
  | ROLLBACK TO SAVEPOINT name (not yet supported)

12. Not Yet Supported

Types: CHAR(n), BYTEA, JSON/JSONB, ARRAY, composite types, range types. Expressions: SIMILAR TO, POSIX regex (~), ARRAY[...] constructor. DDL: CREATE TABLE ... AS SELECT, CREATE MATERIALIZED VIEW, CASCADE/RESTRICT on DROP. Transactions: SAVEPOINT, isolation levels. Window: RANGE BETWEEN, GROUPS BETWEEN, named window definitions. Aggregation: GROUPING SETS, FILTER (WHERE ...), ordered-set aggregates. Other: SQL-level PREPARE/EXECUTE (wire-level Extended Query Protocol is supported), LISTEN/NOTIFY, GRANT/REVOKE, stored procedures, triggers.