From 2051de9b55b490f8983b8f87860b8eb54985fb3d Mon Sep 17 00:00:00 2001 From: funcpp Date: Mon, 30 Mar 2026 11:04:42 +0900 Subject: [PATCH] Support optional AS keyword in CTE definitions for Databricks Databricks allows omitting the AS keyword in CTE definitions: `WITH cte (SELECT ...) SELECT * FROM cte` Add `supports_cte_without_as()` dialect method and enable it for Databricks and Generic dialects. --- src/dialect/databricks.rs | 5 +++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 11 +++++ src/parser/mod.rs | 81 ++++++++++++++++++++++++----------- tests/sqlparser_databricks.rs | 27 ++++++++++++ 5 files changed, 104 insertions(+), 24 deletions(-) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 55e4f56cc..1a2040e89 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -90,4 +90,9 @@ impl Dialect for DatabricksDialect { fn supports_optimize_table(&self) -> bool { true } + + /// See [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte) + fn supports_cte_without_as(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 1d5461fec..c7f17351b 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -288,4 +288,8 @@ impl Dialect for GenericDialect { fn supports_comma_separated_trim(&self) -> bool { true } + + fn supports_cte_without_as(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fed81b60a..626aeba95 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1664,6 +1664,17 @@ pub trait Dialect: Debug + Any { fn supports_comma_separated_trim(&self) -> bool { false } + + /// Returns true if the dialect supports the `AS` keyword being + /// optional in a CTE definition. For example: + /// ```sql + /// WITH cte_name (SELECT ...) + /// ``` + /// + /// [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte) + fn supports_cte_without_as(&self) -> bool { + false + } } /// Operators for which precedence must be defined. 
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6282ed3d7..9fa581b21 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14060,7 +14060,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) + /// Parse a CTE (`alias [( col1, col2, ... )] [AS] (subquery)`) pub fn parse_cte(&mut self) -> Result { let name = self.parse_identifier()?; @@ -14091,32 +14091,65 @@ impl<'a> Parser<'a> { closing_paren_token: closing_paren_token.into(), } } else { - let columns = self.parse_table_alias_column_defs()?; - self.expect_keyword_is(Keyword::AS)?; - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); + let as_optional = self.dialect.supports_cte_without_as(); + let opt_query = if as_optional { + self.maybe_parse(|p| { + p.expect_token(&Token::LParen)?; + let query = p.parse_query()?; + let closing_paren_token = p.expect_token(&Token::RParen)?; + Ok((query, closing_paren_token)) + })? 
+ } else { + None + }; + match opt_query { + Some((query, closing_paren_token)) => { + let alias = TableAlias { + explicit: false, + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + materialized: None, + closing_paren_token: closing_paren_token.into(), + } } - } - self.expect_token(&Token::LParen)?; + None => { + let columns = self.parse_table_alias_column_defs()?; + if as_optional { + let _ = self.parse_keyword(Keyword::AS); + } else { + self.expect_keyword_is(Keyword::AS)?; + } + let mut is_materialized = None; + if dialect_of!(self is PostgreSqlDialect) { + if self.parse_keyword(Keyword::MATERIALIZED) { + is_materialized = Some(CteAsMaterialized::Materialized); + } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { + is_materialized = Some(CteAsMaterialized::NotMaterialized); + } + } + self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; + let query = self.parse_query()?; + let closing_paren_token = self.expect_token(&Token::RParen)?; - let alias = TableAlias { - explicit: false, - name, - columns, - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), + let alias = TableAlias { + explicit: false, + name, + columns, + }; + Cte { + alias, + query, + from: None, + materialized: is_materialized, + closing_paren_token: closing_paren_token.into(), + } + } } }; if self.parse_keyword(Keyword::FROM) { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 79b3d0654..ff44bf610 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -644,3 +644,30 @@ fn parse_databricks_json_accessor() { "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", ); } + +#[test] +fn parse_cte_without_as() { + databricks_and_generic().one_statement_parses_to( + "WITH cte (SELECT 1) SELECT * FROM cte", + "WITH cte AS (SELECT 1) SELECT * FROM cte", 
+ ); + + databricks_and_generic().one_statement_parses_to( + "WITH a AS (SELECT 1), b (SELECT 2) SELECT * FROM a, b", + "WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b", + ); + + databricks_and_generic().one_statement_parses_to( + "WITH cte (col1, col2) (SELECT 1, 2) SELECT * FROM cte", + "WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte", + ); + + databricks_and_generic().verified_query("WITH cte AS (SELECT 1) SELECT * FROM cte"); + + databricks_and_generic() + .verified_query("WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte"); + + assert!(all_dialects_where(|d| !d.supports_cte_without_as()) + .parse_sql_statements("WITH cte (SELECT 1) SELECT * FROM cte") + .is_err()); +}