sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
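
# A hedged illustration (added, not part of the original source): these builders
# receive the already-parsed argument list of a function call. FUNCTIONS below
# wires JSON_EXTRACT to build_extract_json_with_path(exp.JSONExtract), so the
# second argument is normalized through dialect.to_json_path and any extra
# arguments land in "expressions", roughly:
#
#     >>> import sqlglot
#     >>> expr = sqlglot.parse_one("SELECT JSON_EXTRACT(col, '$.a')", read="mysql")
#     >>> expr.find(sqlglot.exp.JSONExtract).expression  # the parsed '$.a' path node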


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
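
# An added sketch of the trie shape built by the metaclass: new_trie splits each
# multi-word key into nested dicts, with the integer key 0 marking the end of a
# complete phrase (a hypothetical "SHOW TABLES" entry is assumed here):
#
#     >>> from sqlglot.trie import new_trie
#     >>> new_trie(key.split(" ") for key in {"SHOW TABLES": None})
#     {'SHOW': {'TABLES': {0: True}}}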


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }
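
    # An added, hedged note: because join-related tokens are subtracted from
    # TABLE_ALIAS_TOKENS, a trailing LEFT/RIGHT/etc. is read as a join keyword
    # rather than as a table alias:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT * FROM t LEFT JOIN u ON t.id = u.id")
    #     # LEFT starts a join here; it is not parsed as an alias of t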

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
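
    # An added, hedged illustration: LAMBDAS handles higher-order function
    # arguments, so in dialects that accept -> lambdas (Spark is assumed here),
    # x -> x > 0 becomes exp.Lambda(this=<x > 0>, expressions=[x]):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT FILTER(arr, x -> x > 0)", read="spark").find(sqlglot.exp.Lambda)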

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }
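
    # An added, hedged illustration of the column operators above:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a::INT")                     # DCOLON -> exp.Cast
    #     >>> sqlglot.parse_one("SELECT j -> 'k'", read="postgres")  # ARROW -> exp.JSONExtract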

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
"DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 988 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 989 "DISTKEY": lambda self: self._parse_distkey(), 990 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 991 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 992 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 993 "ENVIRONMENT": lambda self: self.expression( 994 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 995 ), 996 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 997 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 998 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 999 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1000 "FREESPACE": lambda self: self._parse_freespace(), 1001 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1002 "HEAP": lambda self: self.expression(exp.HeapProperty), 1003 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1004 "IMMUTABLE": lambda self: self.expression( 1005 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1006 ), 1007 "INHERITS": lambda self: self.expression( 1008 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1009 ), 1010 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1011 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1012 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1013 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1014 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1015 "LIKE": lambda self: self._parse_create_like(), 1016 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1017 "LOCK": lambda self: self._parse_locking(), 1018 "LOCKING": lambda self: self._parse_locking(), 1019 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1020 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1021 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1022 "MODIFIES": lambda self: self._parse_modifies_property(), 1023 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1024 "NO": lambda self: self._parse_no_property(), 1025 "ON": lambda self: self._parse_on_property(), 1026 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1027 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1028 "PARTITION": lambda self: self._parse_partitioned_of(), 1029 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1030 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1031 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1032 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1033 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1034 "READS": lambda self: self._parse_reads_property(), 1035 "REMOTE": lambda self: self._parse_remote_with_connection(), 1036 "RETURNS": lambda self: self._parse_returns(), 1037 "STRICT": lambda self: self.expression(exp.StrictProperty), 1038 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1039 "ROW": lambda self: self._parse_row(), 1040 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1041 "SAMPLE": lambda self: self.expression( 1042 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
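
    # An added, hedged example: CONSTRAINT_PARSERS handles column-level constraint
    # keywords, e.g. PRIMARY KEY and CHECK both map to constraint nodes here:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, x INT CHECK (x > 0))")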

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parentheses after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
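
    # An added, hedged note on the canonicalization above: given an Iceberg-style
    # transform, both the Hive order BUCKET(16, col) and the Trino order
    # BUCKET(col, 16) end up as PartitionedByBucket(this=col, expression=16).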

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1238 "GAP_FILL": lambda self: self._parse_gap_fill(), 1239 "JSON_OBJECT": lambda self: self._parse_json_object(), 1240 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1241 "JSON_TABLE": lambda self: self._parse_json_table(), 1242 "MATCH": lambda self: self._parse_match_against(), 1243 "NORMALIZE": lambda self: self._parse_normalize(), 1244 "OPENJSON": lambda self: self._parse_open_json(), 1245 "OVERLAY": lambda self: self._parse_overlay(), 1246 "POSITION": lambda self: self._parse_position(), 1247 "PREDICT": lambda self: self._parse_predict(), 1248 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1249 "STRING_AGG": lambda self: self._parse_string_agg(), 1250 "SUBSTRING": lambda self: self._parse_substring(), 1251 "TRIM": lambda self: self._parse_trim(), 1252 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1253 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1254 "XMLELEMENT": lambda self: self.expression( 1255 exp.XMLElement, 1256 this=self._match_text_seq("NAME") and self._parse_id_var(), 1257 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1258 ), 1259 "XMLTABLE": lambda self: self._parse_xml_table(), 1260 } 1261 1262 QUERY_MODIFIER_PARSERS = { 1263 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1264 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1265 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1266 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1267 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1268 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1269 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1270 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1271 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1272 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1273 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1274 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1275 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1276 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1277 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1278 TokenType.CLUSTER_BY: lambda self: ( 1279 "cluster", 1280 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1281 ), 1282 TokenType.DISTRIBUTE_BY: lambda self: ( 1283 "distribute", 1284 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1285 ), 1286 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1287 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1288 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1289 } 1290 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1291 1292 SET_PARSERS = { 1293 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1294 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1295 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1296 "TRANSACTION": lambda self: self._parse_set_transaction(), 1297 } 1298 1299 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1300 1301 TYPE_LITERAL_PARSERS = { 1302 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1303 } 1304 1305 TYPE_CONVERTERS: 

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
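
    # An added, hypothetical sketch: dialects tune the parser by overriding these
    # class-level flags in their own Parser subclass, e.g.:
    #
    #     class MyDialectParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True
    #         STRING_ALIASES = True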

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
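
    # An added, hedged usage sketch: parse() is normally reached through the
    # dialect-level helpers, roughly like so:
    #
    #     >>> from sqlglot.dialects import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> parser = dialect.parser()
    #     >>> parser.parse(dialect.tokenize("SELECT 1; SELECT 2"))  # one tree per statement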
1612 """ 1613 return self._parse( 1614 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1615 ) 1616 1617 def parse_into( 1618 self, 1619 expression_types: exp.IntoType, 1620 raw_tokens: t.List[Token], 1621 sql: t.Optional[str] = None, 1622 ) -> t.List[t.Optional[exp.Expression]]: 1623 """ 1624 Parses a list of tokens into a given Expression type. If a collection of Expression 1625 types is given instead, this method will try to parse the token list into each one 1626 of them, stopping at the first for which the parsing succeeds. 1627 1628 Args: 1629 expression_types: The expression type(s) to try and parse the token list into. 1630 raw_tokens: The list of tokens. 1631 sql: The original SQL string, used to produce helpful debug messages. 1632 1633 Returns: 1634 The target Expression. 1635 """ 1636 errors = [] 1637 for expression_type in ensure_list(expression_types): 1638 parser = self.EXPRESSION_PARSERS.get(expression_type) 1639 if not parser: 1640 raise TypeError(f"No parser registered for {expression_type}") 1641 1642 try: 1643 return self._parse(parser, raw_tokens, sql) 1644 except ParseError as e: 1645 e.errors[0]["into_expression"] = expression_type 1646 errors.append(e) 1647 1648 raise ParseError( 1649 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1650 errors=merge_errors(errors), 1651 ) from errors[-1] 1652 1653 def _parse( 1654 self, 1655 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1656 raw_tokens: t.List[Token], 1657 sql: t.Optional[str] = None, 1658 ) -> t.List[t.Optional[exp.Expression]]: 1659 self.reset() 1660 self.sql = sql or "" 1661 1662 total = len(raw_tokens) 1663 chunks: t.List[t.List[Token]] = [[]] 1664 1665 for i, token in enumerate(raw_tokens): 1666 if token.token_type == TokenType.SEMICOLON: 1667 if token.comments: 1668 chunks.append([token]) 1669 1670 if i < total - 1: 1671 chunks.append([]) 1672 else: 1673 chunks[-1].append(token) 1674 1675 expressions = [] 1676 1677 for tokens in chunks: 1678 self._index = -1 1679 self._tokens = tokens 1680 self._advance() 1681 1682 expressions.append(parse_method(self)) 1683 1684 if self._index < len(self._tokens): 1685 self.raise_error("Invalid expression / Unexpected token") 1686 1687 self.check_errors() 1688 1689 return expressions 1690 1691 def check_errors(self) -> None: 1692 """Logs or raises any found errors, depending on the chosen error level setting.""" 1693 if self.error_level == ErrorLevel.WARN: 1694 for error in self.errors: 1695 logger.error(str(error)) 1696 elif self.error_level == ErrorLevel.RAISE and self.errors: 1697 raise ParseError( 1698 concat_messages(self.errors, self.max_errors), 1699 errors=merge_errors(self.errors), 1700 ) 1701 1702 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1703 """ 1704 Appends an error in the list of recorded errors or raises it, depending on the chosen 1705 error level setting. 1706 """ 1707 token = token or self._curr or self._prev or Token.string("") 1708 start = token.start 1709 end = token.end + 1 1710 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1711 highlight = self.sql[start:end] 1712 end_context = self.sql[end : end + self.error_message_context] 1713 1714 error = ParseError.new( 1715 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1716 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1717 description=message, 1718 line=token.line, 1719 col=token.col, 1720 start_context=start_context, 1721 highlight=highlight, 1722 end_context=end_context, 1723 ) 1724 1725 if self.error_level == ErrorLevel.IMMEDIATE: 1726 raise error 1727 1728 self.errors.append(error) 1729 1730 def expression( 1731 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1732 ) -> E: 1733 """ 1734 Creates a new, validated Expression. 1735 1736 Args: 1737 exp_class: The expression class to instantiate. 1738 comments: An optional list of comments to attach to the expression. 1739 kwargs: The arguments to set for the expression along with their respective values. 1740 1741 Returns: 1742 The target expression. 1743 """ 1744 instance = exp_class(**kwargs) 1745 instance.add_comments(comments) if comments else self._add_comments(instance) 1746 return self.validate_expression(instance) 1747 1748 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1749 if expression and self._prev_comments: 1750 expression.add_comments(self._prev_comments) 1751 self._prev_comments = None 1752 1753 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1754 """ 1755 Validates an Expression, making sure that all its mandatory arguments are set. 1756 1757 Args: 1758 expression: The expression to validate. 1759 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1760 1761 Returns: 1762 The validated expression. 1763 """ 1764 if self.error_level != ErrorLevel.IGNORE: 1765 for error_message in expression.error_messages(args): 1766 self.raise_error(error_message) 1767 1768 return expression 1769 1770 def _find_sql(self, start: Token, end: Token) -> str: 1771 return self.sql[start.start : end.end + 1] 1772 1773 def _is_connected(self) -> bool: 1774 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1775 1776 def _advance(self, times: int = 1) -> None: 1777 self._index += times 1778 self._curr = seq_get(self._tokens, self._index) 1779 self._next = seq_get(self._tokens, self._index + 1) 1780 1781 if self._index > 0: 1782 self._prev = self._tokens[self._index - 1] 1783 self._prev_comments = self._prev.comments 1784 else: 1785 self._prev = None 1786 self._prev_comments = None 1787 1788 def _retreat(self, index: int) -> None: 1789 if index != self._index: 1790 self._advance(index - self._index) 1791 1792 def _warn_unsupported(self) -> None: 1793 if len(self._tokens) <= 1: 1794 return 1795 1796 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1797 # interested in emitting a warning for the one currently being processed. 1798 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1799 1800 logger.warning( 1801 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1802 ) 1803 1804 def _parse_command(self) -> exp.Command: 1805 self._warn_unsupported() 1806 return self.expression( 1807 exp.Command, 1808 comments=self._prev_comments, 1809 this=self._prev.text.upper(), 1810 expression=self._parse_string(), 1811 ) 1812 1813 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1814 """ 1815 Attempts to backtrack if a parse function that contains a try/except internally raises an error. 
1816 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1817 solve this by setting & resetting the parser state accordingly. 1818 """ 1819 index = self._index 1820 error_level = self.error_level 1821 1822 self.error_level = ErrorLevel.IMMEDIATE 1823 try: 1824 this = parse_method() 1825 except ParseError: 1826 this = None 1827 finally: 1828 if not this or retreat: 1829 self._retreat(index) 1830 self.error_level = error_level 1831 1832 return this 1833 1834 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1835 start = self._prev 1836 exists = self._parse_exists() if allow_exists else None 1837 1838 self._match(TokenType.ON) 1839 1840 materialized = self._match_text_seq("MATERIALIZED") 1841 kind = self._match_set(self.CREATABLES) and self._prev 1842 if not kind: 1843 return self._parse_as_command(start) 1844 1845 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1846 this = self._parse_user_defined_function(kind=kind.token_type) 1847 elif kind.token_type == TokenType.TABLE: 1848 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1849 elif kind.token_type == TokenType.COLUMN: 1850 this = self._parse_column() 1851 else: 1852 this = self._parse_id_var() 1853 1854 self._match(TokenType.IS) 1855 1856 return self.expression( 1857 exp.Comment, 1858 this=this, 1859 kind=kind.text, 1860 expression=self._parse_string(), 1861 exists=exists, 1862 materialized=materialized, 1863 ) 1864 1865 def _parse_to_table( 1866 self, 1867 ) -> exp.ToTableProperty: 1868 table = self._parse_table_parts(schema=True) 1869 return self.expression(exp.ToTableProperty, this=table) 1870 1871 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1872 def _parse_ttl(self) -> exp.Expression: 1873 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1874 this = self._parse_bitwise() 1875 1876 if self._match_text_seq("DELETE"): 1877 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1878 if self._match_text_seq("RECOMPRESS"): 1879 return self.expression( 1880 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1881 ) 1882 if self._match_text_seq("TO", "DISK"): 1883 return self.expression( 1884 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1885 ) 1886 if self._match_text_seq("TO", "VOLUME"): 1887 return self.expression( 1888 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1889 ) 1890 1891 return this 1892 1893 expressions = self._parse_csv(_parse_ttl_action) 1894 where = self._parse_where() 1895 group = self._parse_group() 1896 1897 aggregates = None 1898 if group and self._match(TokenType.SET): 1899 aggregates = self._parse_csv(self._parse_set_item) 1900 1901 return self.expression( 1902 exp.MergeTreeTTL, 1903 expressions=expressions, 1904 where=where, 1905 group=group, 1906 aggregates=aggregates, 1907 ) 1908 1909 def _parse_statement(self) -> t.Optional[exp.Expression]: 1910 if self._curr is None: 1911 return None 1912 1913 if self._match_set(self.STATEMENT_PARSERS): 1914 comments = self._prev_comments 1915 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1916 stmt.add_comments(comments, prepend=True) 1917 return stmt 1918 1919 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1920 return self._parse_command() 1921 1922 expression = self._parse_expression() 1923 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1924 return 
self._parse_query_modifiers(expression) 1925 1926 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1927 start = self._prev 1928 temporary = self._match(TokenType.TEMPORARY) 1929 materialized = self._match_text_seq("MATERIALIZED") 1930 1931 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1932 if not kind: 1933 return self._parse_as_command(start) 1934 1935 concurrently = self._match_text_seq("CONCURRENTLY") 1936 if_exists = exists or self._parse_exists() 1937 1938 if kind == "COLUMN": 1939 this = self._parse_column() 1940 else: 1941 this = self._parse_table_parts( 1942 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1943 ) 1944 1945 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1946 1947 if self._match(TokenType.L_PAREN, advance=False): 1948 expressions = self._parse_wrapped_csv(self._parse_types) 1949 else: 1950 expressions = None 1951 1952 return self.expression( 1953 exp.Drop, 1954 exists=if_exists, 1955 this=this, 1956 expressions=expressions, 1957 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1958 temporary=temporary, 1959 materialized=materialized, 1960 cascade=self._match_text_seq("CASCADE"), 1961 constraints=self._match_text_seq("CONSTRAINTS"), 1962 purge=self._match_text_seq("PURGE"), 1963 cluster=cluster, 1964 concurrently=concurrently, 1965 ) 1966 1967 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1968 return ( 1969 self._match_text_seq("IF") 1970 and (not not_ or self._match(TokenType.NOT)) 1971 and self._match(TokenType.EXISTS) 1972 ) 1973 1974 def _parse_create(self) -> exp.Create | exp.Command: 1975 # Note: this can't be None because we've matched a statement parser 1976 start = self._prev 1977 1978 replace = ( 1979 start.token_type == TokenType.REPLACE 1980 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1981 or self._match_pair(TokenType.OR, TokenType.ALTER) 1982 ) 1983 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1984 1985 unique = self._match(TokenType.UNIQUE) 1986 1987 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1988 clustered = True 1989 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1990 "COLUMNSTORE" 1991 ): 1992 clustered = False 1993 else: 1994 clustered = None 1995 1996 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1997 self._advance() 1998 1999 properties = None 2000 create_token = self._match_set(self.CREATABLES) and self._prev 2001 2002 if not create_token: 2003 # exp.Properties.Location.POST_CREATE 2004 properties = self._parse_properties() 2005 create_token = self._match_set(self.CREATABLES) and self._prev 2006 2007 if not properties or not create_token: 2008 return self._parse_as_command(start) 2009 2010 concurrently = self._match_text_seq("CONCURRENTLY") 2011 exists = self._parse_exists(not_=True) 2012 this = None 2013 expression: t.Optional[exp.Expression] = None 2014 indexes = None 2015 no_schema_binding = None 2016 begin = None 2017 end = None 2018 clone = None 2019 2020 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2021 nonlocal properties 2022 if properties and temp_props: 2023 properties.expressions.extend(temp_props.expressions) 2024 elif temp_props: 2025 properties = temp_props 2026 2027 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2028 this = self._parse_user_defined_function(kind=create_token.token_type) 2029 2030 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2031 extend_props(self._parse_properties()) 2032 2033 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2034 extend_props(self._parse_properties()) 2035 2036 if not expression: 2037 if self._match(TokenType.COMMAND): 2038 expression = self._parse_as_command(self._prev) 2039 else: 2040 begin = self._match(TokenType.BEGIN) 2041 return_ = self._match_text_seq("RETURN") 2042 2043 if self._match(TokenType.STRING, advance=False): 2044 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2045 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2046 expression = self._parse_string() 2047 extend_props(self._parse_properties()) 2048 else: 2049 expression = self._parse_user_defined_function_expression() 2050 2051 end = self._match_text_seq("END") 2052 2053 if return_: 2054 expression = self.expression(exp.Return, this=expression) 2055 elif create_token.token_type == TokenType.INDEX: 2056 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2057 if not self._match(TokenType.ON): 2058 index = self._parse_id_var() 2059 anonymous = False 2060 else: 2061 index = None 2062 anonymous = True 2063 2064 this = self._parse_index(index=index, anonymous=anonymous) 2065 elif create_token.token_type in self.DB_CREATABLES: 2066 table_parts = self._parse_table_parts( 2067 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2068 ) 2069 2070 # exp.Properties.Location.POST_NAME 2071 self._match(TokenType.COMMA) 2072 extend_props(self._parse_properties(before=True)) 2073 2074 this = self._parse_schema(this=table_parts) 2075 2076 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2077 extend_props(self._parse_properties()) 2078 2079 has_alias = self._match(TokenType.ALIAS) 2080 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2081 # exp.Properties.Location.POST_ALIAS 2082 extend_props(self._parse_properties()) 2083 2084 if create_token.token_type == TokenType.SEQUENCE: 2085 expression = self._parse_types() 2086 props = self._parse_properties() 2087 if props: 2088 sequence_props = exp.SequenceProperties() 2089 options = [] 2090 for prop in props: 2091 if isinstance(prop, exp.SequenceProperties): 2092 for arg, value in prop.args.items(): 2093 if arg == "options": 2094 options.extend(value) 2095 else: 2096 sequence_props.set(arg, value) 2097 prop.pop() 2098 2099 if options: 2100 sequence_props.set("options", options) 2101 2102 props.append("expressions", sequence_props) 2103 extend_props(props) 2104 else: 2105 expression = self._parse_ddl_select() 2106 2107 # Some dialects also support using a table as an alias instead of a SELECT. 2108 # Here we fall back to this as an alternative. 
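# --- Editor's illustrative sketch (not part of the parser source) ----------
# The branches above all funnel into a single exp.Create node. A minimal
# doctest-style round trip through the public API, assuming only
# `sqlglot.parse_one` with the default dialect:
#
#   >>> import sqlglot
#   >>> create = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t (a INT)")
#   >>> type(create).__name__, create.kind, create.args["exists"]
#   ('Create', 'TABLE', True)
# ----------------------------------------------------------------------------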
2109 if not expression and has_alias: 2110 expression = self._try_parse(self._parse_table_parts) 2111 2112 if create_token.token_type == TokenType.TABLE: 2113 # exp.Properties.Location.POST_EXPRESSION 2114 extend_props(self._parse_properties()) 2115 2116 indexes = [] 2117 while True: 2118 index = self._parse_index() 2119 2120 # exp.Properties.Location.POST_INDEX 2121 extend_props(self._parse_properties()) 2122 if not index: 2123 break 2124 else: 2125 self._match(TokenType.COMMA) 2126 indexes.append(index) 2127 elif create_token.token_type == TokenType.VIEW: 2128 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2129 no_schema_binding = True 2130 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2131 extend_props(self._parse_properties()) 2132 2133 shallow = self._match_text_seq("SHALLOW") 2134 2135 if self._match_texts(self.CLONE_KEYWORDS): 2136 copy = self._prev.text.lower() == "copy" 2137 clone = self.expression( 2138 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2139 ) 2140 2141 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2142 return self._parse_as_command(start) 2143 2144 create_kind_text = create_token.text.upper() 2145 return self.expression( 2146 exp.Create, 2147 this=this, 2148 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2149 replace=replace, 2150 refresh=refresh, 2151 unique=unique, 2152 expression=expression, 2153 exists=exists, 2154 properties=properties, 2155 indexes=indexes, 2156 no_schema_binding=no_schema_binding, 2157 begin=begin, 2158 end=end, 2159 clone=clone, 2160 concurrently=concurrently, 2161 clustered=clustered, 2162 ) 2163 2164 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2165 seq = exp.SequenceProperties() 2166 2167 options = [] 2168 index = self._index 2169 2170 while self._curr: 2171 self._match(TokenType.COMMA) 2172 if self._match_text_seq("INCREMENT"): 2173 self._match_text_seq("BY") 2174 self._match_text_seq("=") 2175 seq.set("increment", self._parse_term()) 2176 elif self._match_text_seq("MINVALUE"): 2177 seq.set("minvalue", self._parse_term()) 2178 elif self._match_text_seq("MAXVALUE"): 2179 seq.set("maxvalue", self._parse_term()) 2180 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2181 self._match_text_seq("=") 2182 seq.set("start", self._parse_term()) 2183 elif self._match_text_seq("CACHE"): 2184 # T-SQL allows empty CACHE which is initialized dynamically 2185 seq.set("cache", self._parse_number() or True) 2186 elif self._match_text_seq("OWNED", "BY"): 2187 # "OWNED BY NONE" is the default 2188 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2189 else: 2190 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2191 if opt: 2192 options.append(opt) 2193 else: 2194 break 2195 2196 seq.set("options", options if options else None) 2197 return None if self._index == index else seq 2198 2199 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2200 # only used for teradata currently 2201 self._match(TokenType.COMMA) 2202 2203 kwargs = { 2204 "no": self._match_text_seq("NO"), 2205 "dual": self._match_text_seq("DUAL"), 2206 "before": self._match_text_seq("BEFORE"), 2207 "default": self._match_text_seq("DEFAULT"), 2208 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2209 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2210 "after": self._match_text_seq("AFTER"), 2211 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2212 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2213 } 2214 2215 if self._match_texts(self.PROPERTY_PARSERS): 2216 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2217 try: 2218 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2219 except TypeError: 2220 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2221 2222 return None 2223 2224 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2225 return self._parse_wrapped_csv(self._parse_property) 2226 2227 def _parse_property(self) -> t.Optional[exp.Expression]: 2228 if self._match_texts(self.PROPERTY_PARSERS): 2229 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2230 2231 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2232 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2233 2234 if self._match_text_seq("COMPOUND", "SORTKEY"): 2235 return self._parse_sortkey(compound=True) 2236 2237 if self._match_text_seq("SQL", "SECURITY"): 2238 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2239 2240 index = self._index 2241 2242 seq_props = self._parse_sequence_properties() 2243 if seq_props: 2244 return seq_props 2245 2246 self._retreat(index) 2247 key = self._parse_column() 2248 2249 if not self._match(TokenType.EQ): 2250 self._retreat(index) 2251 return None 2252 2253 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2254 if isinstance(key, exp.Column): 2255 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2256 2257 value = self._parse_bitwise() or self._parse_var(any_token=True) 2258 2259 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2260 if isinstance(value, exp.Column): 2261 value = exp.var(value.name) 2262 2263 return self.expression(exp.Property, this=key, value=value) 2264 2265 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2266 if self._match_text_seq("BY"): 2267 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2268 2269 self._match(TokenType.ALIAS) 2270 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2271 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2272 2273 return self.expression( 2274 exp.FileFormatProperty, 2275 this=( 2276 self.expression( 2277 exp.InputOutputFormat, 2278 input_format=input_format, 2279 output_format=output_format, 2280 ) 2281 if input_format or output_format 2282 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2283 ), 2284 hive_format=True, 2285 ) 2286 2287 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2288 field = self._parse_field() 2289 if isinstance(field, exp.Identifier) and not field.quoted: 2290 field = exp.var(field) 2291 2292 return field 2293 2294 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2295 self._match(TokenType.EQ) 2296 self._match(TokenType.ALIAS) 2297 2298 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2299 2300 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2301 properties = [] 2302 while True: 2303 if before: 2304 prop = self._parse_property_before() 2305 else: 2306 prop = self._parse_property() 2307 if not prop: 2308 break 2309 for p in ensure_list(prop): 2310 properties.append(p) 
2311 2312 if properties: 2313 return self.expression(exp.Properties, expressions=properties) 2314 2315 return None 2316 2317 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2318 return self.expression( 2319 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2320 ) 2321 2322 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2323 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2324 security_specifier = self._prev.text.upper() 2325 return self.expression(exp.SecurityProperty, this=security_specifier) 2326 return None 2327 2328 def _parse_settings_property(self) -> exp.SettingsProperty: 2329 return self.expression( 2330 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2331 ) 2332 2333 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2334 if self._index >= 2: 2335 pre_volatile_token = self._tokens[self._index - 2] 2336 else: 2337 pre_volatile_token = None 2338 2339 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2340 return exp.VolatileProperty() 2341 2342 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2343 2344 def _parse_retention_period(self) -> exp.Var: 2345 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2346 number = self._parse_number() 2347 number_str = f"{number} " if number else "" 2348 unit = self._parse_var(any_token=True) 2349 return exp.var(f"{number_str}{unit}") 2350 2351 def _parse_system_versioning_property( 2352 self, with_: bool = False 2353 ) -> exp.WithSystemVersioningProperty: 2354 self._match(TokenType.EQ) 2355 prop = self.expression( 2356 exp.WithSystemVersioningProperty, 2357 **{ # type: ignore 2358 "on": True, 2359 "with": with_, 2360 }, 2361 ) 2362 2363 if self._match_text_seq("OFF"): 2364 prop.set("on", False) 2365 return prop 2366 2367 self._match(TokenType.ON) 2368 if self._match(TokenType.L_PAREN): 2369 while self._curr and not self._match(TokenType.R_PAREN): 2370 if self._match_text_seq("HISTORY_TABLE", "="): 2371 prop.set("this", self._parse_table_parts()) 2372 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2373 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2374 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2375 prop.set("retention_period", self._parse_retention_period()) 2376 2377 self._match(TokenType.COMMA) 2378 2379 return prop 2380 2381 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2382 self._match(TokenType.EQ) 2383 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2384 prop = self.expression(exp.DataDeletionProperty, on=on) 2385 2386 if self._match(TokenType.L_PAREN): 2387 while self._curr and not self._match(TokenType.R_PAREN): 2388 if self._match_text_seq("FILTER_COLUMN", "="): 2389 prop.set("filter_column", self._parse_column()) 2390 elif self._match_text_seq("RETENTION_PERIOD", "="): 2391 prop.set("retention_period", self._parse_retention_period()) 2392 2393 self._match(TokenType.COMMA) 2394 2395 return prop 2396 2397 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2398 kind = "HASH" 2399 expressions: t.Optional[t.List[exp.Expression]] = None 2400 if self._match_text_seq("BY", "HASH"): 2401 expressions = self._parse_wrapped_csv(self._parse_id_var) 2402 elif self._match_text_seq("BY", "RANDOM"): 2403 kind = "RANDOM" 2404 2405 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2406 
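# --- Editor's illustrative sketch (not part of the parser source) ----------
# DISTRIBUTED BY is Doris/StarRocks-flavored DDL. Assuming a dialect such as
# "doris" routes the clause into _parse_distributed_property, a hedged doctest:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sql = "CREATE TABLE t (a INT) DISTRIBUTED BY HASH (a) BUCKETS 4"
#   >>> create = sqlglot.parse_one(sql, read="doris")
#   >>> create.find(exp.DistributedByProperty).args["kind"]
#   'HASH'
# ----------------------------------------------------------------------------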
buckets: t.Optional[exp.Expression] = None 2407 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2408 buckets = self._parse_number() 2409 2410 return self.expression( 2411 exp.DistributedByProperty, 2412 expressions=expressions, 2413 kind=kind, 2414 buckets=buckets, 2415 order=self._parse_order(), 2416 ) 2417 2418 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2419 self._match_text_seq("KEY") 2420 expressions = self._parse_wrapped_id_vars() 2421 return self.expression(expr_type, expressions=expressions) 2422 2423 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2424 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2425 prop = self._parse_system_versioning_property(with_=True) 2426 self._match_r_paren() 2427 return prop 2428 2429 if self._match(TokenType.L_PAREN, advance=False): 2430 return self._parse_wrapped_properties() 2431 2432 if self._match_text_seq("JOURNAL"): 2433 return self._parse_withjournaltable() 2434 2435 if self._match_texts(self.VIEW_ATTRIBUTES): 2436 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2437 2438 if self._match_text_seq("DATA"): 2439 return self._parse_withdata(no=False) 2440 elif self._match_text_seq("NO", "DATA"): 2441 return self._parse_withdata(no=True) 2442 2443 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2444 return self._parse_serde_properties(with_=True) 2445 2446 if self._match(TokenType.SCHEMA): 2447 return self.expression( 2448 exp.WithSchemaBindingProperty, 2449 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2450 ) 2451 2452 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2453 return self.expression( 2454 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2455 ) 2456 2457 if not self._next: 2458 return None 2459 2460 return self._parse_withisolatedloading() 2461 2462 def _parse_procedure_option(self) -> exp.Expression | None: 2463 if self._match_text_seq("EXECUTE", "AS"): 2464 return self.expression( 2465 exp.ExecuteAsProperty, 2466 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2467 or self._parse_string(), 2468 ) 2469 2470 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2471 2472 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2473 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2474 self._match(TokenType.EQ) 2475 2476 user = self._parse_id_var() 2477 self._match(TokenType.PARAMETER) 2478 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2479 2480 if not user or not host: 2481 return None 2482 2483 return exp.DefinerProperty(this=f"{user}@{host}") 2484 2485 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2486 self._match(TokenType.TABLE) 2487 self._match(TokenType.EQ) 2488 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2489 2490 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2491 return self.expression(exp.LogProperty, no=no) 2492 2493 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2494 return self.expression(exp.JournalProperty, **kwargs) 2495 2496 def _parse_checksum(self) -> exp.ChecksumProperty: 2497 self._match(TokenType.EQ) 2498 2499 on = None 2500 if self._match(TokenType.ON): 2501 on = True 2502 elif self._match_text_seq("OFF"): 2503 on = False 2504 2505 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2506 2507 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2508 return self.expression( 2509 exp.Cluster, 2510 expressions=( 2511 self._parse_wrapped_csv(self._parse_ordered) 2512 if wrapped 2513 else self._parse_csv(self._parse_ordered) 2514 ), 2515 ) 2516 2517 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2518 self._match_text_seq("BY") 2519 2520 self._match_l_paren() 2521 expressions = self._parse_csv(self._parse_column) 2522 self._match_r_paren() 2523 2524 if self._match_text_seq("SORTED", "BY"): 2525 self._match_l_paren() 2526 sorted_by = self._parse_csv(self._parse_ordered) 2527 self._match_r_paren() 2528 else: 2529 sorted_by = None 2530 2531 self._match(TokenType.INTO) 2532 buckets = self._parse_number() 2533 self._match_text_seq("BUCKETS") 2534 2535 return self.expression( 2536 exp.ClusteredByProperty, 2537 expressions=expressions, 2538 sorted_by=sorted_by, 2539 buckets=buckets, 2540 ) 2541 2542 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2543 if not self._match_text_seq("GRANTS"): 2544 self._retreat(self._index - 1) 2545 return None 2546 2547 return self.expression(exp.CopyGrantsProperty) 2548 2549 def _parse_freespace(self) -> exp.FreespaceProperty: 2550 self._match(TokenType.EQ) 2551 return self.expression( 2552 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2553 ) 2554 2555 def _parse_mergeblockratio( 2556 self, no: bool = False, default: bool = False 2557 ) -> exp.MergeBlockRatioProperty: 2558 if self._match(TokenType.EQ): 2559 return self.expression( 2560 exp.MergeBlockRatioProperty, 2561 this=self._parse_number(), 2562 percent=self._match(TokenType.PERCENT), 2563 ) 2564 2565 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2566 2567 def _parse_datablocksize( 2568 self, 2569 default: t.Optional[bool] = None, 2570 minimum: t.Optional[bool] = None, 2571 maximum: t.Optional[bool] = None, 2572 ) -> exp.DataBlocksizeProperty: 2573 self._match(TokenType.EQ) 2574 size = self._parse_number() 2575 2576 units = None 2577 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2578 units = self._prev.text 2579 2580 return self.expression( 2581 exp.DataBlocksizeProperty, 2582 size=size, 2583 units=units, 2584 default=default, 2585 minimum=minimum, 2586 maximum=maximum, 2587 ) 2588 2589 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2590 self._match(TokenType.EQ) 2591 always = self._match_text_seq("ALWAYS") 2592 manual = self._match_text_seq("MANUAL") 2593 never = self._match_text_seq("NEVER") 2594 default = self._match_text_seq("DEFAULT") 2595 2596 autotemp = None 2597 if self._match_text_seq("AUTOTEMP"): 2598 autotemp = self._parse_schema() 2599 2600 return self.expression( 2601 exp.BlockCompressionProperty, 2602 always=always, 2603 manual=manual, 2604 never=never, 2605 default=default, 2606 autotemp=autotemp, 2607 ) 2608 2609 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2610 index = self._index 2611 no = self._match_text_seq("NO") 2612 concurrent = self._match_text_seq("CONCURRENT") 2613 2614 if not self._match_text_seq("ISOLATED", "LOADING"): 2615 self._retreat(index) 2616 return None 2617 2618 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2619 return self.expression( 2620 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2621 ) 2622 2623 def _parse_locking(self) -> exp.LockingProperty: 2624 if self._match(TokenType.TABLE): 2625 kind = "TABLE" 2626 elif 
self._match(TokenType.VIEW): 2627 kind = "VIEW" 2628 elif self._match(TokenType.ROW): 2629 kind = "ROW" 2630 elif self._match_text_seq("DATABASE"): 2631 kind = "DATABASE" 2632 else: 2633 kind = None 2634 2635 if kind in ("DATABASE", "TABLE", "VIEW"): 2636 this = self._parse_table_parts() 2637 else: 2638 this = None 2639 2640 if self._match(TokenType.FOR): 2641 for_or_in = "FOR" 2642 elif self._match(TokenType.IN): 2643 for_or_in = "IN" 2644 else: 2645 for_or_in = None 2646 2647 if self._match_text_seq("ACCESS"): 2648 lock_type = "ACCESS" 2649 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2650 lock_type = "EXCLUSIVE" 2651 elif self._match_text_seq("SHARE"): 2652 lock_type = "SHARE" 2653 elif self._match_text_seq("READ"): 2654 lock_type = "READ" 2655 elif self._match_text_seq("WRITE"): 2656 lock_type = "WRITE" 2657 elif self._match_text_seq("CHECKSUM"): 2658 lock_type = "CHECKSUM" 2659 else: 2660 lock_type = None 2661 2662 override = self._match_text_seq("OVERRIDE") 2663 2664 return self.expression( 2665 exp.LockingProperty, 2666 this=this, 2667 kind=kind, 2668 for_or_in=for_or_in, 2669 lock_type=lock_type, 2670 override=override, 2671 ) 2672 2673 def _parse_partition_by(self) -> t.List[exp.Expression]: 2674 if self._match(TokenType.PARTITION_BY): 2675 return self._parse_csv(self._parse_assignment) 2676 return [] 2677 2678 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2679 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2680 if self._match_text_seq("MINVALUE"): 2681 return exp.var("MINVALUE") 2682 if self._match_text_seq("MAXVALUE"): 2683 return exp.var("MAXVALUE") 2684 return self._parse_bitwise() 2685 2686 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2687 expression = None 2688 from_expressions = None 2689 to_expressions = None 2690 2691 if self._match(TokenType.IN): 2692 this = self._parse_wrapped_csv(self._parse_bitwise) 2693 elif self._match(TokenType.FROM): 2694 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2695 self._match_text_seq("TO") 2696 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2697 elif self._match_text_seq("WITH", "(", "MODULUS"): 2698 this = self._parse_number() 2699 self._match_text_seq(",", "REMAINDER") 2700 expression = self._parse_number() 2701 self._match_r_paren() 2702 else: 2703 self.raise_error("Failed to parse partition bound spec.") 2704 2705 return self.expression( 2706 exp.PartitionBoundSpec, 2707 this=this, 2708 expression=expression, 2709 from_expressions=from_expressions, 2710 to_expressions=to_expressions, 2711 ) 2712 2713 # https://www.postgresql.org/docs/current/sql-createtable.html 2714 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2715 if not self._match_text_seq("OF"): 2716 self._retreat(self._index - 1) 2717 return None 2718 2719 this = self._parse_table(schema=True) 2720 2721 if self._match(TokenType.DEFAULT): 2722 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2723 elif self._match_text_seq("FOR", "VALUES"): 2724 expression = self._parse_partition_bound_spec() 2725 else: 2726 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2727 2728 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2729 2730 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2731 self._match(TokenType.EQ) 2732 return self.expression( 2733 exp.PartitionedByProperty, 2734 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2735 ) 2736 2737 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2738 if self._match_text_seq("AND", "STATISTICS"): 2739 statistics = True 2740 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2741 statistics = False 2742 else: 2743 statistics = None 2744 2745 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2746 2747 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2748 if self._match_text_seq("SQL"): 2749 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2750 return None 2751 2752 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2753 if self._match_text_seq("SQL", "DATA"): 2754 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2755 return None 2756 2757 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2758 if self._match_text_seq("PRIMARY", "INDEX"): 2759 return exp.NoPrimaryIndexProperty() 2760 if self._match_text_seq("SQL"): 2761 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2762 return None 2763 2764 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2765 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2766 return exp.OnCommitProperty() 2767 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2768 return exp.OnCommitProperty(delete=True) 2769 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2770 2771 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2772 if self._match_text_seq("SQL", "DATA"): 2773 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2774 return None 2775 2776 def _parse_distkey(self) -> exp.DistKeyProperty: 2777 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2778 2779 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2780 table = self._parse_table(schema=True) 2781 2782 options = [] 2783 while self._match_texts(("INCLUDING", "EXCLUDING")): 2784 this = self._prev.text.upper() 2785 2786 id_var = self._parse_id_var() 2787 if not id_var: 2788 return None 2789 2790 options.append( 2791 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2792 ) 2793 2794 return self.expression(exp.LikeProperty, this=table, expressions=options) 2795 2796 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2797 return self.expression( 2798 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2799 ) 2800 2801 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2802 self._match(TokenType.EQ) 2803 return self.expression( 2804 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2805 ) 2806 2807 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2808 self._match_text_seq("WITH", "CONNECTION") 2809 return self.expression( 2810 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2811 ) 2812 2813 def _parse_returns(self) -> exp.ReturnsProperty: 2814 value: t.Optional[exp.Expression] 2815 null = None 2816 is_table = self._match(TokenType.TABLE) 2817 2818 if is_table: 2819 if self._match(TokenType.LT): 2820 value = self.expression( 2821 exp.Schema, 2822 this="TABLE", 2823 expressions=self._parse_csv(self._parse_struct_types), 2824 ) 2825 if not self._match(TokenType.GT): 2826 self.raise_error("Expecting >") 2827 else: 2828 value = self._parse_schema(exp.var("TABLE")) 2829 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
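# (Editor's note, not in the original source: NULL ON NULL INPUT is the
#  SQL-standard "strict" marker. A hedged doctest, assuming the default
#  dialect reaches this branch via the RETURNS property parser:
#    >>> import sqlglot
#    >>> fn = sqlglot.parse_one(
#    ...     "CREATE FUNCTION f(x INT) RETURNS NULL ON NULL INPUT RETURN x"
#    ... )
#    >>> fn.find(sqlglot.exp.ReturnsProperty).args["null"]
#    True
# )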
2830 null = True 2831 value = None 2832 else: 2833 value = self._parse_types() 2834 2835 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2836 2837 def _parse_describe(self) -> exp.Describe: 2838 kind = self._match_set(self.CREATABLES) and self._prev.text 2839 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2840 if self._match(TokenType.DOT): 2841 style = None 2842 self._retreat(self._index - 2) 2843 2844 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2845 2846 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2847 this = self._parse_statement() 2848 else: 2849 this = self._parse_table(schema=True) 2850 2851 properties = self._parse_properties() 2852 expressions = properties.expressions if properties else None 2853 partition = self._parse_partition() 2854 return self.expression( 2855 exp.Describe, 2856 this=this, 2857 style=style, 2858 kind=kind, 2859 expressions=expressions, 2860 partition=partition, 2861 format=format, 2862 ) 2863 2864 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2865 kind = self._prev.text.upper() 2866 expressions = [] 2867 2868 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2869 if self._match(TokenType.WHEN): 2870 expression = self._parse_disjunction() 2871 self._match(TokenType.THEN) 2872 else: 2873 expression = None 2874 2875 else_ = self._match(TokenType.ELSE) 2876 2877 if not self._match(TokenType.INTO): 2878 return None 2879 2880 return self.expression( 2881 exp.ConditionalInsert, 2882 this=self.expression( 2883 exp.Insert, 2884 this=self._parse_table(schema=True), 2885 expression=self._parse_derived_table_values(), 2886 ), 2887 expression=expression, 2888 else_=else_, 2889 ) 2890 2891 expression = parse_conditional_insert() 2892 while expression is not None: 2893 expressions.append(expression) 2894 expression = parse_conditional_insert() 2895 2896 return self.expression( 2897 exp.MultitableInserts, 2898 kind=kind, 2899 comments=comments, 2900 expressions=expressions, 2901 source=self._parse_table(), 2902 ) 2903 2904 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2905 comments = [] 2906 hint = self._parse_hint() 2907 overwrite = self._match(TokenType.OVERWRITE) 2908 ignore = self._match(TokenType.IGNORE) 2909 local = self._match_text_seq("LOCAL") 2910 alternative = None 2911 is_function = None 2912 2913 if self._match_text_seq("DIRECTORY"): 2914 this: t.Optional[exp.Expression] = self.expression( 2915 exp.Directory, 2916 this=self._parse_var_or_string(), 2917 local=local, 2918 row_format=self._parse_row_format(match_row=True), 2919 ) 2920 else: 2921 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2922 comments += ensure_list(self._prev_comments) 2923 return self._parse_multitable_inserts(comments) 2924 2925 if self._match(TokenType.OR): 2926 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2927 2928 self._match(TokenType.INTO) 2929 comments += ensure_list(self._prev_comments) 2930 self._match(TokenType.TABLE) 2931 is_function = self._match(TokenType.FUNCTION) 2932 2933 this = ( 2934 self._parse_table(schema=True, parse_partition=True) 2935 if not is_function 2936 else self._parse_function() 2937 ) 2938 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2939 this.set("alias", self._parse_table_alias()) 2940 2941 returning = self._parse_returning() 2942 2943 return self.expression( 2944 
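# --- Editor's illustrative sketch (not part of the parser source) ----------
# The kwargs below assemble the exp.Insert node. A minimal doctest with the
# default dialect:
#
#   >>> import sqlglot
#   >>> ins = sqlglot.parse_one("INSERT INTO t (a) VALUES (1), (2)")
#   >>> type(ins).__name__, type(ins.expression).__name__
#   ('Insert', 'Values')
# ----------------------------------------------------------------------------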
exp.Insert, 2945 comments=comments, 2946 hint=hint, 2947 is_function=is_function, 2948 this=this, 2949 stored=self._match_text_seq("STORED") and self._parse_stored(), 2950 by_name=self._match_text_seq("BY", "NAME"), 2951 exists=self._parse_exists(), 2952 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2953 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2954 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2955 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2956 conflict=self._parse_on_conflict(), 2957 returning=returning or self._parse_returning(), 2958 overwrite=overwrite, 2959 alternative=alternative, 2960 ignore=ignore, 2961 source=self._match(TokenType.TABLE) and self._parse_table(), 2962 ) 2963 2964 def _parse_kill(self) -> exp.Kill: 2965 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2966 2967 return self.expression( 2968 exp.Kill, 2969 this=self._parse_primary(), 2970 kind=kind, 2971 ) 2972 2973 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2974 conflict = self._match_text_seq("ON", "CONFLICT") 2975 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2976 2977 if not conflict and not duplicate: 2978 return None 2979 2980 conflict_keys = None 2981 constraint = None 2982 2983 if conflict: 2984 if self._match_text_seq("ON", "CONSTRAINT"): 2985 constraint = self._parse_id_var() 2986 elif self._match(TokenType.L_PAREN): 2987 conflict_keys = self._parse_csv(self._parse_id_var) 2988 self._match_r_paren() 2989 2990 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2991 if self._prev.token_type == TokenType.UPDATE: 2992 self._match(TokenType.SET) 2993 expressions = self._parse_csv(self._parse_equality) 2994 else: 2995 expressions = None 2996 2997 return self.expression( 2998 exp.OnConflict, 2999 duplicate=duplicate, 3000 expressions=expressions, 3001 action=action, 3002 conflict_keys=conflict_keys, 3003 constraint=constraint, 3004 where=self._parse_where(), 3005 ) 3006 3007 def _parse_returning(self) -> t.Optional[exp.Returning]: 3008 if not self._match(TokenType.RETURNING): 3009 return None 3010 return self.expression( 3011 exp.Returning, 3012 expressions=self._parse_csv(self._parse_expression), 3013 into=self._match(TokenType.INTO) and self._parse_table_part(), 3014 ) 3015 3016 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3017 if not self._match(TokenType.FORMAT): 3018 return None 3019 return self._parse_row_format() 3020 3021 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3022 index = self._index 3023 with_ = with_ or self._match_text_seq("WITH") 3024 3025 if not self._match(TokenType.SERDE_PROPERTIES): 3026 self._retreat(index) 3027 return None 3028 return self.expression( 3029 exp.SerdeProperties, 3030 **{ # type: ignore 3031 "expressions": self._parse_wrapped_properties(), 3032 "with": with_, 3033 }, 3034 ) 3035 3036 def _parse_row_format( 3037 self, match_row: bool = False 3038 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3039 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3040 return None 3041 3042 if self._match_text_seq("SERDE"): 3043 this = self._parse_string() 3044 3045 serde_properties = self._parse_serde_properties() 3046 3047 return self.expression( 3048 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3049 ) 3050 3051 self._match_text_seq("DELIMITED") 3052 3053 kwargs = {} 3054 3055 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3056 kwargs["fields"] = self._parse_string() 3057 if self._match_text_seq("ESCAPED", "BY"): 3058 kwargs["escaped"] = self._parse_string() 3059 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3060 kwargs["collection_items"] = self._parse_string() 3061 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3062 kwargs["map_keys"] = self._parse_string() 3063 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3064 kwargs["lines"] = self._parse_string() 3065 if self._match_text_seq("NULL", "DEFINED", "AS"): 3066 kwargs["null"] = self._parse_string() 3067 3068 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3069 3070 def _parse_load(self) -> exp.LoadData | exp.Command: 3071 if self._match_text_seq("DATA"): 3072 local = self._match_text_seq("LOCAL") 3073 self._match_text_seq("INPATH") 3074 inpath = self._parse_string() 3075 overwrite = self._match(TokenType.OVERWRITE) 3076 self._match_pair(TokenType.INTO, TokenType.TABLE) 3077 3078 return self.expression( 3079 exp.LoadData, 3080 this=self._parse_table(schema=True), 3081 local=local, 3082 overwrite=overwrite, 3083 inpath=inpath, 3084 partition=self._parse_partition(), 3085 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3086 serde=self._match_text_seq("SERDE") and self._parse_string(), 3087 ) 3088 return self._parse_as_command(self._prev) 3089 3090 def _parse_delete(self) -> exp.Delete: 3091 # This handles MySQL's "Multiple-Table Syntax" 3092 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3093 tables = None 3094 if not self._match(TokenType.FROM, advance=False): 3095 tables = self._parse_csv(self._parse_table) or None 3096 3097 returning = self._parse_returning() 3098 3099 return self.expression( 3100 exp.Delete, 3101 tables=tables, 3102 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3103 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3104 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3105 where=self._parse_where(), 3106 returning=returning or self._parse_returning(), 3107 limit=self._parse_limit(), 3108 ) 3109 3110 def _parse_update(self) -> exp.Update: 3111 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3112 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3113 returning = self._parse_returning() 3114 return self.expression( 3115 exp.Update, 3116 **{ # type: ignore 3117 "this": this, 3118 "expressions": expressions, 3119 "from": self._parse_from(joins=True), 3120 "where": self._parse_where(), 3121 "returning": returning or self._parse_returning(), 3122 "order": self._parse_order(), 3123 "limit": self._parse_limit(), 3124 }, 3125 ) 3126 3127 def _parse_use(self) -> exp.Use: 3128 return self.expression( 3129 exp.Use, 3130 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3131 this=self._parse_table(schema=False), 3132 ) 3133 3134 def _parse_uncache(self) -> exp.Uncache: 3135 if not self._match(TokenType.TABLE): 3136 self.raise_error("Expecting TABLE after UNCACHE") 3137 3138 return self.expression( 3139 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3140 ) 3141 3142 def _parse_cache(self) -> exp.Cache: 3143 lazy = self._match_text_seq("LAZY") 3144 self._match(TokenType.TABLE) 3145 table = 
self._parse_table(schema=True) 3146 3147 options = [] 3148 if self._match_text_seq("OPTIONS"): 3149 self._match_l_paren() 3150 k = self._parse_string() 3151 self._match(TokenType.EQ) 3152 v = self._parse_string() 3153 options = [k, v] 3154 self._match_r_paren() 3155 3156 self._match(TokenType.ALIAS) 3157 return self.expression( 3158 exp.Cache, 3159 this=table, 3160 lazy=lazy, 3161 options=options, 3162 expression=self._parse_select(nested=True), 3163 ) 3164 3165 def _parse_partition(self) -> t.Optional[exp.Partition]: 3166 if not self._match_texts(self.PARTITION_KEYWORDS): 3167 return None 3168 3169 return self.expression( 3170 exp.Partition, 3171 subpartition=self._prev.text.upper() == "SUBPARTITION", 3172 expressions=self._parse_wrapped_csv(self._parse_assignment), 3173 ) 3174 3175 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3176 def _parse_value_expression() -> t.Optional[exp.Expression]: 3177 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3178 return exp.var(self._prev.text.upper()) 3179 return self._parse_expression() 3180 3181 if self._match(TokenType.L_PAREN): 3182 expressions = self._parse_csv(_parse_value_expression) 3183 self._match_r_paren() 3184 return self.expression(exp.Tuple, expressions=expressions) 3185 3186 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3187 expression = self._parse_expression() 3188 if expression: 3189 return self.expression(exp.Tuple, expressions=[expression]) 3190 return None 3191 3192 def _parse_projections(self) -> t.List[exp.Expression]: 3193 return self._parse_expressions() 3194 3195 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3196 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3197 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3198 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3199 ) 3200 elif self._match(TokenType.FROM): 3201 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3202 # Support parentheses for DuckDB's FROM-first syntax 3203 select = self._parse_select() 3204 if select: 3205 select.set("from", from_) 3206 this = select 3207 else: 3208 this = exp.select("*").from_(t.cast(exp.From, from_)) 3209 else: 3210 this = ( 3211 self._parse_table(consume_pipe=True) 3212 if table 3213 else self._parse_select(nested=True, parse_set_operation=False) 3214 ) 3215 3216 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3217 # in case a modifier (e.g. 
join) is following 3218 if table and isinstance(this, exp.Values) and this.alias: 3219 alias = this.args["alias"].pop() 3220 this = exp.Table(this=this, alias=alias) 3221 3222 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3223 3224 return this 3225 3226 def _parse_select( 3227 self, 3228 nested: bool = False, 3229 table: bool = False, 3230 parse_subquery_alias: bool = True, 3231 parse_set_operation: bool = True, 3232 consume_pipe: bool = True, 3233 ) -> t.Optional[exp.Expression]: 3234 query = self._parse_select_query( 3235 nested=nested, 3236 table=table, 3237 parse_subquery_alias=parse_subquery_alias, 3238 parse_set_operation=parse_set_operation, 3239 ) 3240 3241 if ( 3242 consume_pipe 3243 and self._match(TokenType.PIPE_GT, advance=False) 3244 and isinstance(query, exp.Query) 3245 ): 3246 query = self._parse_pipe_syntax_query(query) 3247 query = query.subquery(copy=False) if query and table else query 3248 3249 return query 3250 3251 def _parse_select_query( 3252 self, 3253 nested: bool = False, 3254 table: bool = False, 3255 parse_subquery_alias: bool = True, 3256 parse_set_operation: bool = True, 3257 ) -> t.Optional[exp.Expression]: 3258 cte = self._parse_with() 3259 3260 if cte: 3261 this = self._parse_statement() 3262 3263 if not this: 3264 self.raise_error("Failed to parse any statement following CTE") 3265 return cte 3266 3267 if "with" in this.arg_types: 3268 this.set("with", cte) 3269 else: 3270 self.raise_error(f"{this.key} does not support CTE") 3271 this = cte 3272 3273 return this 3274 3275 # duckdb supports leading with FROM x 3276 from_ = ( 3277 self._parse_from(consume_pipe=True) 3278 if self._match(TokenType.FROM, advance=False) 3279 else None 3280 ) 3281 3282 if self._match(TokenType.SELECT): 3283 comments = self._prev_comments 3284 3285 hint = self._parse_hint() 3286 3287 if self._next and not self._next.token_type == TokenType.DOT: 3288 all_ = self._match(TokenType.ALL) 3289 distinct = self._match_set(self.DISTINCT_TOKENS) 3290 else: 3291 all_, distinct = None, None 3292 3293 kind = ( 3294 self._match(TokenType.ALIAS) 3295 and self._match_texts(("STRUCT", "VALUE")) 3296 and self._prev.text.upper() 3297 ) 3298 3299 if distinct: 3300 distinct = self.expression( 3301 exp.Distinct, 3302 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3303 ) 3304 3305 if all_ and distinct: 3306 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3307 3308 operation_modifiers = [] 3309 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3310 operation_modifiers.append(exp.var(self._prev.text.upper())) 3311 3312 limit = self._parse_limit(top=True) 3313 projections = self._parse_projections() 3314 3315 this = self.expression( 3316 exp.Select, 3317 kind=kind, 3318 hint=hint, 3319 distinct=distinct, 3320 expressions=projections, 3321 limit=limit, 3322 operation_modifiers=operation_modifiers or None, 3323 ) 3324 this.comments = comments 3325 3326 into = self._parse_into() 3327 if into: 3328 this.set("into", into) 3329 3330 if not from_: 3331 from_ = self._parse_from() 3332 3333 if from_: 3334 this.set("from", from_) 3335 3336 this = self._parse_query_modifiers(this) 3337 elif (table or nested) and self._match(TokenType.L_PAREN): 3338 this = self._parse_wrapped_select(table=table) 3339 3340 # We return early here so that the UNION isn't attached to the subquery by the 3341 # following call to _parse_set_operations, but instead becomes the parent node 3342 self._match_r_paren() 3343 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3344 elif self._match(TokenType.VALUES, advance=False): 3345 this = self._parse_derived_table_values() 3346 elif from_: 3347 this = exp.select("*").from_(from_.this, copy=False) 3348 elif self._match(TokenType.SUMMARIZE): 3349 table = self._match(TokenType.TABLE) 3350 this = self._parse_select() or self._parse_string() or self._parse_table() 3351 return self.expression(exp.Summarize, this=this, table=table) 3352 elif self._match(TokenType.DESCRIBE): 3353 this = self._parse_describe() 3354 elif self._match_text_seq("STREAM"): 3355 this = self._parse_function() 3356 if this: 3357 this = self.expression(exp.Stream, this=this) 3358 else: 3359 self._retreat(self._index - 1) 3360 else: 3361 this = None 3362 3363 return self._parse_set_operations(this) if parse_set_operation else this 3364 3365 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3366 self._match_text_seq("SEARCH") 3367 3368 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3369 3370 if not kind: 3371 return None 3372 3373 self._match_text_seq("FIRST", "BY") 3374 3375 return self.expression( 3376 exp.RecursiveWithSearch, 3377 kind=kind, 3378 this=self._parse_id_var(), 3379 expression=self._match_text_seq("SET") and self._parse_id_var(), 3380 using=self._match_text_seq("USING") and self._parse_id_var(), 3381 ) 3382 3383 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3384 if not skip_with_token and not self._match(TokenType.WITH): 3385 return None 3386 3387 comments = self._prev_comments 3388 recursive = self._match(TokenType.RECURSIVE) 3389 3390 last_comments = None 3391 expressions = [] 3392 while True: 3393 cte = self._parse_cte() 3394 if isinstance(cte, exp.CTE): 3395 expressions.append(cte) 3396 if last_comments: 3397 cte.add_comments(last_comments) 3398 3399 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3400 break 3401 else: 3402 self._match(TokenType.WITH) 3403 3404 last_comments = self._prev_comments 3405 3406 return self.expression( 3407 exp.With, 3408 comments=comments, 3409 expressions=expressions, 3410 recursive=recursive, 3411 search=self._parse_recursive_with_search(), 3412 ) 3413 3414 def _parse_cte(self) -> t.Optional[exp.CTE]: 3415 index = self._index 3416 3417 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3418 if not alias or not alias.this: 3419 self.raise_error("Expected CTE to have alias") 3420 3421 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3422 self._retreat(index) 3423 return None 3424 3425 comments = self._prev_comments 3426 3427 if self._match_text_seq("NOT", "MATERIALIZED"): 3428 materialized = False 3429 elif self._match_text_seq("MATERIALIZED"): 3430 materialized = True 3431 else: 3432 materialized = None 3433 3434 cte = self.expression( 3435 exp.CTE, 3436 this=self._parse_wrapped(self._parse_statement), 3437 alias=alias, 3438 materialized=materialized, 3439 comments=comments, 3440 ) 3441 3442 values = cte.this 3443 if isinstance(values, exp.Values): 3444 if values.alias: 3445 cte.set("this", exp.select("*").from_(values)) 3446 else: 3447 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3448 3449 return cte 3450 3451 def _parse_table_alias( 3452 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3453 ) -> t.Optional[exp.TableAlias]: 3454 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3455 # so this section tries to parse the clause 
version and if it fails, it treats the token 3456 # as an identifier (alias) 3457 if self._can_parse_limit_or_offset(): 3458 return None 3459 3460 any_token = self._match(TokenType.ALIAS) 3461 alias = ( 3462 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3463 or self._parse_string_as_identifier() 3464 ) 3465 3466 index = self._index 3467 if self._match(TokenType.L_PAREN): 3468 columns = self._parse_csv(self._parse_function_parameter) 3469 self._match_r_paren() if columns else self._retreat(index) 3470 else: 3471 columns = None 3472 3473 if not alias and not columns: 3474 return None 3475 3476 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3477 3478 # We bubble up comments from the Identifier to the TableAlias 3479 if isinstance(alias, exp.Identifier): 3480 table_alias.add_comments(alias.pop_comments()) 3481 3482 return table_alias 3483 3484 def _parse_subquery( 3485 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3486 ) -> t.Optional[exp.Subquery]: 3487 if not this: 3488 return None 3489 3490 return self.expression( 3491 exp.Subquery, 3492 this=this, 3493 pivots=self._parse_pivots(), 3494 alias=self._parse_table_alias() if parse_alias else None, 3495 sample=self._parse_table_sample(), 3496 ) 3497 3498 def _implicit_unnests_to_explicit(self, this: E) -> E: 3499 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3500 3501 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3502 for i, join in enumerate(this.args.get("joins") or []): 3503 table = join.this 3504 normalized_table = table.copy() 3505 normalized_table.meta["maybe_column"] = True 3506 normalized_table = _norm(normalized_table, dialect=self.dialect) 3507 3508 if isinstance(table, exp.Table) and not join.args.get("on"): 3509 if normalized_table.parts[0].name in refs: 3510 table_as_column = table.to_column() 3511 unnest = exp.Unnest(expressions=[table_as_column]) 3512 3513 # Table.to_column creates a parent Alias node that we want to convert to 3514 # a TableAlias and attach to the Unnest, so it matches the parser's output 3515 if isinstance(table.args.get("alias"), exp.TableAlias): 3516 table_as_column.replace(table_as_column.this) 3517 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3518 3519 table.replace(unnest) 3520 3521 refs.add(normalized_table.alias_or_name) 3522 3523 return this 3524 3525 def _parse_query_modifiers( 3526 self, this: t.Optional[exp.Expression] 3527 ) -> t.Optional[exp.Expression]: 3528 if isinstance(this, self.MODIFIABLES): 3529 for join in self._parse_joins(): 3530 this.append("joins", join) 3531 for lateral in iter(self._parse_lateral, None): 3532 this.append("laterals", lateral) 3533 3534 while True: 3535 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3536 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3537 key, expression = parser(self) 3538 3539 if expression: 3540 this.set(key, expression) 3541 if key == "limit": 3542 offset = expression.args.pop("offset", None) 3543 3544 if offset: 3545 offset = exp.Offset(expression=offset) 3546 this.set("offset", offset) 3547 3548 limit_by_expressions = expression.expressions 3549 expression.set("expressions", None) 3550 offset.set("expressions", limit_by_expressions) 3551 continue 3552 break 3553 3554 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3555 this = self._implicit_unnests_to_explicit(this) 3556 3557 return this 3558 3559 def 
_parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3560 start = self._curr 3561 while self._curr: 3562 self._advance() 3563 3564 end = self._tokens[self._index - 1] 3565 return exp.Hint(expressions=[self._find_sql(start, end)]) 3566 3567 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3568 return self._parse_function_call() 3569 3570 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3571 start_index = self._index 3572 should_fallback_to_string = False 3573 3574 hints = [] 3575 try: 3576 for hint in iter( 3577 lambda: self._parse_csv( 3578 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3579 ), 3580 [], 3581 ): 3582 hints.extend(hint) 3583 except ParseError: 3584 should_fallback_to_string = True 3585 3586 if should_fallback_to_string or self._curr: 3587 self._retreat(start_index) 3588 return self._parse_hint_fallback_to_string() 3589 3590 return self.expression(exp.Hint, expressions=hints) 3591 3592 def _parse_hint(self) -> t.Optional[exp.Hint]: 3593 if self._match(TokenType.HINT) and self._prev_comments: 3594 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3595 3596 return None 3597 3598 def _parse_into(self) -> t.Optional[exp.Into]: 3599 if not self._match(TokenType.INTO): 3600 return None 3601 3602 temp = self._match(TokenType.TEMPORARY) 3603 unlogged = self._match_text_seq("UNLOGGED") 3604 self._match(TokenType.TABLE) 3605 3606 return self.expression( 3607 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3608 ) 3609 3610 def _parse_from( 3611 self, 3612 joins: bool = False, 3613 skip_from_token: bool = False, 3614 consume_pipe: bool = False, 3615 ) -> t.Optional[exp.From]: 3616 if not skip_from_token and not self._match(TokenType.FROM): 3617 return None 3618 3619 return self.expression( 3620 exp.From, 3621 comments=self._prev_comments, 3622 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3623 ) 3624 3625 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3626 return self.expression( 3627 exp.MatchRecognizeMeasure, 3628 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3629 this=self._parse_expression(), 3630 ) 3631 3632 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3633 if not self._match(TokenType.MATCH_RECOGNIZE): 3634 return None 3635 3636 self._match_l_paren() 3637 3638 partition = self._parse_partition_by() 3639 order = self._parse_order() 3640 3641 measures = ( 3642 self._parse_csv(self._parse_match_recognize_measure) 3643 if self._match_text_seq("MEASURES") 3644 else None 3645 ) 3646 3647 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3648 rows = exp.var("ONE ROW PER MATCH") 3649 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3650 text = "ALL ROWS PER MATCH" 3651 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3652 text += " SHOW EMPTY MATCHES" 3653 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3654 text += " OMIT EMPTY MATCHES" 3655 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3656 text += " WITH UNMATCHED ROWS" 3657 rows = exp.var(text) 3658 else: 3659 rows = None 3660 3661 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3662 text = "AFTER MATCH SKIP" 3663 if self._match_text_seq("PAST", "LAST", "ROW"): 3664 text += " PAST LAST ROW" 3665 elif self._match_text_seq("TO", "NEXT", "ROW"): 3666 text += " TO NEXT ROW" 3667 elif self._match_text_seq("TO", "FIRST"): 3668 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 3669 elif self._match_text_seq("TO", "LAST"): 3670 text += f" TO LAST {self._advance_any().text}" # type: ignore 3671 after = exp.var(text) 3672 else: 3673 after = None 3674 3675 if self._match_text_seq("PATTERN"): 3676 self._match_l_paren() 3677 3678 if not self._curr: 3679 self.raise_error("Expecting )", self._curr) 3680 3681 paren = 1 3682 start = self._curr 3683 3684 while self._curr and paren > 0: 3685 if self._curr.token_type == TokenType.L_PAREN: 3686 paren += 1 3687 if self._curr.token_type == TokenType.R_PAREN: 3688 paren -= 1 3689 3690 end = self._prev 3691 self._advance() 3692 3693 if paren > 0: 3694 self.raise_error("Expecting )", self._curr) 3695 3696 pattern = exp.var(self._find_sql(start, end)) 3697 else: 3698 pattern = None 3699 3700 define = ( 3701 self._parse_csv(self._parse_name_as_expression) 3702 if self._match_text_seq("DEFINE") 3703 else None 3704 ) 3705 3706 self._match_r_paren() 3707 3708 return self.expression( 3709 exp.MatchRecognize, 3710 partition_by=partition, 3711 order=order, 3712 measures=measures, 3713 rows=rows, 3714 after=after, 3715 pattern=pattern, 3716 define=define, 3717 alias=self._parse_table_alias(), 3718 ) 3719 3720 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3721 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3722 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3723 cross_apply = False 3724 3725 if cross_apply is not None: 3726 this = self._parse_select(table=True) 3727 view = None 3728 outer = None 3729 elif self._match(TokenType.LATERAL): 3730 this = self._parse_select(table=True) 3731 view = self._match(TokenType.VIEW) 3732 outer = self._match(TokenType.OUTER) 3733 else: 3734 return None 3735 3736 if not this: 3737 this = ( 3738 self._parse_unnest() 3739 or self._parse_function() 3740 or self._parse_id_var(any_token=False) 3741 ) 3742 3743 while self._match(TokenType.DOT): 3744 this = exp.Dot( 3745 this=this, 3746 expression=self._parse_function() or self._parse_id_var(any_token=False), 3747 ) 3748 3749 ordinality: t.Optional[bool] = None 3750 3751 if view: 3752 table = self._parse_id_var(any_token=False) 3753 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3754 table_alias: t.Optional[exp.TableAlias] = self.expression( 3755 exp.TableAlias, this=table, columns=columns 3756 ) 3757 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3758 # We move the alias from the lateral's child node to the lateral itself 3759 table_alias = this.args["alias"].pop() 3760 else: 3761 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3762 table_alias = self._parse_table_alias() 3763 3764 return self.expression( 3765 exp.Lateral, 3766 this=this, 3767 view=view, 3768 outer=outer, 3769 alias=table_alias, 3770 cross_apply=cross_apply, 3771 ordinality=ordinality, 3772 ) 3773 3774 def _parse_join_parts( 3775 self, 3776 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3777 return ( 3778 self._match_set(self.JOIN_METHODS) and self._prev, 3779 self._match_set(self.JOIN_SIDES) and self._prev, 3780 self._match_set(self.JOIN_KINDS) and self._prev, 3781 ) 3782 3783 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3784 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3785 this = self._parse_column() 3786 if isinstance(this, exp.Column): 3787 return this.this 3788 return this 3789 3790 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3791 3792 def _parse_join( 3793 self, 
skip_join_token: bool = False, parse_bracket: bool = False 3794 ) -> t.Optional[exp.Join]: 3795 if self._match(TokenType.COMMA): 3796 table = self._try_parse(self._parse_table) 3797 cross_join = self.expression(exp.Join, this=table) if table else None 3798 3799 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3800 cross_join.set("kind", "CROSS") 3801 3802 return cross_join 3803 3804 index = self._index 3805 method, side, kind = self._parse_join_parts() 3806 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3807 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3808 join_comments = self._prev_comments 3809 3810 if not skip_join_token and not join: 3811 self._retreat(index) 3812 kind = None 3813 method = None 3814 side = None 3815 3816 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3817 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3818 3819 if not skip_join_token and not join and not outer_apply and not cross_apply: 3820 return None 3821 3822 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3823 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3824 kwargs["expressions"] = self._parse_csv( 3825 lambda: self._parse_table(parse_bracket=parse_bracket) 3826 ) 3827 3828 if method: 3829 kwargs["method"] = method.text 3830 if side: 3831 kwargs["side"] = side.text 3832 if kind: 3833 kwargs["kind"] = kind.text 3834 if hint: 3835 kwargs["hint"] = hint 3836 3837 if self._match(TokenType.MATCH_CONDITION): 3838 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3839 3840 if self._match(TokenType.ON): 3841 kwargs["on"] = self._parse_assignment() 3842 elif self._match(TokenType.USING): 3843 kwargs["using"] = self._parse_using_identifiers() 3844 elif ( 3845 not method 3846 and not (outer_apply or cross_apply) 3847 and not isinstance(kwargs["this"], exp.Unnest) 3848 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3849 ): 3850 index = self._index 3851 joins: t.Optional[list] = list(self._parse_joins()) 3852 3853 if joins and self._match(TokenType.ON): 3854 kwargs["on"] = self._parse_assignment() 3855 elif joins and self._match(TokenType.USING): 3856 kwargs["using"] = self._parse_using_identifiers() 3857 else: 3858 joins = None 3859 self._retreat(index) 3860 3861 kwargs["this"].set("joins", joins if joins else None) 3862 3863 kwargs["pivots"] = self._parse_pivots() 3864 3865 comments = [c for token in (method, side, kind) if token for c in token.comments] 3866 comments = (join_comments or []) + comments 3867 return self.expression(exp.Join, comments=comments, **kwargs) 3868 3869 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3870 this = self._parse_assignment() 3871 3872 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3873 return this 3874 3875 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3876 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3877 3878 return this 3879 3880 def _parse_index_params(self) -> exp.IndexParameters: 3881 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3882 3883 if self._match(TokenType.L_PAREN, advance=False): 3884 columns = self._parse_wrapped_csv(self._parse_with_operator) 3885 else: 3886 columns = None 3887 3888 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3889 partition_by = 
self._parse_partition_by() 3890 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3891 tablespace = ( 3892 self._parse_var(any_token=True) 3893 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3894 else None 3895 ) 3896 where = self._parse_where() 3897 3898 on = self._parse_field() if self._match(TokenType.ON) else None 3899 3900 return self.expression( 3901 exp.IndexParameters, 3902 using=using, 3903 columns=columns, 3904 include=include, 3905 partition_by=partition_by, 3906 where=where, 3907 with_storage=with_storage, 3908 tablespace=tablespace, 3909 on=on, 3910 ) 3911 3912 def _parse_index( 3913 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3914 ) -> t.Optional[exp.Index]: 3915 if index or anonymous: 3916 unique = None 3917 primary = None 3918 amp = None 3919 3920 self._match(TokenType.ON) 3921 self._match(TokenType.TABLE) # hive 3922 table = self._parse_table_parts(schema=True) 3923 else: 3924 unique = self._match(TokenType.UNIQUE) 3925 primary = self._match_text_seq("PRIMARY") 3926 amp = self._match_text_seq("AMP") 3927 3928 if not self._match(TokenType.INDEX): 3929 return None 3930 3931 index = self._parse_id_var() 3932 table = None 3933 3934 params = self._parse_index_params() 3935 3936 return self.expression( 3937 exp.Index, 3938 this=index, 3939 table=table, 3940 unique=unique, 3941 primary=primary, 3942 amp=amp, 3943 params=params, 3944 ) 3945 3946 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3947 hints: t.List[exp.Expression] = [] 3948 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3949 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3950 hints.append( 3951 self.expression( 3952 exp.WithTableHint, 3953 expressions=self._parse_csv( 3954 lambda: self._parse_function() or self._parse_var(any_token=True) 3955 ), 3956 ) 3957 ) 3958 self._match_r_paren() 3959 else: 3960 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3961 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3962 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3963 3964 self._match_set((TokenType.INDEX, TokenType.KEY)) 3965 if self._match(TokenType.FOR): 3966 hint.set("target", self._advance_any() and self._prev.text.upper()) 3967 3968 hint.set("expressions", self._parse_wrapped_id_vars()) 3969 hints.append(hint) 3970 3971 return hints or None 3972 3973 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3974 return ( 3975 (not schema and self._parse_function(optional_parens=False)) 3976 or self._parse_id_var(any_token=False) 3977 or self._parse_string_as_identifier() 3978 or self._parse_placeholder() 3979 ) 3980 3981 def _parse_table_parts( 3982 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3983 ) -> exp.Table: 3984 catalog = None 3985 db = None 3986 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3987 3988 while self._match(TokenType.DOT): 3989 if catalog: 3990 # This allows nesting the table in arbitrarily many dot expressions if needed 3991 table = self.expression( 3992 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3993 ) 3994 else: 3995 catalog = db 3996 db = table 3997 # "" used for tsql FROM a..b case 3998 table = self._parse_table_part(schema=schema) or "" 3999 4000 if ( 4001 wildcard 4002 and self._is_connected() 4003 and (isinstance(table, exp.Identifier) or not table) 4004 and self._match(TokenType.STAR) 4005 ): 4006 if 
isinstance(table, exp.Identifier): 4007 table.args["this"] += "*" 4008 else: 4009 table = exp.Identifier(this="*") 4010 4011 # We bubble up comments from the Identifier to the Table 4012 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4013 4014 if is_db_reference: 4015 catalog = db 4016 db = table 4017 table = None 4018 4019 if not table and not is_db_reference: 4020 self.raise_error(f"Expected table name but got {self._curr}") 4021 if not db and is_db_reference: 4022 self.raise_error(f"Expected database name but got {self._curr}") 4023 4024 table = self.expression( 4025 exp.Table, 4026 comments=comments, 4027 this=table, 4028 db=db, 4029 catalog=catalog, 4030 ) 4031 4032 changes = self._parse_changes() 4033 if changes: 4034 table.set("changes", changes) 4035 4036 at_before = self._parse_historical_data() 4037 if at_before: 4038 table.set("when", at_before) 4039 4040 pivots = self._parse_pivots() 4041 if pivots: 4042 table.set("pivots", pivots) 4043 4044 return table 4045 4046 def _parse_table( 4047 self, 4048 schema: bool = False, 4049 joins: bool = False, 4050 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4051 parse_bracket: bool = False, 4052 is_db_reference: bool = False, 4053 parse_partition: bool = False, 4054 consume_pipe: bool = False, 4055 ) -> t.Optional[exp.Expression]: 4056 lateral = self._parse_lateral() 4057 if lateral: 4058 return lateral 4059 4060 unnest = self._parse_unnest() 4061 if unnest: 4062 return unnest 4063 4064 values = self._parse_derived_table_values() 4065 if values: 4066 return values 4067 4068 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4069 if subquery: 4070 if not subquery.args.get("pivots"): 4071 subquery.set("pivots", self._parse_pivots()) 4072 return subquery 4073 4074 bracket = parse_bracket and self._parse_bracket(None) 4075 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4076 4077 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4078 self._parse_table 4079 ) 4080 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4081 4082 only = self._match(TokenType.ONLY) 4083 4084 this = t.cast( 4085 exp.Expression, 4086 bracket 4087 or rows_from 4088 or self._parse_bracket( 4089 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4090 ), 4091 ) 4092 4093 if only: 4094 this.set("only", only) 4095 4096 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4097 self._match_text_seq("*") 4098 4099 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4100 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4101 this.set("partition", self._parse_partition()) 4102 4103 if schema: 4104 return self._parse_schema(this=this) 4105 4106 version = self._parse_version() 4107 4108 if version: 4109 this.set("version", version) 4110 4111 if self.dialect.ALIAS_POST_TABLESAMPLE: 4112 this.set("sample", self._parse_table_sample()) 4113 4114 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4115 if alias: 4116 this.set("alias", alias) 4117 4118 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4119 return self.expression( 4120 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4121 ) 4122 4123 this.set("hints", self._parse_table_hints()) 4124 4125 if not this.args.get("pivots"): 4126 this.set("pivots", self._parse_pivots()) 4127 4128 if not 
self.dialect.ALIAS_POST_TABLESAMPLE: 4129 this.set("sample", self._parse_table_sample()) 4130 4131 if joins: 4132 for join in self._parse_joins(): 4133 this.append("joins", join) 4134 4135 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4136 this.set("ordinality", True) 4137 this.set("alias", self._parse_table_alias()) 4138 4139 return this 4140 4141 def _parse_version(self) -> t.Optional[exp.Version]: 4142 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4143 this = "TIMESTAMP" 4144 elif self._match(TokenType.VERSION_SNAPSHOT): 4145 this = "VERSION" 4146 else: 4147 return None 4148 4149 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4150 kind = self._prev.text.upper() 4151 start = self._parse_bitwise() 4152 self._match_texts(("TO", "AND")) 4153 end = self._parse_bitwise() 4154 expression: t.Optional[exp.Expression] = self.expression( 4155 exp.Tuple, expressions=[start, end] 4156 ) 4157 elif self._match_text_seq("CONTAINED", "IN"): 4158 kind = "CONTAINED IN" 4159 expression = self.expression( 4160 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4161 ) 4162 elif self._match(TokenType.ALL): 4163 kind = "ALL" 4164 expression = None 4165 else: 4166 self._match_text_seq("AS", "OF") 4167 kind = "AS OF" 4168 expression = self._parse_type() 4169 4170 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4171 4172 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4173 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4174 index = self._index 4175 historical_data = None 4176 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4177 this = self._prev.text.upper() 4178 kind = ( 4179 self._match(TokenType.L_PAREN) 4180 and self._match_texts(self.HISTORICAL_DATA_KIND) 4181 and self._prev.text.upper() 4182 ) 4183 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4184 4185 if expression: 4186 self._match_r_paren() 4187 historical_data = self.expression( 4188 exp.HistoricalData, this=this, kind=kind, expression=expression 4189 ) 4190 else: 4191 self._retreat(index) 4192 4193 return historical_data 4194 4195 def _parse_changes(self) -> t.Optional[exp.Changes]: 4196 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4197 return None 4198 4199 information = self._parse_var(any_token=True) 4200 self._match_r_paren() 4201 4202 return self.expression( 4203 exp.Changes, 4204 information=information, 4205 at_before=self._parse_historical_data(), 4206 end=self._parse_historical_data(), 4207 ) 4208 4209 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4210 if not self._match(TokenType.UNNEST): 4211 return None 4212 4213 expressions = self._parse_wrapped_csv(self._parse_equality) 4214 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4215 4216 alias = self._parse_table_alias() if with_alias else None 4217 4218 if alias: 4219 if self.dialect.UNNEST_COLUMN_ONLY: 4220 if alias.args.get("columns"): 4221 self.raise_error("Unexpected extra column alias in unnest.") 4222 4223 alias.set("columns", [alias.this]) 4224 alias.set("this", None) 4225 4226 columns = alias.args.get("columns") or [] 4227 if offset and len(expressions) < len(columns): 4228 offset = columns.pop() 4229 4230 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4231 self._match(TokenType.ALIAS) 4232 offset = self._parse_id_var( 4233 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4234 ) or exp.to_identifier("offset") 4235 4236 return self.expression(exp.Unnest, 
expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if
self._match(TokenType.IN): 4343 # PIVOT ... ON col IN (row_val1, row_val2) 4344 return self._parse_in(this) 4345 if self._match(TokenType.ALIAS, advance=False): 4346 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4347 return self._parse_alias(this) 4348 4349 return this 4350 4351 this = self._parse_table() 4352 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4353 into = self._parse_unpivot_columns() 4354 using = self._match(TokenType.USING) and self._parse_csv( 4355 lambda: self._parse_alias(self._parse_function()) 4356 ) 4357 group = self._parse_group() 4358 4359 return self.expression( 4360 exp.Pivot, 4361 this=this, 4362 expressions=expressions, 4363 using=using, 4364 group=group, 4365 unpivot=is_unpivot, 4366 into=into, 4367 ) 4368 4369 def _parse_pivot_in(self) -> exp.In: 4370 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4371 this = self._parse_select_or_expression() 4372 4373 self._match(TokenType.ALIAS) 4374 alias = self._parse_bitwise() 4375 if alias: 4376 if isinstance(alias, exp.Column) and not alias.db: 4377 alias = alias.this 4378 return self.expression(exp.PivotAlias, this=this, alias=alias) 4379 4380 return this 4381 4382 value = self._parse_column() 4383 4384 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4385 self.raise_error("Expecting IN (") 4386 4387 if self._match(TokenType.ANY): 4388 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4389 else: 4390 exprs = self._parse_csv(_parse_aliased_expression) 4391 4392 self._match_r_paren() 4393 return self.expression(exp.In, this=value, expressions=exprs) 4394 4395 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4396 func = self._parse_function() 4397 if not func: 4398 self.raise_error("Expecting an aggregation function in PIVOT") 4399 4400 return self._parse_alias(func) 4401 4402 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4403 index = self._index 4404 include_nulls = None 4405 4406 if self._match(TokenType.PIVOT): 4407 unpivot = False 4408 elif self._match(TokenType.UNPIVOT): 4409 unpivot = True 4410 4411 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4412 if self._match_text_seq("INCLUDE", "NULLS"): 4413 include_nulls = True 4414 elif self._match_text_seq("EXCLUDE", "NULLS"): 4415 include_nulls = False 4416 else: 4417 return None 4418 4419 expressions = [] 4420 4421 if not self._match(TokenType.L_PAREN): 4422 self._retreat(index) 4423 return None 4424 4425 if unpivot: 4426 expressions = self._parse_csv(self._parse_column) 4427 else: 4428 expressions = self._parse_csv(self._parse_pivot_aggregation) 4429 4430 if not expressions: 4431 self.raise_error("Failed to parse PIVOT's aggregation list") 4432 4433 if not self._match(TokenType.FOR): 4434 self.raise_error("Expecting FOR") 4435 4436 fields = [] 4437 while True: 4438 field = self._try_parse(self._parse_pivot_in) 4439 if not field: 4440 break 4441 fields.append(field) 4442 4443 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4444 self._parse_bitwise 4445 ) 4446 4447 group = self._parse_group() 4448 4449 self._match_r_paren() 4450 4451 pivot = self.expression( 4452 exp.Pivot, 4453 expressions=expressions, 4454 fields=fields, 4455 unpivot=unpivot, 4456 include_nulls=include_nulls, 4457 default_on_null=default_on_null, 4458 group=group, 4459 ) 4460 4461 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4462 pivot.set("alias", self._parse_table_alias()) 
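        # ------------------------------------------------------------------
        # Editorial illustration (not part of the parser): the block below
        # derives the PIVOT output column names by taking the cartesian
        # product of every FOR-field's values plus the aggregation aliases,
        # as its inline comment describes. A minimal standalone sketch of
        # that naming scheme, assuming two fields and one aliased
        # aggregation named "total":
        #
        #     import itertools
        #
        #     all_fields = [["2000", "2010"], ["NL", "US"], ["total"]]
        #     columns = ["_".join(parts) for parts in itertools.product(*all_fields)]
        #     # ['2000_NL_total', '2000_US_total', '2010_NL_total', '2010_US_total']
        # ------------------------------------------------------------------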

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  #
type: ignore 4568 4569 if before_with_index <= self._index <= before_with_index + 1: 4570 self._retreat(before_with_index) 4571 break 4572 4573 if index == self._index: 4574 break 4575 4576 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4577 4578 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4579 return self.expression( 4580 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4581 ) 4582 4583 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4584 if self._match(TokenType.L_PAREN): 4585 grouping_set = self._parse_csv(self._parse_column) 4586 self._match_r_paren() 4587 return self.expression(exp.Tuple, expressions=grouping_set) 4588 4589 return self._parse_column() 4590 4591 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4592 if not skip_having_token and not self._match(TokenType.HAVING): 4593 return None 4594 return self.expression( 4595 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4596 ) 4597 4598 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4599 if not self._match(TokenType.QUALIFY): 4600 return None 4601 return self.expression(exp.Qualify, this=self._parse_assignment()) 4602 4603 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4604 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4605 exp.Prior, this=self._parse_bitwise() 4606 ) 4607 connect = self._parse_assignment() 4608 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4609 return connect 4610 4611 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4612 if skip_start_token: 4613 start = None 4614 elif self._match(TokenType.START_WITH): 4615 start = self._parse_assignment() 4616 else: 4617 return None 4618 4619 self._match(TokenType.CONNECT_BY) 4620 nocycle = self._match_text_seq("NOCYCLE") 4621 connect = self._parse_connect_with_prior() 4622 4623 if not start and self._match(TokenType.START_WITH): 4624 start = self._parse_assignment() 4625 4626 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4627 4628 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4629 this = self._parse_id_var(any_token=True) 4630 if self._match(TokenType.ALIAS): 4631 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4632 return this 4633 4634 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4635 if self._match_text_seq("INTERPOLATE"): 4636 return self._parse_wrapped_csv(self._parse_name_as_expression) 4637 return None 4638 4639 def _parse_order( 4640 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4641 ) -> t.Optional[exp.Expression]: 4642 siblings = None 4643 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4644 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4645 return this 4646 4647 siblings = True 4648 4649 return self.expression( 4650 exp.Order, 4651 comments=self._prev_comments, 4652 this=this, 4653 expressions=self._parse_csv(self._parse_ordered), 4654 siblings=siblings, 4655 ) 4656 4657 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4658 if not self._match(token): 4659 return None 4660 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4661 4662 def _parse_ordered( 4663 self, parse_method: t.Optional[t.Callable] = None 4664 ) -> t.Optional[exp.Ordered]: 4665 this = parse_method() if parse_method 
else self._parse_assignment() 4666 if not this: 4667 return None 4668 4669 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4670 this = exp.var("ALL") 4671 4672 asc = self._match(TokenType.ASC) 4673 desc = self._match(TokenType.DESC) or (asc and False) 4674 4675 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4676 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4677 4678 nulls_first = is_nulls_first or False 4679 explicitly_null_ordered = is_nulls_first or is_nulls_last 4680 4681 if ( 4682 not explicitly_null_ordered 4683 and ( 4684 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4685 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4686 ) 4687 and self.dialect.NULL_ORDERING != "nulls_are_last" 4688 ): 4689 nulls_first = True 4690 4691 if self._match_text_seq("WITH", "FILL"): 4692 with_fill = self.expression( 4693 exp.WithFill, 4694 **{ # type: ignore 4695 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4696 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4697 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4698 "interpolate": self._parse_interpolate(), 4699 }, 4700 ) 4701 else: 4702 with_fill = None 4703 4704 return self.expression( 4705 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4706 ) 4707 4708 def _parse_limit_options(self) -> exp.LimitOptions: 4709 percent = self._match(TokenType.PERCENT) 4710 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4711 self._match_text_seq("ONLY") 4712 with_ties = self._match_text_seq("WITH", "TIES") 4713 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4714 4715 def _parse_limit( 4716 self, 4717 this: t.Optional[exp.Expression] = None, 4718 top: bool = False, 4719 skip_limit_token: bool = False, 4720 ) -> t.Optional[exp.Expression]: 4721 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4722 comments = self._prev_comments 4723 if top: 4724 limit_paren = self._match(TokenType.L_PAREN) 4725 expression = self._parse_term() if limit_paren else self._parse_number() 4726 4727 if limit_paren: 4728 self._match_r_paren() 4729 4730 limit_options = self._parse_limit_options() 4731 else: 4732 limit_options = None 4733 expression = self._parse_term() 4734 4735 if self._match(TokenType.COMMA): 4736 offset = expression 4737 expression = self._parse_term() 4738 else: 4739 offset = None 4740 4741 limit_exp = self.expression( 4742 exp.Limit, 4743 this=this, 4744 expression=expression, 4745 offset=offset, 4746 comments=comments, 4747 limit_options=limit_options, 4748 expressions=self._parse_limit_by(), 4749 ) 4750 4751 return limit_exp 4752 4753 if self._match(TokenType.FETCH): 4754 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4755 direction = self._prev.text.upper() if direction else "FIRST" 4756 4757 count = self._parse_field(tokens=self.FETCH_TOKENS) 4758 4759 return self.expression( 4760 exp.Fetch, 4761 direction=direction, 4762 count=count, 4763 limit_options=self._parse_limit_options(), 4764 ) 4765 4766 return this 4767 4768 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4769 if not self._match(TokenType.OFFSET): 4770 return this 4771 4772 count = self._parse_term() 4773 self._match_set((TokenType.ROW, TokenType.ROWS)) 4774 4775 return self.expression( 4776 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4777 ) 4778 4779 def _can_parse_limit_or_offset(self) -> 
bool: 4780 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4781 return False 4782 4783 index = self._index 4784 result = bool( 4785 self._try_parse(self._parse_limit, retreat=True) 4786 or self._try_parse(self._parse_offset, retreat=True) 4787 ) 4788 self._retreat(index) 4789 return result 4790 4791 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4792 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4793 4794 def _parse_locks(self) -> t.List[exp.Lock]: 4795 locks = [] 4796 while True: 4797 update, key = None, None 4798 if self._match_text_seq("FOR", "UPDATE"): 4799 update = True 4800 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4801 "LOCK", "IN", "SHARE", "MODE" 4802 ): 4803 update = False 4804 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4805 update, key = False, True 4806 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4807 update, key = True, True 4808 else: 4809 break 4810 4811 expressions = None 4812 if self._match_text_seq("OF"): 4813 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4814 4815 wait: t.Optional[bool | exp.Expression] = None 4816 if self._match_text_seq("NOWAIT"): 4817 wait = True 4818 elif self._match_text_seq("WAIT"): 4819 wait = self._parse_primary() 4820 elif self._match_text_seq("SKIP", "LOCKED"): 4821 wait = False 4822 4823 locks.append( 4824 self.expression( 4825 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4826 ) 4827 ) 4828 4829 return locks 4830 4831 def parse_set_operation( 4832 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4833 ) -> t.Optional[exp.Expression]: 4834 start = self._index 4835 _, side_token, kind_token = self._parse_join_parts() 4836 4837 side = side_token.text if side_token else None 4838 kind = kind_token.text if kind_token else None 4839 4840 if not self._match_set(self.SET_OPERATIONS): 4841 self._retreat(start) 4842 return None 4843 4844 token_type = self._prev.token_type 4845 4846 if token_type == TokenType.UNION: 4847 operation: t.Type[exp.SetOperation] = exp.Union 4848 elif token_type == TokenType.EXCEPT: 4849 operation = exp.Except 4850 else: 4851 operation = exp.Intersect 4852 4853 comments = self._prev.comments 4854 4855 if self._match(TokenType.DISTINCT): 4856 distinct: t.Optional[bool] = True 4857 elif self._match(TokenType.ALL): 4858 distinct = False 4859 else: 4860 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4861 if distinct is None: 4862 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4863 4864 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4865 "STRICT", "CORRESPONDING" 4866 ) 4867 if self._match_text_seq("CORRESPONDING"): 4868 by_name = True 4869 if not side and not kind: 4870 kind = "INNER" 4871 4872 on_column_list = None 4873 if by_name and self._match_texts(("ON", "BY")): 4874 on_column_list = self._parse_wrapped_csv(self._parse_column) 4875 4876 expression = self._parse_select( 4877 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4878 ) 4879 4880 return self.expression( 4881 operation, 4882 comments=comments, 4883 this=this, 4884 distinct=distinct, 4885 by_name=by_name, 4886 expression=expression, 4887 side=side, 4888 kind=kind, 4889 on=on_column_list, 4890 ) 4891 4892 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4893 while this: 4894 setop = self.parse_set_operation(this) 4895 if not setop: 4896 break 4897 this = setop 4898 4899 
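        # ------------------------------------------------------------------
        # Editorial illustration (not part of the parser): the loop above
        # folds chained set operations left-associatively, so the last
        # operand becomes the deepest `expression`. A hedged sketch via the
        # public API (default dialect assumed):
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     ast = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION SELECT 3")
        #     assert isinstance(ast, exp.Union)
        #     assert isinstance(ast.this, exp.Union)  # left-nested chain
        # ------------------------------------------------------------------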
        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
4960 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4961 if self._match(TokenType.NOTNULL): 4962 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4963 this = self.expression(exp.Not, this=this) 4964 4965 if negate: 4966 this = self._negate_range(this) 4967 4968 if self._match(TokenType.IS): 4969 this = self._parse_is(this) 4970 4971 return this 4972 4973 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4974 if not this: 4975 return this 4976 4977 return self.expression(exp.Not, this=this) 4978 4979 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4980 index = self._index - 1 4981 negate = self._match(TokenType.NOT) 4982 4983 if self._match_text_seq("DISTINCT", "FROM"): 4984 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4985 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4986 4987 if self._match(TokenType.JSON): 4988 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4989 4990 if self._match_text_seq("WITH"): 4991 _with = True 4992 elif self._match_text_seq("WITHOUT"): 4993 _with = False 4994 else: 4995 _with = None 4996 4997 unique = self._match(TokenType.UNIQUE) 4998 self._match_text_seq("KEYS") 4999 expression: t.Optional[exp.Expression] = self.expression( 5000 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5001 ) 5002 else: 5003 expression = self._parse_primary() or self._parse_null() 5004 if not expression: 5005 self._retreat(index) 5006 return None 5007 5008 this = self.expression(exp.Is, this=this, expression=expression) 5009 return self.expression(exp.Not, this=this) if negate else this 5010 5011 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5012 unnest = self._parse_unnest(with_alias=False) 5013 if unnest: 5014 this = self.expression(exp.In, this=this, unnest=unnest) 5015 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5016 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5017 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5018 5019 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5020 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5021 else: 5022 this = self.expression(exp.In, this=this, expressions=expressions) 5023 5024 if matched_l_paren: 5025 self._match_r_paren(this) 5026 elif not self._match(TokenType.R_BRACKET, expression=this): 5027 self.raise_error("Expecting ]") 5028 else: 5029 this = self.expression(exp.In, this=this, field=self._parse_column()) 5030 5031 return this 5032 5033 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5034 symmetric = None 5035 if self._match_text_seq("SYMMETRIC"): 5036 symmetric = True 5037 elif self._match_text_seq("ASYMMETRIC"): 5038 symmetric = False 5039 5040 low = self._parse_bitwise() 5041 self._match(TokenType.AND) 5042 high = self._parse_bitwise() 5043 5044 return self.expression( 5045 exp.Between, 5046 this=this, 5047 low=low, 5048 high=high, 5049 symmetric=symmetric, 5050 ) 5051 5052 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5053 if not self._match(TokenType.ESCAPE): 5054 return this 5055 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5056 5057 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5058 index = self._index 5059 5060 if not 
self._match(TokenType.INTERVAL) and match_interval: 5061 return None 5062 5063 if self._match(TokenType.STRING, advance=False): 5064 this = self._parse_primary() 5065 else: 5066 this = self._parse_term() 5067 5068 if not this or ( 5069 isinstance(this, exp.Column) 5070 and not this.table 5071 and not this.this.quoted 5072 and this.name.upper() == "IS" 5073 ): 5074 self._retreat(index) 5075 return None 5076 5077 unit = self._parse_function() or ( 5078 not self._match(TokenType.ALIAS, advance=False) 5079 and self._parse_var(any_token=True, upper=True) 5080 ) 5081 5082 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5083 # each INTERVAL expression into this canonical form so it's easy to transpile 5084 if this and this.is_number: 5085 this = exp.Literal.string(this.to_py()) 5086 elif this and this.is_string: 5087 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5088 if parts and unit: 5089 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5090 unit = None 5091 self._retreat(self._index - 1) 5092 5093 if len(parts) == 1: 5094 this = exp.Literal.string(parts[0][0]) 5095 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5096 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5097 unit = self.expression( 5098 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5099 ) 5100 5101 interval = self.expression(exp.Interval, this=this, unit=unit) 5102 5103 index = self._index 5104 self._match(TokenType.PLUS) 5105 5106 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5107 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5108 return self.expression( 5109 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5110 ) 5111 5112 self._retreat(index) 5113 return interval 5114 5115 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5116 this = self._parse_term() 5117 5118 while True: 5119 if self._match_set(self.BITWISE): 5120 this = self.expression( 5121 self.BITWISE[self._prev.token_type], 5122 this=this, 5123 expression=self._parse_term(), 5124 ) 5125 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5126 this = self.expression( 5127 exp.DPipe, 5128 this=this, 5129 expression=self._parse_term(), 5130 safe=not self.dialect.STRICT_STRING_CONCAT, 5131 ) 5132 elif self._match(TokenType.DQMARK): 5133 this = self.expression( 5134 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5135 ) 5136 elif self._match_pair(TokenType.LT, TokenType.LT): 5137 this = self.expression( 5138 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5139 ) 5140 elif self._match_pair(TokenType.GT, TokenType.GT): 5141 this = self.expression( 5142 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5143 ) 5144 else: 5145 break 5146 5147 return this 5148 5149 def _parse_term(self) -> t.Optional[exp.Expression]: 5150 this = self._parse_factor() 5151 5152 while self._match_set(self.TERM): 5153 klass = self.TERM[self._prev.token_type] 5154 comments = self._prev_comments 5155 expression = self._parse_factor() 5156 5157 this = self.expression(klass, this=this, comments=comments, expression=expression) 5158 5159 if isinstance(this, exp.Collate): 5160 expr = this.expression 5161 5162 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5163 # fallback to Identifier / Var 5164 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5165 ident = expr.this 5166 if 
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
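    # Illustrative note (not in the original source): each level above delegates to the next
    # tighter-binding one (bitwise -> term -> factor -> unary), which produces the usual
    # arithmetic precedence. For example, parsing "1 + 2 * 3" is expected to yield an exp.Add
    # whose `expression` arg is an exp.Mul, since the factor level binds tighter than the
    # term level:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("1 + 2 * 3")
    #     >>> isinstance(node, exp.Add) and isinstance(node.expression, exp.Mul)
    #     True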
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)
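    # Illustrative sketch (not in the original source): BigQuery's typed constructor form
    # (see _parse_type above) is canonicalized into a Cast, so the parsed SELECT item is
    # expected to be an exp.Cast node:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
    #     >>> isinstance(node.selects[0], exp.Cast)
    #     True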
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
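    # Illustrative sketch (not in the original source): parameterized types parsed here carry
    # their parameters in the DataType's `expressions` arg, e.g. (assuming the public
    # exp.DataType.build helper, which funnels into this method):
    #
    #     >>> from sqlglot import exp
    #     >>> dtype = exp.DataType.build("DECIMAL(38, 0)")
    #     >>> dtype.sql()
    #     'DECIMAL(38, 0)'
    #     >>> [param.sql() for param in dtype.expressions]
    #     ['38', '0']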
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()
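    # Illustrative sketch (not in the original source): per the variant extraction above,
    # Snowflake's colon operator is expected to be canonicalized into GET_PATH when
    # generating Snowflake SQL back out, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT col:a.b FROM t", read="snowflake", write="snowflake")[0]
    #     "SELECT GET_PATH(col, 'a.b') FROM t"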
col:"a'b") as 5615 # it'll roundtrip to a string literal in GET_PATH 5616 if isinstance(path, exp.Identifier) and path.quoted: 5617 escape = True 5618 5619 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5620 5621 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5622 # Databricks transforms it back to the colon/dot notation 5623 if json_path: 5624 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5625 5626 if json_path_expr: 5627 json_path_expr.set("escape", escape) 5628 5629 this = self.expression( 5630 exp.JSONExtract, 5631 this=this, 5632 expression=json_path_expr, 5633 variant_extract=True, 5634 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5635 ) 5636 5637 while casts: 5638 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5639 5640 return this 5641 5642 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5643 return self._parse_types() 5644 5645 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5646 this = self._parse_bracket(this) 5647 5648 while self._match_set(self.COLUMN_OPERATORS): 5649 op_token = self._prev.token_type 5650 op = self.COLUMN_OPERATORS.get(op_token) 5651 5652 if op_token in self.CAST_COLUMN_OPERATORS: 5653 field = self._parse_dcolon() 5654 if not field: 5655 self.raise_error("Expected type") 5656 elif op and self._curr: 5657 field = self._parse_column_reference() or self._parse_bracket() 5658 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5659 field = self._parse_column_ops(field) 5660 else: 5661 field = self._parse_field(any_token=True, anonymous_func=True) 5662 5663 # Function calls can be qualified, e.g., x.y.FOO() 5664 # This converts the final AST to a series of Dots leading to the function call 5665 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5666 if isinstance(field, (exp.Func, exp.Window)) and this: 5667 this = this.transform( 5668 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5669 ) 5670 5671 if op: 5672 this = op(self, this, field) 5673 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5674 this = self.expression( 5675 exp.Column, 5676 comments=this.comments, 5677 this=field, 5678 table=this.this, 5679 db=this.args.get("table"), 5680 catalog=this.args.get("db"), 5681 ) 5682 elif isinstance(field, exp.Window): 5683 # Move the exp.Dot's to the window's function 5684 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5685 field.set("this", window_func) 5686 this = field 5687 else: 5688 this = self.expression(exp.Dot, this=this, expression=field) 5689 5690 if field and field.comments: 5691 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5692 5693 this = self._parse_bracket(this) 5694 5695 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5696 5697 def _parse_paren(self) -> t.Optional[exp.Expression]: 5698 if not self._match(TokenType.L_PAREN): 5699 return None 5700 5701 comments = self._prev_comments 5702 query = self._parse_select() 5703 5704 if query: 5705 expressions = [query] 5706 else: 5707 expressions = self._parse_expressions() 5708 5709 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5710 5711 if not this and self._match(TokenType.R_PAREN, advance=False): 5712 this = self.expression(exp.Tuple) 5713 elif isinstance(this, 
    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
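    # Illustrative sketch (not in the original source): the ODBC {fn ...} escape is consumed
    # around the call, so the wrapped function is expected to parse as if the braces were
    # absent, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").selects[0].sql()
    #     "CONCAT('a', 'b')"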
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()
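    # Illustrative sketch (not in the original source): unknown function names parse into
    # exp.Anonymous (see _parse_function_call above), and the "/* sqlglot.anonymous */"
    # comment escape forces the same treatment for a known function, e.g.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("SELECT FOO(1)").selects[0], exp.Anonymous)
    #     True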
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
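    # Illustrative sketch (not in the original source): when a function argument looks like
    # `x -> ...` (or `(x, y) -> ...`), the LAMBDAS table consulted above turns it into an
    # exp.Lambda node, e.g.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("SELECT FILTER(a, x -> x > 0)")
    #     >>> node.find(exp.Lambda) is not None
    #     True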
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))
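    # Illustrative sketch (not in the original source): a generated/computed column definition
    # is expected to surface as an exp.ComputedColumnConstraint on the column's ColumnDef
    # (see _parse_column_def above), e.g.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (b INT AS (a + 1) STORED)", read="mysql")
    #     >>> ddl.find(exp.ComputedColumnConstraint) is not None
    #     True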
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression
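    # Illustrative sketch (not in the original source): per the docstring above, an ODBC date
    # escape is parsed into the same node as the corresponding function call, e.g.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("SELECT {d '2024-01-01'}", read="mysql")
    #     >>> isinstance(node.selects[0], exp.Date)
    #     True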
    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this
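    # Illustrative sketch (not in the original source): DuckDB's brace syntax referenced in
    # _parse_bracket above is parsed into an exp.Struct whose entries are key/value pairs, e.g.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("SELECT {'a': 1}", read="duckdb")
    #     >>> isinstance(node.selects[0], exp.Struct)
    #     True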
    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
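    # Illustrative sketch (not in the original source): a temporal CAST with a FORMAT clause
    # is rewritten into a string-parsing function rather than a plain Cast, e.g. (Teradata-style
    # input assumed):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="teradata")
    #     >>> isinstance(node, exp.StrToDate)
    #     True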
    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)
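    # Illustrative sketch (not in the original source): because STRING_AGG is canonicalized
    # into exp.GroupConcat by _parse_string_agg above, transpiling to MySQL is expected to
    # produce GROUP_CONCAT, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT STRING_AGG(x, ',')", read="postgres", write="mysql")[0]
    #     "SELECT GROUP_CONCAT(x SEPARATOR ',')"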
    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite
        # (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None
self._parse_json_schema() 6845 6846 return exp.JSONTable( 6847 this=this, 6848 schema=schema, 6849 path=path, 6850 error_handling=error_handling, 6851 empty_handling=empty_handling, 6852 ) 6853 6854 def _parse_match_against(self) -> exp.MatchAgainst: 6855 expressions = self._parse_csv(self._parse_column) 6856 6857 self._match_text_seq(")", "AGAINST", "(") 6858 6859 this = self._parse_string() 6860 6861 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6862 modifier = "IN NATURAL LANGUAGE MODE" 6863 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6864 modifier = f"{modifier} WITH QUERY EXPANSION" 6865 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6866 modifier = "IN BOOLEAN MODE" 6867 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6868 modifier = "WITH QUERY EXPANSION" 6869 else: 6870 modifier = None 6871 6872 return self.expression( 6873 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6874 ) 6875 6876 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6877 def _parse_open_json(self) -> exp.OpenJSON: 6878 this = self._parse_bitwise() 6879 path = self._match(TokenType.COMMA) and self._parse_string() 6880 6881 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6882 this = self._parse_field(any_token=True) 6883 kind = self._parse_types() 6884 path = self._parse_string() 6885 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6886 6887 return self.expression( 6888 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6889 ) 6890 6891 expressions = None 6892 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6893 self._match_l_paren() 6894 expressions = self._parse_csv(_parse_open_json_column_def) 6895 6896 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6897 6898 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6899 args = self._parse_csv(self._parse_bitwise) 6900 6901 if self._match(TokenType.IN): 6902 return self.expression( 6903 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6904 ) 6905 6906 if haystack_first: 6907 haystack = seq_get(args, 0) 6908 needle = seq_get(args, 1) 6909 else: 6910 haystack = seq_get(args, 1) 6911 needle = seq_get(args, 0) 6912 6913 return self.expression( 6914 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6915 ) 6916 6917 def _parse_predict(self) -> exp.Predict: 6918 self._match_text_seq("MODEL") 6919 this = self._parse_table() 6920 6921 self._match(TokenType.COMMA) 6922 self._match_text_seq("TABLE") 6923 6924 return self.expression( 6925 exp.Predict, 6926 this=this, 6927 expression=self._parse_table(), 6928 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6929 ) 6930 6931 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6932 args = self._parse_csv(self._parse_table) 6933 return exp.JoinHint(this=func_name.upper(), expressions=args) 6934 6935 def _parse_substring(self) -> exp.Substring: 6936 # Postgres supports the form: substring(string [from int] [for int]) 6937 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6938 6939 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6940 6941 if self._match(TokenType.FROM): 6942 args.append(self._parse_bitwise()) 6943 if self._match(TokenType.FOR): 6944 if len(args) == 1: 6945 args.append(exp.Literal.number(1)) 6946 args.append(self._parse_bitwise()) 6947 6948 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 6949 6950 def _parse_trim(self) -> exp.Trim: 6951 # https://www.w3resource.com/sql/character-functions/trim.php 6952 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6953 6954 position = None 6955 collation = None 6956 expression = None 6957 6958 if self._match_texts(self.TRIM_TYPES): 6959 position = self._prev.text.upper() 6960 6961 this = self._parse_bitwise() 6962 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6963 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6964 expression = self._parse_bitwise() 6965 6966 if invert_order: 6967 this, expression = expression, this 6968 6969 if self._match(TokenType.COLLATE): 6970 collation = self._parse_bitwise() 6971 6972 return self.expression( 6973 exp.Trim, this=this, position=position, expression=expression, collation=collation 6974 ) 6975 6976 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6977 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6978 6979 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6980 return self._parse_window(self._parse_id_var(), alias=True) 6981 6982 def _parse_respect_or_ignore_nulls( 6983 self, this: t.Optional[exp.Expression] 6984 ) -> t.Optional[exp.Expression]: 6985 if self._match_text_seq("IGNORE", "NULLS"): 6986 return self.expression(exp.IgnoreNulls, this=this) 6987 if self._match_text_seq("RESPECT", "NULLS"): 6988 return self.expression(exp.RespectNulls, this=this) 6989 return this 6990 6991 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6992 if self._match(TokenType.HAVING): 6993 self._match_texts(("MAX", "MIN")) 6994 max = self._prev.text.upper() != "MIN" 6995 return self.expression( 6996 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6997 ) 6998 6999 return this 7000 7001 def _parse_window( 7002 self, this: t.Optional[exp.Expression], alias: bool = False 7003 ) -> t.Optional[exp.Expression]: 7004 func = this 7005 comments = func.comments if isinstance(func, exp.Expression) else None 7006 7007 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7008 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7009 if self._match_text_seq("WITHIN", "GROUP"): 7010 order = self._parse_wrapped(self._parse_order) 7011 this = self.expression(exp.WithinGroup, this=this, expression=order) 7012 7013 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7014 self._match(TokenType.WHERE) 7015 this = self.expression( 7016 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7017 ) 7018 self._match_r_paren() 7019 7020 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7021 # Some dialects choose to implement and some do not. 7022 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7023 7024 # There is some code above in _parse_lambda that handles 7025 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7026 7027 # The below changes handle 7028 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
7029 7030 # Oracle allows both formats 7031 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7032 # and Snowflake chose to do the same for familiarity 7033 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7034 if isinstance(this, exp.AggFunc): 7035 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7036 7037 if ignore_respect and ignore_respect is not this: 7038 ignore_respect.replace(ignore_respect.this) 7039 this = self.expression(ignore_respect.__class__, this=this) 7040 7041 this = self._parse_respect_or_ignore_nulls(this) 7042 7043 # bigquery select from window x AS (partition by ...) 7044 if alias: 7045 over = None 7046 self._match(TokenType.ALIAS) 7047 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7048 return this 7049 else: 7050 over = self._prev.text.upper() 7051 7052 if comments and isinstance(func, exp.Expression): 7053 func.pop_comments() 7054 7055 if not self._match(TokenType.L_PAREN): 7056 return self.expression( 7057 exp.Window, 7058 comments=comments, 7059 this=this, 7060 alias=self._parse_id_var(False), 7061 over=over, 7062 ) 7063 7064 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7065 7066 first = self._match(TokenType.FIRST) 7067 if self._match_text_seq("LAST"): 7068 first = False 7069 7070 partition, order = self._parse_partition_and_order() 7071 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7072 7073 if kind: 7074 self._match(TokenType.BETWEEN) 7075 start = self._parse_window_spec() 7076 self._match(TokenType.AND) 7077 end = self._parse_window_spec() 7078 exclude = ( 7079 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7080 if self._match_text_seq("EXCLUDE") 7081 else None 7082 ) 7083 7084 spec = self.expression( 7085 exp.WindowSpec, 7086 kind=kind, 7087 start=start["value"], 7088 start_side=start["side"], 7089 end=end["value"], 7090 end_side=end["side"], 7091 exclude=exclude, 7092 ) 7093 else: 7094 spec = None 7095 7096 self._match_r_paren() 7097 7098 window = self.expression( 7099 exp.Window, 7100 comments=comments, 7101 this=this, 7102 partition_by=partition, 7103 order=order, 7104 spec=spec, 7105 alias=window_alias, 7106 over=over, 7107 first=first, 7108 ) 7109 7110 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
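# Editor note (illustrative, not part of the original source): assuming a dialect that
# includes KEEP in WINDOW_BEFORE_PAREN_TOKENS, Oracle's
#     MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z)
# first builds a window for KEEP (...), then re-enters _parse_window below so the
# trailing OVER (...) wraps it as a second, outer exp.Window.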
7111 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7112 return self._parse_window(window, alias=alias) 7113 7114 return window 7115 7116 def _parse_partition_and_order( 7117 self, 7118 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7119 return self._parse_partition_by(), self._parse_order() 7120 7121 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7122 self._match(TokenType.BETWEEN) 7123 7124 return { 7125 "value": ( 7126 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7127 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7128 or self._parse_bitwise() 7129 ), 7130 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7131 } 7132 7133 def _parse_alias( 7134 self, this: t.Optional[exp.Expression], explicit: bool = False 7135 ) -> t.Optional[exp.Expression]: 7136 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7137 # so this section tries to parse the clause version and if it fails, it treats the token 7138 # as an identifier (alias) 7139 if self._can_parse_limit_or_offset(): 7140 return this 7141 7142 any_token = self._match(TokenType.ALIAS) 7143 comments = self._prev_comments or [] 7144 7145 if explicit and not any_token: 7146 return this 7147 7148 if self._match(TokenType.L_PAREN): 7149 aliases = self.expression( 7150 exp.Aliases, 7151 comments=comments, 7152 this=this, 7153 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7154 ) 7155 self._match_r_paren(aliases) 7156 return aliases 7157 7158 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7159 self.STRING_ALIASES and self._parse_string_as_identifier() 7160 ) 7161 7162 if alias: 7163 comments.extend(alias.pop_comments()) 7164 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7165 column = this.this 7166 7167 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7168 if not this.comments and column and column.comments: 7169 this.comments = column.pop_comments() 7170 7171 return this 7172 7173 def _parse_id_var( 7174 self, 7175 any_token: bool = True, 7176 tokens: t.Optional[t.Collection[TokenType]] = None, 7177 ) -> t.Optional[exp.Expression]: 7178 expression = self._parse_identifier() 7179 if not expression and ( 7180 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7181 ): 7182 quoted = self._prev.token_type == TokenType.STRING 7183 expression = self._identifier_expression(quoted=quoted) 7184 7185 return expression 7186 7187 def _parse_string(self) -> t.Optional[exp.Expression]: 7188 if self._match_set(self.STRING_PARSERS): 7189 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7190 return self._parse_placeholder() 7191 7192 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7193 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7194 if output: 7195 output.update_positions(self._prev) 7196 return output 7197 7198 def _parse_number(self) -> t.Optional[exp.Expression]: 7199 if self._match_set(self.NUMERIC_PARSERS): 7200 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7201 return self._parse_placeholder() 7202 7203 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7204 if self._match(TokenType.IDENTIFIER): 7205 return self._identifier_expression(quoted=True) 7206 return self._parse_placeholder() 7207 7208 def _parse_var( 7209 self, 7210 any_token: bool = False, 7211 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7212 upper: bool = False, 7213 ) -> t.Optional[exp.Expression]: 7214 if ( 7215 (any_token and self._advance_any()) 7216 or self._match(TokenType.VAR) 7217 or (self._match_set(tokens) if tokens else False) 7218 ): 7219 return self.expression( 7220 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7221 ) 7222 return self._parse_placeholder() 7223 7224 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7225 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7226 self._advance() 7227 return self._prev 7228 return None 7229 7230 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7231 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7232 7233 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7234 return self._parse_primary() or self._parse_var(any_token=True) 7235 7236 def _parse_null(self) -> t.Optional[exp.Expression]: 7237 if self._match_set(self.NULL_TOKENS): 7238 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7239 return self._parse_placeholder() 7240 7241 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7242 if self._match(TokenType.TRUE): 7243 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7244 if self._match(TokenType.FALSE): 7245 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7246 return self._parse_placeholder() 7247 7248 def _parse_star(self) -> t.Optional[exp.Expression]: 7249 if self._match(TokenType.STAR): 7250 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7251 return self._parse_placeholder() 7252 7253 def _parse_parameter(self) -> exp.Parameter: 7254 this = self._parse_identifier() or self._parse_primary_or_var() 7255 return self.expression(exp.Parameter, this=this) 7256 7257 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7258 if self._match_set(self.PLACEHOLDER_PARSERS): 7259 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7260 if placeholder: 7261 return placeholder 7262 self._advance(-1) 7263 return None 7264 7265 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7266 if not self._match_texts(keywords): 7267 return None 7268 if self._match(TokenType.L_PAREN, advance=False): 7269 return self._parse_wrapped_csv(self._parse_expression) 7270 7271 expression = self._parse_expression() 7272 return [expression] if expression else None 7273 7274 def _parse_csv( 7275 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7276 ) -> t.List[exp.Expression]: 7277 parse_result = parse_method() 7278 items = [parse_result] if parse_result is not None else [] 7279 7280 while self._match(sep): 7281 self._add_comments(parse_result) 7282 parse_result = parse_method() 7283 if parse_result is not None: 7284 items.append(parse_result) 7285 7286 return items 7287 7288 def _parse_tokens( 7289 self, parse_method: t.Callable, expressions: t.Dict 7290 ) -> t.Optional[exp.Expression]: 7291 this = parse_method() 7292 7293 while self._match_set(expressions): 7294 this = self.expression( 7295 expressions[self._prev.token_type], 7296 this=this, 7297 comments=self._prev_comments, 7298 expression=parse_method(), 7299 ) 7300 7301 return this 7302 7303 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7304 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7305 7306 def _parse_wrapped_csv( 7307 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7308 ) -> t.List[exp.Expression]: 7309 return self._parse_wrapped( 7310 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7311 ) 7312 7313 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7314 wrapped = self._match(TokenType.L_PAREN) 7315 if not wrapped and not optional: 7316 self.raise_error("Expecting (") 7317 parse_result = parse_method() 7318 if wrapped: 7319 self._match_r_paren() 7320 return parse_result 7321 7322 def _parse_expressions(self) -> t.List[exp.Expression]: 7323 return self._parse_csv(self._parse_expression) 7324 7325 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7326 return self._parse_select() or self._parse_set_operations( 7327 self._parse_alias(self._parse_assignment(), explicit=True) 7328 if alias 7329 else self._parse_assignment() 7330 ) 7331 7332 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7333 return self._parse_query_modifiers( 7334 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7335 ) 7336 7337 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7338 this = None 7339 if self._match_texts(self.TRANSACTION_KIND): 7340 this = self._prev.text 7341 7342 self._match_texts(("TRANSACTION", "WORK")) 7343 7344 modes = [] 7345 while True: 7346 mode = [] 7347 while self._match(TokenType.VAR): 7348 mode.append(self._prev.text) 7349 7350 if mode: 7351 modes.append(" ".join(mode)) 7352 if not self._match(TokenType.COMMA): 7353 break 7354 7355 return self.expression(exp.Transaction, this=this, modes=modes) 7356 7357 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7358 chain = None 7359 savepoint = None 7360 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7361 7362 self._match_texts(("TRANSACTION", "WORK")) 7363 7364 if self._match_text_seq("TO"): 7365 self._match_text_seq("SAVEPOINT") 7366 savepoint = self._parse_id_var() 7367 7368 if self._match(TokenType.AND): 7369 chain = not self._match_text_seq("NO") 7370 self._match_text_seq("CHAIN") 7371 7372 if is_rollback: 7373 return self.expression(exp.Rollback, savepoint=savepoint) 7374 7375 return self.expression(exp.Commit, chain=chain) 7376 7377 def _parse_refresh(self) -> exp.Refresh: 7378 self._match(TokenType.TABLE) 7379 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7380 7381 def _parse_column_def_with_exists(self): 7382 start = self._index 7383 self._match(TokenType.COLUMN) 7384 7385 exists_column = self._parse_exists(not_=True) 7386 expression = self._parse_field_def() 7387 7388 if not isinstance(expression, exp.ColumnDef): 7389 self._retreat(start) 7390 return None 7391 7392 expression.set("exists", exists_column) 7393 7394 return expression 7395 7396 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7397 if not self._prev.text.upper() == "ADD": 7398 return None 7399 7400 expression = self._parse_column_def_with_exists() 7401 if not expression: 7402 return None 7403 7404 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7405 if self._match_texts(("FIRST", "AFTER")): 7406 position = self._prev.text 7407 column_position = self.expression( 7408 exp.ColumnPosition, this=self._parse_column(), position=position 7409 ) 7410 expression.set("position", column_position) 7411 7412 return expression 7413 7414 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7415 drop = 
self._match(TokenType.DROP) and self._parse_drop() 7416 if drop and not isinstance(drop, exp.Command): 7417 drop.set("kind", drop.args.get("kind", "COLUMN")) 7418 return drop 7419 7420 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7421 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7422 return self.expression( 7423 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7424 ) 7425 7426 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7427 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7428 self._match_text_seq("ADD") 7429 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7430 return self.expression( 7431 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7432 ) 7433 7434 column_def = self._parse_add_column() 7435 if isinstance(column_def, exp.ColumnDef): 7436 return column_def 7437 7438 exists = self._parse_exists(not_=True) 7439 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7440 return self.expression( 7441 exp.AddPartition, 7442 exists=exists, 7443 this=self._parse_field(any_token=True), 7444 location=self._match_text_seq("LOCATION", advance=False) 7445 and self._parse_property(), 7446 ) 7447 7448 return None 7449 7450 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7451 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7452 or self._match_text_seq("COLUMNS") 7453 ): 7454 schema = self._parse_schema() 7455 7456 return ( 7457 ensure_list(schema) 7458 if schema 7459 else self._parse_csv(self._parse_column_def_with_exists) 7460 ) 7461 7462 return self._parse_csv(_parse_add_alteration) 7463 7464 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7465 if self._match_texts(self.ALTER_ALTER_PARSERS): 7466 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7467 7468 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7469 # keyword after ALTER we default to parsing this statement 7470 self._match(TokenType.COLUMN) 7471 column = self._parse_field(any_token=True) 7472 7473 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7474 return self.expression(exp.AlterColumn, this=column, drop=True) 7475 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7476 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7477 if self._match(TokenType.COMMENT): 7478 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7479 if self._match_text_seq("DROP", "NOT", "NULL"): 7480 return self.expression( 7481 exp.AlterColumn, 7482 this=column, 7483 drop=True, 7484 allow_null=True, 7485 ) 7486 if self._match_text_seq("SET", "NOT", "NULL"): 7487 return self.expression( 7488 exp.AlterColumn, 7489 this=column, 7490 allow_null=False, 7491 ) 7492 7493 if self._match_text_seq("SET", "VISIBLE"): 7494 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7495 if self._match_text_seq("SET", "INVISIBLE"): 7496 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7497 7498 self._match_text_seq("SET", "DATA") 7499 self._match_text_seq("TYPE") 7500 return self.expression( 7501 exp.AlterColumn, 7502 this=column, 7503 dtype=self._parse_types(), 7504 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7505 using=self._match(TokenType.USING) and self._parse_assignment(), 7506 ) 7507 7508 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7509 if 
self._match_texts(("ALL", "EVEN", "AUTO")): 7510 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7511 7512 self._match_text_seq("KEY", "DISTKEY") 7513 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7514 7515 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7516 if compound: 7517 self._match_text_seq("SORTKEY") 7518 7519 if self._match(TokenType.L_PAREN, advance=False): 7520 return self.expression( 7521 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7522 ) 7523 7524 self._match_texts(("AUTO", "NONE")) 7525 return self.expression( 7526 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7527 ) 7528 7529 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7530 index = self._index - 1 7531 7532 partition_exists = self._parse_exists() 7533 if self._match(TokenType.PARTITION, advance=False): 7534 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7535 7536 self._retreat(index) 7537 return self._parse_csv(self._parse_drop_column) 7538 7539 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7540 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7541 exists = self._parse_exists() 7542 old_column = self._parse_column() 7543 to = self._match_text_seq("TO") 7544 new_column = self._parse_column() 7545 7546 if old_column is None or to is None or new_column is None: 7547 return None 7548 7549 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7550 7551 self._match_text_seq("TO") 7552 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7553 7554 def _parse_alter_table_set(self) -> exp.AlterSet: 7555 alter_set = self.expression(exp.AlterSet) 7556 7557 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7558 "TABLE", "PROPERTIES" 7559 ): 7560 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7561 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7562 alter_set.set("expressions", [self._parse_assignment()]) 7563 elif self._match_texts(("LOGGED", "UNLOGGED")): 7564 alter_set.set("option", exp.var(self._prev.text.upper())) 7565 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7566 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7567 elif self._match_text_seq("LOCATION"): 7568 alter_set.set("location", self._parse_field()) 7569 elif self._match_text_seq("ACCESS", "METHOD"): 7570 alter_set.set("access_method", self._parse_field()) 7571 elif self._match_text_seq("TABLESPACE"): 7572 alter_set.set("tablespace", self._parse_field()) 7573 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7574 alter_set.set("file_format", [self._parse_field()]) 7575 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7576 alter_set.set("file_format", self._parse_wrapped_options()) 7577 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7578 alter_set.set("copy_options", self._parse_wrapped_options()) 7579 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7580 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7581 else: 7582 if self._match_text_seq("SERDE"): 7583 alter_set.set("serde", self._parse_field()) 7584 7585 properties = self._parse_wrapped(self._parse_properties, optional=True) 7586 alter_set.set("expressions", [properties]) 7587 7588 return alter_set 
7589 7590 def _parse_alter(self) -> exp.Alter | exp.Command: 7591 start = self._prev 7592 7593 alter_token = self._match_set(self.ALTERABLES) and self._prev 7594 if not alter_token: 7595 return self._parse_as_command(start) 7596 7597 exists = self._parse_exists() 7598 only = self._match_text_seq("ONLY") 7599 this = self._parse_table(schema=True) 7600 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7601 7602 if self._next: 7603 self._advance() 7604 7605 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7606 if parser: 7607 actions = ensure_list(parser(self)) 7608 not_valid = self._match_text_seq("NOT", "VALID") 7609 options = self._parse_csv(self._parse_property) 7610 7611 if not self._curr and actions: 7612 return self.expression( 7613 exp.Alter, 7614 this=this, 7615 kind=alter_token.text.upper(), 7616 exists=exists, 7617 actions=actions, 7618 only=only, 7619 options=options, 7620 cluster=cluster, 7621 not_valid=not_valid, 7622 ) 7623 7624 return self._parse_as_command(start) 7625 7626 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7627 start = self._prev 7628 # https://duckdb.org/docs/sql/statements/analyze 7629 if not self._curr: 7630 return self.expression(exp.Analyze) 7631 7632 options = [] 7633 while self._match_texts(self.ANALYZE_STYLES): 7634 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7635 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7636 else: 7637 options.append(self._prev.text.upper()) 7638 7639 this: t.Optional[exp.Expression] = None 7640 inner_expression: t.Optional[exp.Expression] = None 7641 7642 kind = self._curr and self._curr.text.upper() 7643 7644 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7645 this = self._parse_table_parts() 7646 elif self._match_text_seq("TABLES"): 7647 if self._match_set((TokenType.FROM, TokenType.IN)): 7648 kind = f"{kind} {self._prev.text.upper()}" 7649 this = self._parse_table(schema=True, is_db_reference=True) 7650 elif self._match_text_seq("DATABASE"): 7651 this = self._parse_table(schema=True, is_db_reference=True) 7652 elif self._match_text_seq("CLUSTER"): 7653 this = self._parse_table() 7654 # Try matching inner expr keywords before fallback to parse table. 
7655 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7656 kind = None 7657 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7658 else: 7659 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7660 kind = None 7661 this = self._parse_table_parts() 7662 7663 partition = self._try_parse(self._parse_partition) 7664 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7665 return self._parse_as_command(start) 7666 7667 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7668 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7669 "WITH", "ASYNC", "MODE" 7670 ): 7671 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7672 else: 7673 mode = None 7674 7675 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7676 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7677 7678 properties = self._parse_properties() 7679 return self.expression( 7680 exp.Analyze, 7681 kind=kind, 7682 this=this, 7683 mode=mode, 7684 partition=partition, 7685 properties=properties, 7686 expression=inner_expression, 7687 options=options, 7688 ) 7689 7690 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7691 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7692 this = None 7693 kind = self._prev.text.upper() 7694 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7695 expressions = [] 7696 7697 if not self._match_text_seq("STATISTICS"): 7698 self.raise_error("Expecting token STATISTICS") 7699 7700 if self._match_text_seq("NOSCAN"): 7701 this = "NOSCAN" 7702 elif self._match(TokenType.FOR): 7703 if self._match_text_seq("ALL", "COLUMNS"): 7704 this = "FOR ALL COLUMNS" 7705 if self._match_texts("COLUMNS"): 7706 this = "FOR COLUMNS" 7707 expressions = self._parse_csv(self._parse_column_reference) 7708 elif self._match_text_seq("SAMPLE"): 7709 sample = self._parse_number() 7710 expressions = [ 7711 self.expression( 7712 exp.AnalyzeSample, 7713 sample=sample, 7714 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7715 ) 7716 ] 7717 7718 return self.expression( 7719 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7720 ) 7721 7722 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7723 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7724 kind = None 7725 this = None 7726 expression: t.Optional[exp.Expression] = None 7727 if self._match_text_seq("REF", "UPDATE"): 7728 kind = "REF" 7729 this = "UPDATE" 7730 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7731 this = "UPDATE SET DANGLING TO NULL" 7732 elif self._match_text_seq("STRUCTURE"): 7733 kind = "STRUCTURE" 7734 if self._match_text_seq("CASCADE", "FAST"): 7735 this = "CASCADE FAST" 7736 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7737 ("ONLINE", "OFFLINE") 7738 ): 7739 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7740 expression = self._parse_into() 7741 7742 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7743 7744 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7745 this = self._prev.text.upper() 7746 if self._match_text_seq("COLUMNS"): 7747 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7748 return None 7749 7750 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7751 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7752 if self._match_text_seq("STATISTICS"): 7753 return self.expression(exp.AnalyzeDelete, kind=kind) 7754 return None 7755 7756 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7757 if self._match_text_seq("CHAINED", "ROWS"): 7758 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7759 return None 7760 7761 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7762 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7763 this = self._prev.text.upper() 7764 expression: t.Optional[exp.Expression] = None 7765 expressions = [] 7766 update_options = None 7767 7768 if self._match_text_seq("HISTOGRAM", "ON"): 7769 expressions = self._parse_csv(self._parse_column_reference) 7770 with_expressions = [] 7771 while self._match(TokenType.WITH): 7772 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7773 if self._match_texts(("SYNC", "ASYNC")): 7774 if self._match_text_seq("MODE", advance=False): 7775 with_expressions.append(f"{self._prev.text.upper()} MODE") 7776 self._advance() 7777 else: 7778 buckets = self._parse_number() 7779 if self._match_text_seq("BUCKETS"): 7780 with_expressions.append(f"{buckets} BUCKETS") 7781 if with_expressions: 7782 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7783 7784 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7785 TokenType.UPDATE, advance=False 7786 ): 7787 update_options = self._prev.text.upper() 7788 self._advance() 7789 elif self._match_text_seq("USING", "DATA"): 7790 expression = self.expression(exp.UsingData, this=self._parse_string()) 7791 7792 return self.expression( 7793 exp.AnalyzeHistogram, 7794 this=this, 7795 expressions=expressions, 7796 expression=expression, 7797 update_options=update_options, 7798 ) 7799 7800 def _parse_merge(self) -> exp.Merge: 7801 self._match(TokenType.INTO) 7802 target = self._parse_table() 7803 7804 if target and self._match(TokenType.ALIAS, advance=False): 7805 target.set("alias", self._parse_table_alias()) 7806 7807 self._match(TokenType.USING) 7808 using = self._parse_table() 7809 7810 self._match(TokenType.ON) 7811 on = self._parse_assignment() 7812 7813 return self.expression( 7814 exp.Merge, 7815 this=target, 7816 using=using, 7817 on=on, 7818 whens=self._parse_when_matched(), 7819 returning=self._parse_returning(), 7820 ) 7821 7822 def _parse_when_matched(self) -> exp.Whens: 7823 whens = [] 7824 7825 while self._match(TokenType.WHEN): 7826 matched = not self._match(TokenType.NOT) 7827 self._match_text_seq("MATCHED") 7828 source = ( 7829 False 7830 if self._match_text_seq("BY", "TARGET") 7831 else self._match_text_seq("BY", "SOURCE") 7832 ) 7833 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7834 7835 self._match(TokenType.THEN) 7836 7837 if self._match(TokenType.INSERT): 7838 this = self._parse_star() 7839 if this: 7840 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7841 else: 7842 then = self.expression( 7843 exp.Insert, 7844 this=exp.var("ROW") 7845 if self._match_text_seq("ROW") 7846 else self._parse_value(values=False), 7847 expression=self._match_text_seq("VALUES") and self._parse_value(), 7848 ) 7849 elif self._match(TokenType.UPDATE): 7850 expressions = self._parse_star() 7851 if expressions: 7852 then = self.expression(exp.Update, expressions=expressions) 7853 else: 7854 then = self.expression( 7855 exp.Update, 7856 
expressions=self._match(TokenType.SET) 7857 and self._parse_csv(self._parse_equality), 7858 ) 7859 elif self._match(TokenType.DELETE): 7860 then = self.expression(exp.Var, this=self._prev.text) 7861 else: 7862 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7863 7864 whens.append( 7865 self.expression( 7866 exp.When, 7867 matched=matched, 7868 source=source, 7869 condition=condition, 7870 then=then, 7871 ) 7872 ) 7873 return self.expression(exp.Whens, expressions=whens) 7874 7875 def _parse_show(self) -> t.Optional[exp.Expression]: 7876 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7877 if parser: 7878 return parser(self) 7879 return self._parse_as_command(self._prev) 7880 7881 def _parse_set_item_assignment( 7882 self, kind: t.Optional[str] = None 7883 ) -> t.Optional[exp.Expression]: 7884 index = self._index 7885 7886 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7887 return self._parse_set_transaction(global_=kind == "GLOBAL") 7888 7889 left = self._parse_primary() or self._parse_column() 7890 assignment_delimiter = self._match_texts(("=", "TO")) 7891 7892 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7893 self._retreat(index) 7894 return None 7895 7896 right = self._parse_statement() or self._parse_id_var() 7897 if isinstance(right, (exp.Column, exp.Identifier)): 7898 right = exp.var(right.name) 7899 7900 this = self.expression(exp.EQ, this=left, expression=right) 7901 return self.expression(exp.SetItem, this=this, kind=kind) 7902 7903 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7904 self._match_text_seq("TRANSACTION") 7905 characteristics = self._parse_csv( 7906 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7907 ) 7908 return self.expression( 7909 exp.SetItem, 7910 expressions=characteristics, 7911 kind="TRANSACTION", 7912 **{"global": global_}, # type: ignore 7913 ) 7914 7915 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7916 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7917 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7918 7919 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7920 index = self._index 7921 set_ = self.expression( 7922 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7923 ) 7924 7925 if self._curr: 7926 self._retreat(index) 7927 return self._parse_as_command(self._prev) 7928 7929 return set_ 7930 7931 def _parse_var_from_options( 7932 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7933 ) -> t.Optional[exp.Var]: 7934 start = self._curr 7935 if not start: 7936 return None 7937 7938 option = start.text.upper() 7939 continuations = options.get(option) 7940 7941 index = self._index 7942 self._advance() 7943 for keywords in continuations or []: 7944 if isinstance(keywords, str): 7945 keywords = (keywords,) 7946 7947 if self._match_text_seq(*keywords): 7948 option = f"{option} {' '.join(keywords)}" 7949 break 7950 else: 7951 if continuations or continuations is None: 7952 if raise_unmatched: 7953 self.raise_error(f"Unknown option {option}") 7954 7955 self._retreat(index) 7956 return None 7957 7958 return exp.var(option) 7959 7960 def _parse_as_command(self, start: Token) -> exp.Command: 7961 while self._curr: 7962 self._advance() 7963 text = self._find_sql(start, self._prev) 7964 size = len(start.text) 7965 self._warn_unsupported() 7966 return exp.Command(this=text[:size], 
expression=text[size:]) 7967 7968 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7969 settings = [] 7970 7971 self._match_l_paren() 7972 kind = self._parse_id_var() 7973 7974 if self._match(TokenType.L_PAREN): 7975 while True: 7976 key = self._parse_id_var() 7977 value = self._parse_primary() 7978 if not key and value is None: 7979 break 7980 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7981 self._match(TokenType.R_PAREN) 7982 7983 self._match_r_paren() 7984 7985 return self.expression( 7986 exp.DictProperty, 7987 this=this, 7988 kind=kind.this if kind else None, 7989 settings=settings, 7990 ) 7991 7992 def _parse_dict_range(self, this: str) -> exp.DictRange: 7993 self._match_l_paren() 7994 has_min = self._match_text_seq("MIN") 7995 if has_min: 7996 min = self._parse_var() or self._parse_primary() 7997 self._match_text_seq("MAX") 7998 max = self._parse_var() or self._parse_primary() 7999 else: 8000 max = self._parse_var() or self._parse_primary() 8001 min = exp.Literal.number(0) 8002 self._match_r_paren() 8003 return self.expression(exp.DictRange, this=this, min=min, max=max) 8004 8005 def _parse_comprehension( 8006 self, this: t.Optional[exp.Expression] 8007 ) -> t.Optional[exp.Comprehension]: 8008 index = self._index 8009 expression = self._parse_column() 8010 if not self._match(TokenType.IN): 8011 self._retreat(index - 1) 8012 return None 8013 iterator = self._parse_column() 8014 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8015 return self.expression( 8016 exp.Comprehension, 8017 this=this, 8018 expression=expression, 8019 iterator=iterator, 8020 condition=condition, 8021 ) 8022 8023 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8024 if self._match(TokenType.HEREDOC_STRING): 8025 return self.expression(exp.Heredoc, this=self._prev.text) 8026 8027 if not self._match_text_seq("$"): 8028 return None 8029 8030 tags = ["$"] 8031 tag_text = None 8032 8033 if self._is_connected(): 8034 self._advance() 8035 tags.append(self._prev.text.upper()) 8036 else: 8037 self.raise_error("No closing $ found") 8038 8039 if tags[-1] != "$": 8040 if self._is_connected() and self._match_text_seq("$"): 8041 tag_text = tags[-1] 8042 tags.append("$") 8043 else: 8044 self.raise_error("No closing $ found") 8045 8046 heredoc_start = self._curr 8047 8048 while self._curr: 8049 if self._match_text_seq(*tags, advance=False): 8050 this = self._find_sql(heredoc_start, self._prev) 8051 self._advance(len(tags)) 8052 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8053 8054 self._advance() 8055 8056 self.raise_error(f"No closing {''.join(tags)} found") 8057 return None 8058 8059 def _find_parser( 8060 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8061 ) -> t.Optional[t.Callable]: 8062 if not self._curr: 8063 return None 8064 8065 index = self._index 8066 this = [] 8067 while True: 8068 # The current token might be multiple words 8069 curr = self._curr.text.upper() 8070 key = curr.split(" ") 8071 this.append(curr) 8072 8073 self._advance() 8074 result, trie = in_trie(trie, key) 8075 if result == TrieResult.FAILED: 8076 break 8077 8078 if result == TrieResult.EXISTS: 8079 subparser = parsers[" ".join(this)] 8080 return subparser 8081 8082 self._retreat(index) 8083 return None 8084 8085 def _match(self, token_type, advance=True, expression=None): 8086 if not self._curr: 8087 return None 8088 8089 if self._curr.token_type == token_type: 8090 if advance: 8091 self._advance() 8092 self._add_comments(expression) 8093 return 
True 8094 8095 return None 8096 8097 def _match_set(self, types, advance=True): 8098 if not self._curr: 8099 return None 8100 8101 if self._curr.token_type in types: 8102 if advance: 8103 self._advance() 8104 return True 8105 8106 return None 8107 8108 def _match_pair(self, token_type_a, token_type_b, advance=True): 8109 if not self._curr or not self._next: 8110 return None 8111 8112 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8113 if advance: 8114 self._advance(2) 8115 return True 8116 8117 return None 8118 8119 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8120 if not self._match(TokenType.L_PAREN, expression=expression): 8121 self.raise_error("Expecting (") 8122 8123 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8124 if not self._match(TokenType.R_PAREN, expression=expression): 8125 self.raise_error("Expecting )") 8126 8127 def _match_texts(self, texts, advance=True): 8128 if ( 8129 self._curr 8130 and self._curr.token_type != TokenType.STRING 8131 and self._curr.text.upper() in texts 8132 ): 8133 if advance: 8134 self._advance() 8135 return True 8136 return None 8137 8138 def _match_text_seq(self, *texts, advance=True): 8139 index = self._index 8140 for text in texts: 8141 if ( 8142 self._curr 8143 and self._curr.token_type != TokenType.STRING 8144 and self._curr.text.upper() == text 8145 ): 8146 self._advance() 8147 else: 8148 self._retreat(index) 8149 return None 8150 8151 if not advance: 8152 self._retreat(index) 8153 8154 return True 8155 8156 def _replace_lambda( 8157 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8158 ) -> t.Optional[exp.Expression]: 8159 if not node: 8160 return node 8161 8162 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8163 8164 for column in node.find_all(exp.Column): 8165 typ = lambda_types.get(column.parts[0].name) 8166 if typ is not None: 8167 dot_or_id = column.to_dot() if column.table else column.this 8168 8169 if typ: 8170 dot_or_id = self.expression( 8171 exp.Cast, 8172 this=dot_or_id, 8173 to=typ, 8174 ) 8175 8176 parent = column.parent 8177 8178 while isinstance(parent, exp.Dot): 8179 if not isinstance(parent.parent, exp.Dot): 8180 parent.replace(dot_or_id) 8181 break 8182 parent = parent.parent 8183 else: 8184 if column is node: 8185 node = dot_or_id 8186 else: 8187 column.replace(dot_or_id) 8188 return node 8189 8190 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8191 start = self._prev 8192 8193 # Not to be confused with TRUNCATE(number, decimals) function call 8194 if self._match(TokenType.L_PAREN): 8195 self._retreat(self._index - 2) 8196 return self._parse_function() 8197 8198 # Clickhouse supports TRUNCATE DATABASE as well 8199 is_database = self._match(TokenType.DATABASE) 8200 8201 self._match(TokenType.TABLE) 8202 8203 exists = self._parse_exists(not_=False) 8204 8205 expressions = self._parse_csv( 8206 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8207 ) 8208 8209 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8210 8211 if self._match_text_seq("RESTART", "IDENTITY"): 8212 identity = "RESTART" 8213 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8214 identity = "CONTINUE" 8215 else: 8216 identity = None 8217 8218 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8219 option = self._prev.text 8220 else: 8221 option = None 8222 8223 partition = self._parse_partition() 
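# Editor note (illustrative, not part of the original source): every recognized
# TRUNCATE clause has been consumed at this point. For, e.g.,
#     TRUNCATE TABLE IF EXISTS t1, t2 RESTART IDENTITY CASCADE
# `expressions` holds both tables, `identity` is "RESTART" and `option` is "CASCADE";
# any leftover token routes through the Command fallback below.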
8224 8225 # Fallback case 8226 if self._curr: 8227 return self._parse_as_command(start) 8228 8229 return self.expression( 8230 exp.TruncateTable, 8231 expressions=expressions, 8232 is_database=is_database, 8233 exists=exists, 8234 cluster=cluster, 8235 identity=identity, 8236 option=option, 8237 partition=partition, 8238 ) 8239 8240 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8241 this = self._parse_ordered(self._parse_opclass) 8242 8243 if not self._match(TokenType.WITH): 8244 return this 8245 8246 op = self._parse_var(any_token=True) 8247 8248 return self.expression(exp.WithOperator, this=this, op=op) 8249 8250 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8251 self._match(TokenType.EQ) 8252 self._match(TokenType.L_PAREN) 8253 8254 opts: t.List[t.Optional[exp.Expression]] = [] 8255 option: exp.Expression | None 8256 while self._curr and not self._match(TokenType.R_PAREN): 8257 if self._match_text_seq("FORMAT_NAME", "="): 8258 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8259 option = self._parse_format_name() 8260 else: 8261 option = self._parse_property() 8262 8263 if option is None: 8264 self.raise_error("Unable to parse option") 8265 break 8266 8267 opts.append(option) 8268 8269 return opts 8270 8271 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8272 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8273 8274 options = [] 8275 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8276 option = self._parse_var(any_token=True) 8277 prev = self._prev.text.upper() 8278 8279 # Different dialects might separate options and values by white space, "=" and "AS" 8280 self._match(TokenType.EQ) 8281 self._match(TokenType.ALIAS) 8282 8283 param = self.expression(exp.CopyParameter, this=option) 8284 8285 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8286 TokenType.L_PAREN, advance=False 8287 ): 8288 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8289 param.set("expressions", self._parse_wrapped_options()) 8290 elif prev == "FILE_FORMAT": 8291 # T-SQL's external file format case 8292 param.set("expression", self._parse_field()) 8293 else: 8294 param.set("expression", self._parse_unquoted_field()) 8295 8296 options.append(param) 8297 self._match(sep) 8298 8299 return options 8300 8301 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8302 expr = self.expression(exp.Credentials) 8303 8304 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8305 expr.set("storage", self._parse_field()) 8306 if self._match_text_seq("CREDENTIALS"): 8307 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8308 creds = ( 8309 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8310 ) 8311 expr.set("credentials", creds) 8312 if self._match_text_seq("ENCRYPTION"): 8313 expr.set("encryption", self._parse_wrapped_options()) 8314 if self._match_text_seq("IAM_ROLE"): 8315 expr.set("iam_role", self._parse_field()) 8316 if self._match_text_seq("REGION"): 8317 expr.set("region", self._parse_field()) 8318 8319 return expr 8320 8321 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8322 return self._parse_field() 8323 8324 def _parse_copy(self) -> exp.Copy | exp.Command: 8325 start = self._prev 8326 8327 self._match(TokenType.INTO) 8328 8329 this = ( 8330 self._parse_select(nested=True, parse_subquery_alias=False) 8331 if self._match(TokenType.L_PAREN, advance=False) 8332 else self._parse_table(schema=True) 
8333 ) 8334 8335 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8336 8337 files = self._parse_csv(self._parse_file_location) 8338 credentials = self._parse_credentials() 8339 8340 self._match_text_seq("WITH") 8341 8342 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8343 8344 # Fallback case 8345 if self._curr: 8346 return self._parse_as_command(start) 8347 8348 return self.expression( 8349 exp.Copy, 8350 this=this, 8351 kind=kind, 8352 credentials=credentials, 8353 files=files, 8354 params=params, 8355 ) 8356 8357 def _parse_normalize(self) -> exp.Normalize: 8358 return self.expression( 8359 exp.Normalize, 8360 this=self._parse_bitwise(), 8361 form=self._match(TokenType.COMMA) and self._parse_var(), 8362 ) 8363 8364 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8365 args = self._parse_csv(lambda: self._parse_lambda()) 8366 8367 this = seq_get(args, 0) 8368 decimals = seq_get(args, 1) 8369 8370 return expr_type( 8371 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8372 ) 8373 8374 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8375 star_token = self._prev 8376 8377 if self._match_text_seq("COLUMNS", "(", advance=False): 8378 this = self._parse_function() 8379 if isinstance(this, exp.Columns): 8380 this.set("unpack", True) 8381 return this 8382 8383 return self.expression( 8384 exp.Star, 8385 **{ # type: ignore 8386 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8387 "replace": self._parse_star_op("REPLACE"), 8388 "rename": self._parse_star_op("RENAME"), 8389 }, 8390 ).update_positions(star_token) 8391 8392 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8393 privilege_parts = [] 8394 8395 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8396 # (end of privilege list) or L_PAREN (start of column list) are met 8397 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8398 privilege_parts.append(self._curr.text.upper()) 8399 self._advance() 8400 8401 this = exp.var(" ".join(privilege_parts)) 8402 expressions = ( 8403 self._parse_wrapped_csv(self._parse_column) 8404 if self._match(TokenType.L_PAREN, advance=False) 8405 else None 8406 ) 8407 8408 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8409 8410 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8411 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8412 principal = self._parse_id_var() 8413 8414 if not principal: 8415 return None 8416 8417 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8418 8419 def _parse_grant(self) -> exp.Grant | exp.Command: 8420 start = self._prev 8421 8422 privileges = self._parse_csv(self._parse_grant_privilege) 8423 8424 self._match(TokenType.ON) 8425 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8426 8427 # Attempt to parse the securable e.g. 
MySQL allows names 8428 # such as "foo.*", "*.*" which are not easily parseable yet 8429 securable = self._try_parse(self._parse_table_parts) 8430 8431 if not securable or not self._match_text_seq("TO"): 8432 return self._parse_as_command(start) 8433 8434 principals = self._parse_csv(self._parse_grant_principal) 8435 8436 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8437 8438 if self._curr: 8439 return self._parse_as_command(start) 8440 8441 return self.expression( 8442 exp.Grant, 8443 privileges=privileges, 8444 kind=kind, 8445 securable=securable, 8446 principals=principals, 8447 grant_option=grant_option, 8448 ) 8449 8450 def _parse_overlay(self) -> exp.Overlay: 8451 return self.expression( 8452 exp.Overlay, 8453 **{ # type: ignore 8454 "this": self._parse_bitwise(), 8455 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8456 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8457 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8458 }, 8459 ) 8460 8461 def _parse_format_name(self) -> exp.Property: 8462 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8463 # for FILE_FORMAT = <format_name> 8464 return self.expression( 8465 exp.Property, 8466 this=exp.var("FORMAT_NAME"), 8467 value=self._parse_string() or self._parse_table_parts(), 8468 ) 8469 8470 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8471 args: t.List[exp.Expression] = [] 8472 8473 if self._match(TokenType.DISTINCT): 8474 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8475 self._match(TokenType.COMMA) 8476 8477 args.extend(self._parse_csv(self._parse_assignment)) 8478 8479 return self.expression( 8480 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8481 ) 8482 8483 def _identifier_expression( 8484 self, token: t.Optional[Token] = None, **kwargs: t.Any 8485 ) -> exp.Identifier: 8486 token = token or self._prev 8487 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8488 expression.update_positions(token) 8489 return expression 8490 8491 def _build_pipe_cte( 8492 self, 8493 query: exp.Query, 8494 expressions: t.List[exp.Expression], 8495 alias_cte: t.Optional[exp.TableAlias] = None, 8496 ) -> exp.Select: 8497 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8498 if alias_cte: 8499 new_cte = alias_cte 8500 else: 8501 self._pipe_cte_counter += 1 8502 new_cte = f"__tmp{self._pipe_cte_counter}" 8503 8504 with_ = query.args.get("with") 8505 ctes = with_.pop() if with_ else None 8506 8507 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8508 if ctes: 8509 new_select.set("with", ctes) 8510 8511 return new_select.with_(new_cte, as_=query, copy=False) 8512 8513 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8514 select = self._parse_select(consume_pipe=False) 8515 if not select: 8516 return query 8517 8518 return self._build_pipe_cte( 8519 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8520 ) 8521 8522 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8523 limit = self._parse_limit() 8524 offset = self._parse_offset() 8525 if limit: 8526 curr_limit = query.args.get("limit", limit) 8527 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8528 query.limit(limit, copy=False) 8529 if offset: 8530 curr_offset = query.args.get("offset") 8531 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
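# Editor note (illustrative, not part of the original source): successive pipe OFFSET
# operators are additive, e.g. an existing OFFSET 10 followed by |> OFFSET 5 becomes
# OFFSET 15 below, whereas LIMIT only shrinks, keeping the smaller bound.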
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )
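A short illustration of the pipe-syntax machinery above (a sketch; it assumes a
dialect whose tokenizer emits the `|>` operator, e.g. BigQuery in recent sqlglot
versions). Each pipe operator is folded into a generated CTE (__tmp1, __tmp2, ...)
by _build_pipe_cte, so the final tree is a plain SELECT over those CTEs:

    import sqlglot

    # Parse a pipe-syntax query; the result is a regular Select expression
    ast = sqlglot.parse_one("FROM t |> WHERE x > 1 |> SELECT x", read="bigquery")
    print(ast.sql("bigquery"))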
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
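A quick round-trip showing why the wrapping matters (a sketch; exact output
formatting can vary across sqlglot versions):

    import sqlglot

    # The binary operand stays grouped when MOD is rendered as the % operator
    print(sqlglot.transpile("SELECT MOD(a + 1, 7)", read="mysql", write="duckdb")[0])
    # e.g. SELECT (a + 1) % 7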
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
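For instance, Redshift's two-argument CONVERT_TIMEZONE(target_tz, timestamp)
implies a UTC source timezone, so a dialect can pass default_source_tz="UTC" and
the builder injects it explicitly (an illustrative sketch of the resulting node):

    # build_convert_timezone([tz, ts], default_source_tz="UTC") yields
    # ConvertTimezone(source_tz='UTC', target_tz=tz, timestamp=ts)

Before the Parser class listing that follows, a minimal end-to-end sketch of how
the class is driven (illustrative; most callers use the higher-level
sqlglot.parse_one, which wires the tokenizer and parser together):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t WHERE b > 1"
    tokens = Tokenizer().tokenize(sql)
    # One syntax tree is returned per semicolon-separated statement
    expressions = Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql)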
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
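        # Dispatch table keyed on a statement's leading token; dialect parsers
        # extend or override these entries, and statements whose first token has
        # no entry here fall back to the generic expression/command paths.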
TokenType.ALTER: lambda self: self._parse_alter(), 836 TokenType.ANALYZE: lambda self: self._parse_analyze(), 837 TokenType.BEGIN: lambda self: self._parse_transaction(), 838 TokenType.CACHE: lambda self: self._parse_cache(), 839 TokenType.COMMENT: lambda self: self._parse_comment(), 840 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 841 TokenType.COPY: lambda self: self._parse_copy(), 842 TokenType.CREATE: lambda self: self._parse_create(), 843 TokenType.DELETE: lambda self: self._parse_delete(), 844 TokenType.DESC: lambda self: self._parse_describe(), 845 TokenType.DESCRIBE: lambda self: self._parse_describe(), 846 TokenType.DROP: lambda self: self._parse_drop(), 847 TokenType.GRANT: lambda self: self._parse_grant(), 848 TokenType.INSERT: lambda self: self._parse_insert(), 849 TokenType.KILL: lambda self: self._parse_kill(), 850 TokenType.LOAD: lambda self: self._parse_load(), 851 TokenType.MERGE: lambda self: self._parse_merge(), 852 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 853 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 854 TokenType.REFRESH: lambda self: self._parse_refresh(), 855 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 856 TokenType.SET: lambda self: self._parse_set(), 857 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 858 TokenType.UNCACHE: lambda self: self._parse_uncache(), 859 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 860 TokenType.UPDATE: lambda self: self._parse_update(), 861 TokenType.USE: lambda self: self._parse_use(), 862 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 863 } 864 865 UNARY_PARSERS = { 866 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 867 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 868 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 869 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 870 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 871 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 872 } 873 874 STRING_PARSERS = { 875 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 876 exp.RawString, this=token.text 877 ), 878 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 879 exp.National, this=token.text 880 ), 881 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 882 TokenType.STRING: lambda self, token: self.expression( 883 exp.Literal, this=token.text, is_string=True 884 ), 885 TokenType.UNICODE_STRING: lambda self, token: self.expression( 886 exp.UnicodeString, 887 this=token.text, 888 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 889 ), 890 } 891 892 NUMERIC_PARSERS = { 893 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 894 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 895 TokenType.HEX_STRING: lambda self, token: self.expression( 896 exp.HexString, 897 this=token.text, 898 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 899 ), 900 TokenType.NUMBER: lambda self, token: self.expression( 901 exp.Literal, this=token.text, is_string=False 902 ), 903 } 904 905 PRIMARY_PARSERS = { 906 **STRING_PARSERS, 907 **NUMERIC_PARSERS, 908 TokenType.INTRODUCER: lambda self, token: 
self._parse_introducer(token), 909 TokenType.NULL: lambda self, _: self.expression(exp.Null), 910 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 911 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 912 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 913 TokenType.STAR: lambda self, _: self._parse_star_ops(), 914 } 915 916 PLACEHOLDER_PARSERS = { 917 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 918 TokenType.PARAMETER: lambda self: self._parse_parameter(), 919 TokenType.COLON: lambda self: ( 920 self.expression(exp.Placeholder, this=self._prev.text) 921 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 922 else None 923 ), 924 } 925 926 RANGE_PARSERS = { 927 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 928 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 929 TokenType.GLOB: binary_range_parser(exp.Glob), 930 TokenType.ILIKE: binary_range_parser(exp.ILike), 931 TokenType.IN: lambda self, this: self._parse_in(this), 932 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 933 TokenType.IS: lambda self, this: self._parse_is(this), 934 TokenType.LIKE: binary_range_parser(exp.Like), 935 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 936 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 937 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 938 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 939 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 940 } 941 942 PIPE_SYNTAX_TRANSFORM_PARSERS = { 943 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 944 "AS": lambda self, query: self._build_pipe_cte( 945 query, [exp.Star()], self._parse_table_alias() 946 ), 947 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 948 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 949 "ORDER BY": lambda self, query: query.order_by( 950 self._parse_order(), append=False, copy=False 951 ), 952 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 953 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 954 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 955 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 956 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 957 } 958 959 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 960 "ALLOWED_VALUES": lambda self: self.expression( 961 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 962 ), 963 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 964 "AUTO": lambda self: self._parse_auto_property(), 965 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 966 "BACKUP": lambda self: self.expression( 967 exp.BackupProperty, this=self._parse_var(any_token=True) 968 ), 969 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 970 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 971 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 972 "CHECKSUM": lambda self: self._parse_checksum(), 973 "CLUSTER BY": lambda self: self._parse_cluster(), 974 "CLUSTERED": lambda self: self._parse_clustered_by(), 975 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 976 exp.CollateProperty, **kwargs 977 ), 978 "COMMENT": lambda self: 
self._parse_property_assignment(exp.SchemaCommentProperty), 979 "CONTAINS": lambda self: self._parse_contains_property(), 980 "COPY": lambda self: self._parse_copy_property(), 981 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 982 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 983 "DEFINER": lambda self: self._parse_definer(), 984 "DETERMINISTIC": lambda self: self.expression( 985 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 986 ), 987 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 988 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 989 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 990 "DISTKEY": lambda self: self._parse_distkey(), 991 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 992 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 993 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 994 "ENVIRONMENT": lambda self: self.expression( 995 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 996 ), 997 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 998 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 999 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1000 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1001 "FREESPACE": lambda self: self._parse_freespace(), 1002 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1003 "HEAP": lambda self: self.expression(exp.HeapProperty), 1004 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1005 "IMMUTABLE": lambda self: self.expression( 1006 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1007 ), 1008 "INHERITS": lambda self: self.expression( 1009 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1010 ), 1011 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1012 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1013 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1014 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1015 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1016 "LIKE": lambda self: self._parse_create_like(), 1017 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1018 "LOCK": lambda self: self._parse_locking(), 1019 "LOCKING": lambda self: self._parse_locking(), 1020 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1021 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1022 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1023 "MODIFIES": lambda self: self._parse_modifies_property(), 1024 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1025 "NO": lambda self: self._parse_no_property(), 1026 "ON": lambda self: self._parse_on_property(), 1027 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1028 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1029 "PARTITION": lambda self: self._parse_partitioned_of(), 1030 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1031 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1033 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 1034 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1035 "READS": lambda self: self._parse_reads_property(), 1036 "REMOTE": lambda self: self._parse_remote_with_connection(), 1037 "RETURNS": lambda self: self._parse_returns(), 1038 "STRICT": lambda self: self.expression(exp.StrictProperty), 1039 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1040 "ROW": lambda self: self._parse_row(), 1041 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1042 "SAMPLE": lambda self: self.expression( 1043 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1044 ), 1045 "SECURE": lambda self: self.expression(exp.SecureProperty), 1046 "SECURITY": lambda self: self._parse_security(), 1047 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1048 "SETTINGS": lambda self: self._parse_settings_property(), 1049 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1050 "SORTKEY": lambda self: self._parse_sortkey(), 1051 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1052 "STABLE": lambda self: self.expression( 1053 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1054 ), 1055 "STORED": lambda self: self._parse_stored(), 1056 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1057 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1058 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1059 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1060 "TO": lambda self: self._parse_to_table(), 1061 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1062 "TRANSFORM": lambda self: self.expression( 1063 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1064 ), 1065 "TTL": lambda self: self._parse_ttl(), 1066 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1067 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1068 "VOLATILE": lambda self: self._parse_volatile_property(), 1069 "WITH": lambda self: self._parse_with_property(), 1070 } 1071 1072 CONSTRAINT_PARSERS = { 1073 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1074 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1075 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1076 "CHARACTER SET": lambda self: self.expression( 1077 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1078 ), 1079 "CHECK": lambda self: self.expression( 1080 exp.CheckColumnConstraint, 1081 this=self._parse_wrapped(self._parse_assignment), 1082 enforced=self._match_text_seq("ENFORCED"), 1083 ), 1084 "COLLATE": lambda self: self.expression( 1085 exp.CollateColumnConstraint, 1086 this=self._parse_identifier() or self._parse_column(), 1087 ), 1088 "COMMENT": lambda self: self.expression( 1089 exp.CommentColumnConstraint, this=self._parse_string() 1090 ), 1091 "COMPRESS": lambda self: self._parse_compress(), 1092 "CLUSTERED": lambda self: self.expression( 1093 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1094 ), 1095 "NONCLUSTERED": lambda self: self.expression( 1096 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1097 ), 1098 "DEFAULT": lambda self: self.expression( 1099 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 "ENCODE": lambda self: 
self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1102 "EPHEMERAL": lambda self: self.expression( 1103 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1104 ), 1105 "EXCLUDE": lambda self: self.expression( 1106 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1107 ), 1108 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1109 "FORMAT": lambda self: self.expression( 1110 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1111 ), 1112 "GENERATED": lambda self: self._parse_generated_as_identity(), 1113 "IDENTITY": lambda self: self._parse_auto_increment(), 1114 "INLINE": lambda self: self._parse_inline(), 1115 "LIKE": lambda self: self._parse_create_like(), 1116 "NOT": lambda self: self._parse_not_constraint(), 1117 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1118 "ON": lambda self: ( 1119 self._match(TokenType.UPDATE) 1120 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1121 ) 1122 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1123 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1124 "PERIOD": lambda self: self._parse_period_for_system_time(), 1125 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1126 "REFERENCES": lambda self: self._parse_references(match=False), 1127 "TITLE": lambda self: self.expression( 1128 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1129 ), 1130 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1131 "UNIQUE": lambda self: self._parse_unique(), 1132 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1133 "WATERMARK": lambda self: self.expression( 1134 exp.WatermarkColumnConstraint, 1135 this=self._match(TokenType.FOR) and self._parse_column(), 1136 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1137 ), 1138 "WITH": lambda self: self.expression( 1139 exp.Properties, expressions=self._parse_wrapped_properties() 1140 ), 1141 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1142 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1143 } 1144 1145 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1146 if not self._match(TokenType.L_PAREN, advance=False): 1147 # Partitioning by bucket or truncate follows the syntax: 1148 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1149 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1150 self._retreat(self._index - 1) 1151 return None 1152 1153 klass = ( 1154 exp.PartitionedByBucket 1155 if self._prev.text.upper() == "BUCKET" 1156 else exp.PartitionByTruncate 1157 ) 1158 1159 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1160 this, expression = seq_get(args, 0), seq_get(args, 1) 1161 1162 if isinstance(this, exp.Literal): 1163 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1164 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1165 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1166 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1167 # 1168 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1169 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1170 this, expression = expression, this 1171 1172 return self.expression(klass, this=this, expression=expression) 1173 1174 ALTER_PARSERS = { 1175 "ADD": lambda self: self._parse_alter_table_add(), 1176 "AS": lambda self: self._parse_select(), 1177 "ALTER": lambda self: self._parse_alter_table_alter(), 1178 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1179 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1180 "DROP": lambda self: self._parse_alter_table_drop(), 1181 "RENAME": lambda self: self._parse_alter_table_rename(), 1182 "SET": lambda self: self._parse_alter_table_set(), 1183 "SWAP": lambda self: self.expression( 1184 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1185 ), 1186 } 1187 1188 ALTER_ALTER_PARSERS = { 1189 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1190 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1191 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1192 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1193 } 1194 1195 SCHEMA_UNNAMED_CONSTRAINTS = { 1196 "CHECK", 1197 "EXCLUDE", 1198 "FOREIGN KEY", 1199 "LIKE", 1200 "PERIOD", 1201 "PRIMARY KEY", 1202 "UNIQUE", 1203 "WATERMARK", 1204 "BUCKET", 1205 "TRUNCATE", 1206 } 1207 1208 NO_PAREN_FUNCTION_PARSERS = { 1209 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1210 "CASE": lambda self: self._parse_case(), 1211 "CONNECT_BY_ROOT": lambda self: self.expression( 1212 exp.ConnectByRoot, this=self._parse_column() 1213 ), 1214 "IF": lambda self: self._parse_if(), 1215 } 1216 1217 INVALID_FUNC_NAME_TOKENS = { 1218 TokenType.IDENTIFIER, 1219 TokenType.STRING, 1220 } 1221 1222 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1223 1224 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1225 1226 FUNCTION_PARSERS = { 1227 **{ 1228 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1229 }, 1230 **{ 1231 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1232 }, 1233 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1234 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1235 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1236 "DECODE": lambda self: self._parse_decode(), 1237 "EXTRACT": lambda self: self._parse_extract(), 1238 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1239 "GAP_FILL": lambda self: self._parse_gap_fill(), 1240 "JSON_OBJECT": lambda self: self._parse_json_object(), 1241 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1242 "JSON_TABLE": lambda self: self._parse_json_table(), 1243 "MATCH": lambda self: self._parse_match_against(), 1244 "NORMALIZE": lambda self: self._parse_normalize(), 1245 "OPENJSON": lambda self: self._parse_open_json(), 1246 "OVERLAY": lambda self: self._parse_overlay(), 1247 "POSITION": lambda self: self._parse_position(), 1248 "PREDICT": lambda self: self._parse_predict(), 1249 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1250 "STRING_AGG": lambda self: self._parse_string_agg(), 1251 "SUBSTRING": lambda self: self._parse_substring(), 1252 "TRIM": lambda self: self._parse_trim(), 1253 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1254 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1255 "XMLELEMENT": lambda self: self.expression( 1256 exp.XMLElement, 1257 this=self._match_text_seq("NAME") and self._parse_id_var(), 1258 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1259 ), 1260 "XMLTABLE": lambda self: self._parse_xml_table(), 1261 } 1262 1263 QUERY_MODIFIER_PARSERS = { 1264 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1265 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1266 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1267 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1268 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1269 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1270 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1271 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1272 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1273 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1274 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1275 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1276 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1277 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1278 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1279 TokenType.CLUSTER_BY: lambda self: ( 1280 "cluster", 1281 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1282 ), 1283 TokenType.DISTRIBUTE_BY: lambda self: ( 1284 "distribute", 1285 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1286 ), 1287 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1288 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1289 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1290 } 1291 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1292 1293 SET_PARSERS = { 1294 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1295 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1296 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1297 "TRANSACTION": lambda self: self._parse_set_transaction(), 1298 } 1299 1300 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1301 1302 TYPE_LITERAL_PARSERS = { 1303 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1304 } 1305 1306 TYPE_CONVERTERS: 
t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1307 1308 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1309 1310 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1311 1312 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1313 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1314 "ISOLATION": ( 1315 ("LEVEL", "REPEATABLE", "READ"), 1316 ("LEVEL", "READ", "COMMITTED"), 1317 ("LEVEL", "READ", "UNCOMITTED"), 1318 ("LEVEL", "SERIALIZABLE"), 1319 ), 1320 "READ": ("WRITE", "ONLY"), 1321 } 1322 1323 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1324 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1325 ) 1326 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1327 1328 CREATE_SEQUENCE: OPTIONS_TYPE = { 1329 "SCALE": ("EXTEND", "NOEXTEND"), 1330 "SHARD": ("EXTEND", "NOEXTEND"), 1331 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1332 **dict.fromkeys( 1333 ( 1334 "SESSION", 1335 "GLOBAL", 1336 "KEEP", 1337 "NOKEEP", 1338 "ORDER", 1339 "NOORDER", 1340 "NOCACHE", 1341 "CYCLE", 1342 "NOCYCLE", 1343 "NOMINVALUE", 1344 "NOMAXVALUE", 1345 "NOSCALE", 1346 "NOSHARD", 1347 ), 1348 tuple(), 1349 ), 1350 } 1351 1352 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1353 1354 USABLES: OPTIONS_TYPE = dict.fromkeys( 1355 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1356 ) 1357 1358 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1359 1360 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1361 "TYPE": ("EVOLUTION",), 1362 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1363 } 1364 1365 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1366 1367 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1368 1369 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1370 "NOT": ("ENFORCED",), 1371 "MATCH": ( 1372 "FULL", 1373 "PARTIAL", 1374 "SIMPLE", 1375 ), 1376 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1377 "USING": ( 1378 "BTREE", 1379 "HASH", 1380 ), 1381 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1382 } 1383 1384 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1385 "NO": ("OTHERS",), 1386 "CURRENT": ("ROW",), 1387 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1388 } 1389 1390 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1391 1392 CLONE_KEYWORDS = {"CLONE", "COPY"} 1393 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1394 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1395 1396 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1397 1398 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1399 1400 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1401 1402 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1403 1404 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1405 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1406 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1407 1408 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1409 1410 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1411 1412 ADD_CONSTRAINT_TOKENS = { 1413 TokenType.CONSTRAINT, 1414 TokenType.FOREIGN_KEY, 1415 TokenType.INDEX, 1416 TokenType.KEY, 1417 TokenType.PRIMARY_KEY, 1418 TokenType.UNIQUE, 1419 } 1420 1421 DISTINCT_TOKENS = {TokenType.DISTINCT} 1422 1423 NULL_TOKENS = {TokenType.NULL} 1424 1425 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - 
SET_OPERATIONS 1426 1427 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1428 1429 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1430 1431 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1432 1433 ODBC_DATETIME_LITERALS = { 1434 "d": exp.Date, 1435 "t": exp.Time, 1436 "ts": exp.Timestamp, 1437 } 1438 1439 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1440 1441 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1442 1443 # The style options for the DESCRIBE statement 1444 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1445 1446 # The style options for the ANALYZE statement 1447 ANALYZE_STYLES = { 1448 "BUFFER_USAGE_LIMIT", 1449 "FULL", 1450 "LOCAL", 1451 "NO_WRITE_TO_BINLOG", 1452 "SAMPLE", 1453 "SKIP_LOCKED", 1454 "VERBOSE", 1455 } 1456 1457 ANALYZE_EXPRESSION_PARSERS = { 1458 "ALL": lambda self: self._parse_analyze_columns(), 1459 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1460 "DELETE": lambda self: self._parse_analyze_delete(), 1461 "DROP": lambda self: self._parse_analyze_histogram(), 1462 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1463 "LIST": lambda self: self._parse_analyze_list(), 1464 "PREDICATE": lambda self: self._parse_analyze_columns(), 1465 "UPDATE": lambda self: self._parse_analyze_histogram(), 1466 "VALIDATE": lambda self: self._parse_analyze_validate(), 1467 } 1468 1469 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1470 1471 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1472 1473 OPERATION_MODIFIERS: t.Set[str] = set() 1474 1475 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1476 1477 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1478 1479 STRICT_CAST = True 1480 1481 PREFIXED_PIVOT_COLUMNS = False 1482 IDENTIFY_PIVOT_STRINGS = False 1483 1484 LOG_DEFAULTS_TO_LN = False 1485 1486 # Whether the table sample clause expects CSV syntax 1487 TABLESAMPLE_CSV = False 1488 1489 # The default method used for table sampling 1490 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1491 1492 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1493 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1494 1495 # Whether the TRIM function expects the characters to trim as its first argument 1496 TRIM_PATTERN_FIRST = False 1497 1498 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1499 STRING_ALIASES = False 1500 1501 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1502 MODIFIERS_ATTACHED_TO_SET_OP = True 1503 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1504 1505 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1506 NO_PAREN_IF_COMMANDS = True 1507 1508 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1509 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1510 1511 # Whether the `:` operator is used to extract a value from a VARIANT column 1512 COLON_IS_VARIANT_EXTRACT = False 1513 1514 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1515 # If this is True and '(' is not found, the keyword will be treated as an identifier 1516 VALUES_FOLLOWED_BY_PAREN = True 1517 1518 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1519 SUPPORTS_IMPLICIT_UNNEST = False 1520 1521 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1522 INTERVAL_SPANS = True 1523 1524 # Whether a PARTITION clause can follow a table reference 1525 SUPPORTS_PARTITION_SELECTION = False 1526 1527 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1528 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1529 1530 # Whether the 'AS' keyword is optional in the CTE definition syntax 1531 OPTIONAL_ALIAS_TOKEN_CTE = True 1532 1533 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1534 ALTER_RENAME_REQUIRES_COLUMN = True 1535 1536 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1537 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1538 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1539 # as BigQuery, where all joins have the same precedence. 1540 JOINS_HAVE_EQUAL_PRECEDENCE = False 1541 1542 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1543 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1544 1545 # Whether map literals support arbitrary expressions as keys. 1546 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1547 # When False, keys are typically restricted to identifiers. 1548 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1549 1550 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this 1551 # is true for Snowflake but not for BigQuery, which can also process strings 1552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1553 1554 __slots__ = ( 1555 "error_level", 1556 "error_message_context", 1557 "max_errors", 1558 "dialect", 1559 "sql", 1560 "errors", 1561 "_tokens", 1562 "_index", 1563 "_curr", 1564 "_next", 1565 "_prev", 1566 "_prev_comments", 1567 "_pipe_cte_counter", 1568 ) 1569 1570 # Autofilled 1571 SHOW_TRIE: t.Dict = {} 1572 SET_TRIE: t.Dict = {} 1573 1574 def __init__( 1575 self, 1576 error_level: t.Optional[ErrorLevel] = None, 1577 error_message_context: int = 100, 1578 max_errors: int = 3, 1579 dialect: DialectType = None, 1580 ): 1581 from sqlglot.dialects import Dialect 1582 1583 self.error_level = error_level or ErrorLevel.IMMEDIATE 1584 self.error_message_context = error_message_context 1585 self.max_errors = max_errors 1586 self.dialect = Dialect.get_or_raise(dialect) 1587 self.reset() 1588 1589 def reset(self): 1590 self.sql = "" 1591 self.errors = [] 1592 self._tokens = [] 1593 self._index = 0 1594 self._curr = None 1595 self._next = None 1596 self._prev = None 1597 self._prev_comments = None 1598 self._pipe_cte_counter = 0 1599 1600 def parse( 1601 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens and returns a list of syntax trees, one tree 1605 per parsed SQL statement. 1606 1607 Args: 1608 raw_tokens: The list of tokens. 1609 sql: The original SQL string, used to produce helpful debug messages. 1610 1611 Returns: 1612 The list of the produced syntax trees.
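
        Example:
            A minimal usage sketch (the default dialect is assumed and tree reprs
            are abbreviated); `Tokenizer` is the class imported at the top of
            this module:

                tokens = Tokenizer().tokenize("SELECT a FROM tbl; SELECT 1")
                trees = Parser().parse(tokens, sql="SELECT a FROM tbl; SELECT 1")
                # -> [exp.Select(...), exp.Select(...)], one tree per statement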
1613 """ 1614 return self._parse( 1615 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1616 ) 1617 1618 def parse_into( 1619 self, 1620 expression_types: exp.IntoType, 1621 raw_tokens: t.List[Token], 1622 sql: t.Optional[str] = None, 1623 ) -> t.List[t.Optional[exp.Expression]]: 1624 """ 1625 Parses a list of tokens into a given Expression type. If a collection of Expression 1626 types is given instead, this method will try to parse the token list into each one 1627 of them, stopping at the first for which the parsing succeeds. 1628 1629 Args: 1630 expression_types: The expression type(s) to try and parse the token list into. 1631 raw_tokens: The list of tokens. 1632 sql: The original SQL string, used to produce helpful debug messages. 1633 1634 Returns: 1635 The target Expression. 1636 """ 1637 errors = [] 1638 for expression_type in ensure_list(expression_types): 1639 parser = self.EXPRESSION_PARSERS.get(expression_type) 1640 if not parser: 1641 raise TypeError(f"No parser registered for {expression_type}") 1642 1643 try: 1644 return self._parse(parser, raw_tokens, sql) 1645 except ParseError as e: 1646 e.errors[0]["into_expression"] = expression_type 1647 errors.append(e) 1648 1649 raise ParseError( 1650 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1651 errors=merge_errors(errors), 1652 ) from errors[-1] 1653 1654 def _parse( 1655 self, 1656 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1657 raw_tokens: t.List[Token], 1658 sql: t.Optional[str] = None, 1659 ) -> t.List[t.Optional[exp.Expression]]: 1660 self.reset() 1661 self.sql = sql or "" 1662 1663 total = len(raw_tokens) 1664 chunks: t.List[t.List[Token]] = [[]] 1665 1666 for i, token in enumerate(raw_tokens): 1667 if token.token_type == TokenType.SEMICOLON: 1668 if token.comments: 1669 chunks.append([token]) 1670 1671 if i < total - 1: 1672 chunks.append([]) 1673 else: 1674 chunks[-1].append(token) 1675 1676 expressions = [] 1677 1678 for tokens in chunks: 1679 self._index = -1 1680 self._tokens = tokens 1681 self._advance() 1682 1683 expressions.append(parse_method(self)) 1684 1685 if self._index < len(self._tokens): 1686 self.raise_error("Invalid expression / Unexpected token") 1687 1688 self.check_errors() 1689 1690 return expressions 1691 1692 def check_errors(self) -> None: 1693 """Logs or raises any found errors, depending on the chosen error level setting.""" 1694 if self.error_level == ErrorLevel.WARN: 1695 for error in self.errors: 1696 logger.error(str(error)) 1697 elif self.error_level == ErrorLevel.RAISE and self.errors: 1698 raise ParseError( 1699 concat_messages(self.errors, self.max_errors), 1700 errors=merge_errors(self.errors), 1701 ) 1702 1703 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1704 """ 1705 Appends an error in the list of recorded errors or raises it, depending on the chosen 1706 error level setting. 1707 """ 1708 token = token or self._curr or self._prev or Token.string("") 1709 start = token.start 1710 end = token.end + 1 1711 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1712 highlight = self.sql[start:end] 1713 end_context = self.sql[end : end + self.error_message_context] 1714 1715 error = ParseError.new( 1716 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1717 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1718 description=message, 1719 line=token.line, 1720 col=token.col, 1721 start_context=start_context, 1722 highlight=highlight, 1723 end_context=end_context, 1724 ) 1725 1726 if self.error_level == ErrorLevel.IMMEDIATE: 1727 raise error 1728 1729 self.errors.append(error) 1730 1731 def expression( 1732 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1733 ) -> E: 1734 """ 1735 Creates a new, validated Expression. 1736 1737 Args: 1738 exp_class: The expression class to instantiate. 1739 comments: An optional list of comments to attach to the expression. 1740 kwargs: The arguments to set for the expression along with their respective values. 1741 1742 Returns: 1743 The target expression. 1744 """ 1745 instance = exp_class(**kwargs) 1746 instance.add_comments(comments) if comments else self._add_comments(instance) 1747 return self.validate_expression(instance) 1748 1749 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1750 if expression and self._prev_comments: 1751 expression.add_comments(self._prev_comments) 1752 self._prev_comments = None 1753 1754 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1755 """ 1756 Validates an Expression, making sure that all its mandatory arguments are set. 1757 1758 Args: 1759 expression: The expression to validate. 1760 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1761 1762 Returns: 1763 The validated expression. 1764 """ 1765 if self.error_level != ErrorLevel.IGNORE: 1766 for error_message in expression.error_messages(args): 1767 self.raise_error(error_message) 1768 1769 return expression 1770 1771 def _find_sql(self, start: Token, end: Token) -> str: 1772 return self.sql[start.start : end.end + 1] 1773 1774 def _is_connected(self) -> bool: 1775 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1776 1777 def _advance(self, times: int = 1) -> None: 1778 self._index += times 1779 self._curr = seq_get(self._tokens, self._index) 1780 self._next = seq_get(self._tokens, self._index + 1) 1781 1782 if self._index > 0: 1783 self._prev = self._tokens[self._index - 1] 1784 self._prev_comments = self._prev.comments 1785 else: 1786 self._prev = None 1787 self._prev_comments = None 1788 1789 def _retreat(self, index: int) -> None: 1790 if index != self._index: 1791 self._advance(index - self._index) 1792 1793 def _warn_unsupported(self) -> None: 1794 if len(self._tokens) <= 1: 1795 return 1796 1797 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1798 # interested in emitting a warning for the one being currently processed. 1799 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1800 1801 logger.warning( 1802 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1803 ) 1804 1805 def _parse_command(self) -> exp.Command: 1806 self._warn_unsupported() 1807 return self.expression( 1808 exp.Command, 1809 comments=self._prev_comments, 1810 this=self._prev.text.upper(), 1811 expression=self._parse_string(), 1812 ) 1813 1814 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1815 """ 1816 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1817 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1818 solve this by setting & resetting the parser state accordingly 1819 """ 1820 index = self._index 1821 error_level = self.error_level 1822 1823 self.error_level = ErrorLevel.IMMEDIATE 1824 try: 1825 this = parse_method() 1826 except ParseError: 1827 this = None 1828 finally: 1829 if not this or retreat: 1830 self._retreat(index) 1831 self.error_level = error_level 1832 1833 return this 1834 1835 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1836 start = self._prev 1837 exists = self._parse_exists() if allow_exists else None 1838 1839 self._match(TokenType.ON) 1840 1841 materialized = self._match_text_seq("MATERIALIZED") 1842 kind = self._match_set(self.CREATABLES) and self._prev 1843 if not kind: 1844 return self._parse_as_command(start) 1845 1846 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1847 this = self._parse_user_defined_function(kind=kind.token_type) 1848 elif kind.token_type == TokenType.TABLE: 1849 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1850 elif kind.token_type == TokenType.COLUMN: 1851 this = self._parse_column() 1852 else: 1853 this = self._parse_id_var() 1854 1855 self._match(TokenType.IS) 1856 1857 return self.expression( 1858 exp.Comment, 1859 this=this, 1860 kind=kind.text, 1861 expression=self._parse_string(), 1862 exists=exists, 1863 materialized=materialized, 1864 ) 1865 1866 def _parse_to_table( 1867 self, 1868 ) -> exp.ToTableProperty: 1869 table = self._parse_table_parts(schema=True) 1870 return self.expression(exp.ToTableProperty, this=table) 1871 1872 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1873 def _parse_ttl(self) -> exp.Expression: 1874 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1875 this = self._parse_bitwise() 1876 1877 if self._match_text_seq("DELETE"): 1878 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1879 if self._match_text_seq("RECOMPRESS"): 1880 return self.expression( 1881 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1882 ) 1883 if self._match_text_seq("TO", "DISK"): 1884 return self.expression( 1885 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1886 ) 1887 if self._match_text_seq("TO", "VOLUME"): 1888 return self.expression( 1889 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1890 ) 1891 1892 return this 1893 1894 expressions = self._parse_csv(_parse_ttl_action) 1895 where = self._parse_where() 1896 group = self._parse_group() 1897 1898 aggregates = None 1899 if group and self._match(TokenType.SET): 1900 aggregates = self._parse_csv(self._parse_set_item) 1901 1902 return self.expression( 1903 exp.MergeTreeTTL, 1904 expressions=expressions, 1905 where=where, 1906 group=group, 1907 aggregates=aggregates, 1908 ) 1909 1910 def _parse_statement(self) -> t.Optional[exp.Expression]: 1911 if self._curr is None: 1912 return None 1913 1914 if self._match_set(self.STATEMENT_PARSERS): 1915 comments = self._prev_comments 1916 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1917 stmt.add_comments(comments, prepend=True) 1918 return stmt 1919 1920 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1921 return self._parse_command() 1922 1923 expression = self._parse_expression() 1924 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1925 return 
self._parse_query_modifiers(expression) 1926 1927 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1928 start = self._prev 1929 temporary = self._match(TokenType.TEMPORARY) 1930 materialized = self._match_text_seq("MATERIALIZED") 1931 1932 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1933 if not kind: 1934 return self._parse_as_command(start) 1935 1936 concurrently = self._match_text_seq("CONCURRENTLY") 1937 if_exists = exists or self._parse_exists() 1938 1939 if kind == "COLUMN": 1940 this = self._parse_column() 1941 else: 1942 this = self._parse_table_parts( 1943 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1944 ) 1945 1946 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1947 1948 if self._match(TokenType.L_PAREN, advance=False): 1949 expressions = self._parse_wrapped_csv(self._parse_types) 1950 else: 1951 expressions = None 1952 1953 return self.expression( 1954 exp.Drop, 1955 exists=if_exists, 1956 this=this, 1957 expressions=expressions, 1958 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1959 temporary=temporary, 1960 materialized=materialized, 1961 cascade=self._match_text_seq("CASCADE"), 1962 constraints=self._match_text_seq("CONSTRAINTS"), 1963 purge=self._match_text_seq("PURGE"), 1964 cluster=cluster, 1965 concurrently=concurrently, 1966 ) 1967 1968 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1969 return ( 1970 self._match_text_seq("IF") 1971 and (not not_ or self._match(TokenType.NOT)) 1972 and self._match(TokenType.EXISTS) 1973 ) 1974 1975 def _parse_create(self) -> exp.Create | exp.Command: 1976 # Note: this can't be None because we've matched a statement parser 1977 start = self._prev 1978 1979 replace = ( 1980 start.token_type == TokenType.REPLACE 1981 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1982 or self._match_pair(TokenType.OR, TokenType.ALTER) 1983 ) 1984 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1985 1986 unique = self._match(TokenType.UNIQUE) 1987 1988 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1989 clustered = True 1990 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1991 "COLUMNSTORE" 1992 ): 1993 clustered = False 1994 else: 1995 clustered = None 1996 1997 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1998 self._advance() 1999 2000 properties = None 2001 create_token = self._match_set(self.CREATABLES) and self._prev 2002 2003 if not create_token: 2004 # exp.Properties.Location.POST_CREATE 2005 properties = self._parse_properties() 2006 create_token = self._match_set(self.CREATABLES) and self._prev 2007 2008 if not properties or not create_token: 2009 return self._parse_as_command(start) 2010 2011 concurrently = self._match_text_seq("CONCURRENTLY") 2012 exists = self._parse_exists(not_=True) 2013 this = None 2014 expression: t.Optional[exp.Expression] = None 2015 indexes = None 2016 no_schema_binding = None 2017 begin = None 2018 end = None 2019 clone = None 2020 2021 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2022 nonlocal properties 2023 if properties and temp_props: 2024 properties.expressions.extend(temp_props.expressions) 2025 elif temp_props: 2026 properties = temp_props 2027 2028 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2029 this = self._parse_user_defined_function(kind=create_token.token_type) 2030 2031 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2032 extend_props(self._parse_properties()) 2033 2034 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2035 extend_props(self._parse_properties()) 2036 2037 if not expression: 2038 if self._match(TokenType.COMMAND): 2039 expression = self._parse_as_command(self._prev) 2040 else: 2041 begin = self._match(TokenType.BEGIN) 2042 return_ = self._match_text_seq("RETURN") 2043 2044 if self._match(TokenType.STRING, advance=False): 2045 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2046 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2047 expression = self._parse_string() 2048 extend_props(self._parse_properties()) 2049 else: 2050 expression = self._parse_user_defined_function_expression() 2051 2052 end = self._match_text_seq("END") 2053 2054 if return_: 2055 expression = self.expression(exp.Return, this=expression) 2056 elif create_token.token_type == TokenType.INDEX: 2057 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2058 if not self._match(TokenType.ON): 2059 index = self._parse_id_var() 2060 anonymous = False 2061 else: 2062 index = None 2063 anonymous = True 2064 2065 this = self._parse_index(index=index, anonymous=anonymous) 2066 elif create_token.token_type in self.DB_CREATABLES: 2067 table_parts = self._parse_table_parts( 2068 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2069 ) 2070 2071 # exp.Properties.Location.POST_NAME 2072 self._match(TokenType.COMMA) 2073 extend_props(self._parse_properties(before=True)) 2074 2075 this = self._parse_schema(this=table_parts) 2076 2077 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2078 extend_props(self._parse_properties()) 2079 2080 has_alias = self._match(TokenType.ALIAS) 2081 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2082 # exp.Properties.Location.POST_ALIAS 2083 extend_props(self._parse_properties()) 2084 2085 if create_token.token_type == TokenType.SEQUENCE: 2086 expression = self._parse_types() 2087 props = self._parse_properties() 2088 if props: 2089 sequence_props = exp.SequenceProperties() 2090 options = [] 2091 for prop in props: 2092 if isinstance(prop, exp.SequenceProperties): 2093 for arg, value in prop.args.items(): 2094 if arg == "options": 2095 options.extend(value) 2096 else: 2097 sequence_props.set(arg, value) 2098 prop.pop() 2099 2100 if options: 2101 sequence_props.set("options", options) 2102 2103 props.append("expressions", sequence_props) 2104 extend_props(props) 2105 else: 2106 expression = self._parse_ddl_select() 2107 2108 # Some dialects also support using a table as an alias instead of a SELECT. 2109 # Here we fall back to this as an alternative.
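            # A hypothetical illustration of the two shapes involved: a statement like
            # CREATE TABLE t1 AS t2 (bare table reference) vs. CREATE TABLE t1 AS
            # SELECT * FROM t2 (DDL SELECT). The fallback below goes through _try_parse
            # so that, if the trailing tokens don't parse as table parts, the parser
            # backtracks cleanly instead of raising.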
2110 if not expression and has_alias: 2111 expression = self._try_parse(self._parse_table_parts) 2112 2113 if create_token.token_type == TokenType.TABLE: 2114 # exp.Properties.Location.POST_EXPRESSION 2115 extend_props(self._parse_properties()) 2116 2117 indexes = [] 2118 while True: 2119 index = self._parse_index() 2120 2121 # exp.Properties.Location.POST_INDEX 2122 extend_props(self._parse_properties()) 2123 if not index: 2124 break 2125 else: 2126 self._match(TokenType.COMMA) 2127 indexes.append(index) 2128 elif create_token.token_type == TokenType.VIEW: 2129 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2130 no_schema_binding = True 2131 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2132 extend_props(self._parse_properties()) 2133 2134 shallow = self._match_text_seq("SHALLOW") 2135 2136 if self._match_texts(self.CLONE_KEYWORDS): 2137 copy = self._prev.text.lower() == "copy" 2138 clone = self.expression( 2139 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2140 ) 2141 2142 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2143 return self._parse_as_command(start) 2144 2145 create_kind_text = create_token.text.upper() 2146 return self.expression( 2147 exp.Create, 2148 this=this, 2149 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2150 replace=replace, 2151 refresh=refresh, 2152 unique=unique, 2153 expression=expression, 2154 exists=exists, 2155 properties=properties, 2156 indexes=indexes, 2157 no_schema_binding=no_schema_binding, 2158 begin=begin, 2159 end=end, 2160 clone=clone, 2161 concurrently=concurrently, 2162 clustered=clustered, 2163 ) 2164 2165 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2166 seq = exp.SequenceProperties() 2167 2168 options = [] 2169 index = self._index 2170 2171 while self._curr: 2172 self._match(TokenType.COMMA) 2173 if self._match_text_seq("INCREMENT"): 2174 self._match_text_seq("BY") 2175 self._match_text_seq("=") 2176 seq.set("increment", self._parse_term()) 2177 elif self._match_text_seq("MINVALUE"): 2178 seq.set("minvalue", self._parse_term()) 2179 elif self._match_text_seq("MAXVALUE"): 2180 seq.set("maxvalue", self._parse_term()) 2181 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2182 self._match_text_seq("=") 2183 seq.set("start", self._parse_term()) 2184 elif self._match_text_seq("CACHE"): 2185 # T-SQL allows empty CACHE which is initialized dynamically 2186 seq.set("cache", self._parse_number() or True) 2187 elif self._match_text_seq("OWNED", "BY"): 2188 # "OWNED BY NONE" is the default 2189 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2190 else: 2191 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2192 if opt: 2193 options.append(opt) 2194 else: 2195 break 2196 2197 seq.set("options", options if options else None) 2198 return None if self._index == index else seq 2199 2200 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2201 # only used for teradata currently 2202 self._match(TokenType.COMMA) 2203 2204 kwargs = { 2205 "no": self._match_text_seq("NO"), 2206 "dual": self._match_text_seq("DUAL"), 2207 "before": self._match_text_seq("BEFORE"), 2208 "default": self._match_text_seq("DEFAULT"), 2209 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2210 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2211 "after": self._match_text_seq("AFTER"), 2212 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2213 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2214 } 2215 2216 if self._match_texts(self.PROPERTY_PARSERS): 2217 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2218 try: 2219 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2220 except TypeError: 2221 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2222 2223 return None 2224 2225 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2226 return self._parse_wrapped_csv(self._parse_property) 2227 2228 def _parse_property(self) -> t.Optional[exp.Expression]: 2229 if self._match_texts(self.PROPERTY_PARSERS): 2230 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2231 2232 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2233 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2234 2235 if self._match_text_seq("COMPOUND", "SORTKEY"): 2236 return self._parse_sortkey(compound=True) 2237 2238 if self._match_text_seq("SQL", "SECURITY"): 2239 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2240 2241 index = self._index 2242 2243 seq_props = self._parse_sequence_properties() 2244 if seq_props: 2245 return seq_props 2246 2247 self._retreat(index) 2248 key = self._parse_column() 2249 2250 if not self._match(TokenType.EQ): 2251 self._retreat(index) 2252 return None 2253 2254 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2255 if isinstance(key, exp.Column): 2256 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2257 2258 value = self._parse_bitwise() or self._parse_var(any_token=True) 2259 2260 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2261 if isinstance(value, exp.Column): 2262 value = exp.var(value.name) 2263 2264 return self.expression(exp.Property, this=key, value=value) 2265 2266 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2267 if self._match_text_seq("BY"): 2268 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2269 2270 self._match(TokenType.ALIAS) 2271 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2272 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2273 2274 return self.expression( 2275 exp.FileFormatProperty, 2276 this=( 2277 self.expression( 2278 exp.InputOutputFormat, 2279 input_format=input_format, 2280 output_format=output_format, 2281 ) 2282 if input_format or output_format 2283 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2284 ), 2285 hive_format=True, 2286 ) 2287 2288 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2289 field = self._parse_field() 2290 if isinstance(field, exp.Identifier) and not field.quoted: 2291 field = exp.var(field) 2292 2293 return field 2294 2295 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2296 self._match(TokenType.EQ) 2297 self._match(TokenType.ALIAS) 2298 2299 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2300 2301 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2302 properties = [] 2303 while True: 2304 if before: 2305 prop = self._parse_property_before() 2306 else: 2307 prop = self._parse_property() 2308 if not prop: 2309 break 2310 for p in ensure_list(prop): 2311 properties.append(p) 
2312 2313 if properties: 2314 return self.expression(exp.Properties, expressions=properties) 2315 2316 return None 2317 2318 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2319 return self.expression( 2320 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2321 ) 2322 2323 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2324 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2325 security_specifier = self._prev.text.upper() 2326 return self.expression(exp.SecurityProperty, this=security_specifier) 2327 return None 2328 2329 def _parse_settings_property(self) -> exp.SettingsProperty: 2330 return self.expression( 2331 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2332 ) 2333 2334 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2335 if self._index >= 2: 2336 pre_volatile_token = self._tokens[self._index - 2] 2337 else: 2338 pre_volatile_token = None 2339 2340 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2341 return exp.VolatileProperty() 2342 2343 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2344 2345 def _parse_retention_period(self) -> exp.Var: 2346 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2347 number = self._parse_number() 2348 number_str = f"{number} " if number else "" 2349 unit = self._parse_var(any_token=True) 2350 return exp.var(f"{number_str}{unit}") 2351 2352 def _parse_system_versioning_property( 2353 self, with_: bool = False 2354 ) -> exp.WithSystemVersioningProperty: 2355 self._match(TokenType.EQ) 2356 prop = self.expression( 2357 exp.WithSystemVersioningProperty, 2358 **{ # type: ignore 2359 "on": True, 2360 "with": with_, 2361 }, 2362 ) 2363 2364 if self._match_text_seq("OFF"): 2365 prop.set("on", False) 2366 return prop 2367 2368 self._match(TokenType.ON) 2369 if self._match(TokenType.L_PAREN): 2370 while self._curr and not self._match(TokenType.R_PAREN): 2371 if self._match_text_seq("HISTORY_TABLE", "="): 2372 prop.set("this", self._parse_table_parts()) 2373 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2374 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2375 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2376 prop.set("retention_period", self._parse_retention_period()) 2377 2378 self._match(TokenType.COMMA) 2379 2380 return prop 2381 2382 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2383 self._match(TokenType.EQ) 2384 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2385 prop = self.expression(exp.DataDeletionProperty, on=on) 2386 2387 if self._match(TokenType.L_PAREN): 2388 while self._curr and not self._match(TokenType.R_PAREN): 2389 if self._match_text_seq("FILTER_COLUMN", "="): 2390 prop.set("filter_column", self._parse_column()) 2391 elif self._match_text_seq("RETENTION_PERIOD", "="): 2392 prop.set("retention_period", self._parse_retention_period()) 2393 2394 self._match(TokenType.COMMA) 2395 2396 return prop 2397 2398 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2399 kind = "HASH" 2400 expressions: t.Optional[t.List[exp.Expression]] = None 2401 if self._match_text_seq("BY", "HASH"): 2402 expressions = self._parse_wrapped_csv(self._parse_id_var) 2403 elif self._match_text_seq("BY", "RANDOM"): 2404 kind = "RANDOM" 2405 2406 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2407 
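        # Illustrative (Doris/StarRocks-style syntax): DISTRIBUTED BY HASH(id) BUCKETS 10
        # sets buckets=10, whereas DISTRIBUTED BY RANDOM or an explicit BUCKETS AUTO
        # leaves `buckets` unset below.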
buckets: t.Optional[exp.Expression] = None 2408 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2409 buckets = self._parse_number() 2410 2411 return self.expression( 2412 exp.DistributedByProperty, 2413 expressions=expressions, 2414 kind=kind, 2415 buckets=buckets, 2416 order=self._parse_order(), 2417 ) 2418 2419 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2420 self._match_text_seq("KEY") 2421 expressions = self._parse_wrapped_id_vars() 2422 return self.expression(expr_type, expressions=expressions) 2423 2424 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2425 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2426 prop = self._parse_system_versioning_property(with_=True) 2427 self._match_r_paren() 2428 return prop 2429 2430 if self._match(TokenType.L_PAREN, advance=False): 2431 return self._parse_wrapped_properties() 2432 2433 if self._match_text_seq("JOURNAL"): 2434 return self._parse_withjournaltable() 2435 2436 if self._match_texts(self.VIEW_ATTRIBUTES): 2437 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2438 2439 if self._match_text_seq("DATA"): 2440 return self._parse_withdata(no=False) 2441 elif self._match_text_seq("NO", "DATA"): 2442 return self._parse_withdata(no=True) 2443 2444 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2445 return self._parse_serde_properties(with_=True) 2446 2447 if self._match(TokenType.SCHEMA): 2448 return self.expression( 2449 exp.WithSchemaBindingProperty, 2450 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2451 ) 2452 2453 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2454 return self.expression( 2455 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2456 ) 2457 2458 if not self._next: 2459 return None 2460 2461 return self._parse_withisolatedloading() 2462 2463 def _parse_procedure_option(self) -> exp.Expression | None: 2464 if self._match_text_seq("EXECUTE", "AS"): 2465 return self.expression( 2466 exp.ExecuteAsProperty, 2467 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2468 or self._parse_string(), 2469 ) 2470 2471 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2472 2473 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2474 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2475 self._match(TokenType.EQ) 2476 2477 user = self._parse_id_var() 2478 self._match(TokenType.PARAMETER) 2479 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2480 2481 if not user or not host: 2482 return None 2483 2484 return exp.DefinerProperty(this=f"{user}@{host}") 2485 2486 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2487 self._match(TokenType.TABLE) 2488 self._match(TokenType.EQ) 2489 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2490 2491 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2492 return self.expression(exp.LogProperty, no=no) 2493 2494 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2495 return self.expression(exp.JournalProperty, **kwargs) 2496 2497 def _parse_checksum(self) -> exp.ChecksumProperty: 2498 self._match(TokenType.EQ) 2499 2500 on = None 2501 if self._match(TokenType.ON): 2502 on = True 2503 elif self._match_text_seq("OFF"): 2504 on = False 2505 2506 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2507 2508 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2509 return self.expression( 2510 exp.Cluster, 2511 expressions=( 2512 self._parse_wrapped_csv(self._parse_ordered) 2513 if wrapped 2514 else self._parse_csv(self._parse_ordered) 2515 ), 2516 ) 2517 2518 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2519 self._match_text_seq("BY") 2520 2521 self._match_l_paren() 2522 expressions = self._parse_csv(self._parse_column) 2523 self._match_r_paren() 2524 2525 if self._match_text_seq("SORTED", "BY"): 2526 self._match_l_paren() 2527 sorted_by = self._parse_csv(self._parse_ordered) 2528 self._match_r_paren() 2529 else: 2530 sorted_by = None 2531 2532 self._match(TokenType.INTO) 2533 buckets = self._parse_number() 2534 self._match_text_seq("BUCKETS") 2535 2536 return self.expression( 2537 exp.ClusteredByProperty, 2538 expressions=expressions, 2539 sorted_by=sorted_by, 2540 buckets=buckets, 2541 ) 2542 2543 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2544 if not self._match_text_seq("GRANTS"): 2545 self._retreat(self._index - 1) 2546 return None 2547 2548 return self.expression(exp.CopyGrantsProperty) 2549 2550 def _parse_freespace(self) -> exp.FreespaceProperty: 2551 self._match(TokenType.EQ) 2552 return self.expression( 2553 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2554 ) 2555 2556 def _parse_mergeblockratio( 2557 self, no: bool = False, default: bool = False 2558 ) -> exp.MergeBlockRatioProperty: 2559 if self._match(TokenType.EQ): 2560 return self.expression( 2561 exp.MergeBlockRatioProperty, 2562 this=self._parse_number(), 2563 percent=self._match(TokenType.PERCENT), 2564 ) 2565 2566 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2567 2568 def _parse_datablocksize( 2569 self, 2570 default: t.Optional[bool] = None, 2571 minimum: t.Optional[bool] = None, 2572 maximum: t.Optional[bool] = None, 2573 ) -> exp.DataBlocksizeProperty: 2574 self._match(TokenType.EQ) 2575 size = self._parse_number() 2576 2577 units = None 2578 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2579 units = self._prev.text 2580 2581 return self.expression( 2582 exp.DataBlocksizeProperty, 2583 size=size, 2584 units=units, 2585 default=default, 2586 minimum=minimum, 2587 maximum=maximum, 2588 ) 2589 2590 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2591 self._match(TokenType.EQ) 2592 always = self._match_text_seq("ALWAYS") 2593 manual = self._match_text_seq("MANUAL") 2594 never = self._match_text_seq("NEVER") 2595 default = self._match_text_seq("DEFAULT") 2596 2597 autotemp = None 2598 if self._match_text_seq("AUTOTEMP"): 2599 autotemp = self._parse_schema() 2600 2601 return self.expression( 2602 exp.BlockCompressionProperty, 2603 always=always, 2604 manual=manual, 2605 never=never, 2606 default=default, 2607 autotemp=autotemp, 2608 ) 2609 2610 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2611 index = self._index 2612 no = self._match_text_seq("NO") 2613 concurrent = self._match_text_seq("CONCURRENT") 2614 2615 if not self._match_text_seq("ISOLATED", "LOADING"): 2616 self._retreat(index) 2617 return None 2618 2619 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2620 return self.expression( 2621 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2622 ) 2623 2624 def _parse_locking(self) -> exp.LockingProperty: 2625 if self._match(TokenType.TABLE): 2626 kind = "TABLE" 2627 elif 
self._match(TokenType.VIEW): 2628 kind = "VIEW" 2629 elif self._match(TokenType.ROW): 2630 kind = "ROW" 2631 elif self._match_text_seq("DATABASE"): 2632 kind = "DATABASE" 2633 else: 2634 kind = None 2635 2636 if kind in ("DATABASE", "TABLE", "VIEW"): 2637 this = self._parse_table_parts() 2638 else: 2639 this = None 2640 2641 if self._match(TokenType.FOR): 2642 for_or_in = "FOR" 2643 elif self._match(TokenType.IN): 2644 for_or_in = "IN" 2645 else: 2646 for_or_in = None 2647 2648 if self._match_text_seq("ACCESS"): 2649 lock_type = "ACCESS" 2650 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2651 lock_type = "EXCLUSIVE" 2652 elif self._match_text_seq("SHARE"): 2653 lock_type = "SHARE" 2654 elif self._match_text_seq("READ"): 2655 lock_type = "READ" 2656 elif self._match_text_seq("WRITE"): 2657 lock_type = "WRITE" 2658 elif self._match_text_seq("CHECKSUM"): 2659 lock_type = "CHECKSUM" 2660 else: 2661 lock_type = None 2662 2663 override = self._match_text_seq("OVERRIDE") 2664 2665 return self.expression( 2666 exp.LockingProperty, 2667 this=this, 2668 kind=kind, 2669 for_or_in=for_or_in, 2670 lock_type=lock_type, 2671 override=override, 2672 ) 2673 2674 def _parse_partition_by(self) -> t.List[exp.Expression]: 2675 if self._match(TokenType.PARTITION_BY): 2676 return self._parse_csv(self._parse_assignment) 2677 return [] 2678 2679 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2680 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2681 if self._match_text_seq("MINVALUE"): 2682 return exp.var("MINVALUE") 2683 if self._match_text_seq("MAXVALUE"): 2684 return exp.var("MAXVALUE") 2685 return self._parse_bitwise() 2686 2687 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2688 expression = None 2689 from_expressions = None 2690 to_expressions = None 2691 2692 if self._match(TokenType.IN): 2693 this = self._parse_wrapped_csv(self._parse_bitwise) 2694 elif self._match(TokenType.FROM): 2695 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2696 self._match_text_seq("TO") 2697 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2698 elif self._match_text_seq("WITH", "(", "MODULUS"): 2699 this = self._parse_number() 2700 self._match_text_seq(",", "REMAINDER") 2701 expression = self._parse_number() 2702 self._match_r_paren() 2703 else: 2704 self.raise_error("Failed to parse partition bound spec.") 2705 2706 return self.expression( 2707 exp.PartitionBoundSpec, 2708 this=this, 2709 expression=expression, 2710 from_expressions=from_expressions, 2711 to_expressions=to_expressions, 2712 ) 2713 2714 # https://www.postgresql.org/docs/current/sql-createtable.html 2715 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2716 if not self._match_text_seq("OF"): 2717 self._retreat(self._index - 1) 2718 return None 2719 2720 this = self._parse_table(schema=True) 2721 2722 if self._match(TokenType.DEFAULT): 2723 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2724 elif self._match_text_seq("FOR", "VALUES"): 2725 expression = self._parse_partition_bound_spec() 2726 else: 2727 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2728 2729 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2730 2731 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2732 self._match(TokenType.EQ) 2733 return self.expression( 2734 exp.PartitionedByProperty, 2735 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2736 ) 2737 2738 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2739 if self._match_text_seq("AND", "STATISTICS"): 2740 statistics = True 2741 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2742 statistics = False 2743 else: 2744 statistics = None 2745 2746 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2747 2748 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2749 if self._match_text_seq("SQL"): 2750 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2751 return None 2752 2753 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2754 if self._match_text_seq("SQL", "DATA"): 2755 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2756 return None 2757 2758 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2759 if self._match_text_seq("PRIMARY", "INDEX"): 2760 return exp.NoPrimaryIndexProperty() 2761 if self._match_text_seq("SQL"): 2762 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2763 return None 2764 2765 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2766 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2767 return exp.OnCommitProperty() 2768 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2769 return exp.OnCommitProperty(delete=True) 2770 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2771 2772 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2773 if self._match_text_seq("SQL", "DATA"): 2774 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2775 return None 2776 2777 def _parse_distkey(self) -> exp.DistKeyProperty: 2778 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2779 2780 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2781 table = self._parse_table(schema=True) 2782 2783 options = [] 2784 while self._match_texts(("INCLUDING", "EXCLUDING")): 2785 this = self._prev.text.upper() 2786 2787 id_var = self._parse_id_var() 2788 if not id_var: 2789 return None 2790 2791 options.append( 2792 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2793 ) 2794 2795 return self.expression(exp.LikeProperty, this=table, expressions=options) 2796 2797 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2798 return self.expression( 2799 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2800 ) 2801 2802 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2803 self._match(TokenType.EQ) 2804 return self.expression( 2805 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2806 ) 2807 2808 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2809 self._match_text_seq("WITH", "CONNECTION") 2810 return self.expression( 2811 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2812 ) 2813 2814 def _parse_returns(self) -> exp.ReturnsProperty: 2815 value: t.Optional[exp.Expression] 2816 null = None 2817 is_table = self._match(TokenType.TABLE) 2818 2819 if is_table: 2820 if self._match(TokenType.LT): 2821 value = self.expression( 2822 exp.Schema, 2823 this="TABLE", 2824 expressions=self._parse_csv(self._parse_struct_types), 2825 ) 2826 if not self._match(TokenType.GT): 2827 self.raise_error("Expecting >") 2828 else: 2829 value = self._parse_schema(exp.var("TABLE")) 2830 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
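            # e.g. the T-SQL function option RETURNS NULL ON NULL INPUT: the routine
            # returns NULL without being invoked when any of its arguments is NULL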
2831 null = True 2832 value = None 2833 else: 2834 value = self._parse_types() 2835 2836 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2837 2838 def _parse_describe(self) -> exp.Describe: 2839 kind = self._match_set(self.CREATABLES) and self._prev.text 2840 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2841 if self._match(TokenType.DOT): 2842 style = None 2843 self._retreat(self._index - 2) 2844 2845 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2846 2847 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2848 this = self._parse_statement() 2849 else: 2850 this = self._parse_table(schema=True) 2851 2852 properties = self._parse_properties() 2853 expressions = properties.expressions if properties else None 2854 partition = self._parse_partition() 2855 return self.expression( 2856 exp.Describe, 2857 this=this, 2858 style=style, 2859 kind=kind, 2860 expressions=expressions, 2861 partition=partition, 2862 format=format, 2863 ) 2864 2865 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2866 kind = self._prev.text.upper() 2867 expressions = [] 2868 2869 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2870 if self._match(TokenType.WHEN): 2871 expression = self._parse_disjunction() 2872 self._match(TokenType.THEN) 2873 else: 2874 expression = None 2875 2876 else_ = self._match(TokenType.ELSE) 2877 2878 if not self._match(TokenType.INTO): 2879 return None 2880 2881 return self.expression( 2882 exp.ConditionalInsert, 2883 this=self.expression( 2884 exp.Insert, 2885 this=self._parse_table(schema=True), 2886 expression=self._parse_derived_table_values(), 2887 ), 2888 expression=expression, 2889 else_=else_, 2890 ) 2891 2892 expression = parse_conditional_insert() 2893 while expression is not None: 2894 expressions.append(expression) 2895 expression = parse_conditional_insert() 2896 2897 return self.expression( 2898 exp.MultitableInserts, 2899 kind=kind, 2900 comments=comments, 2901 expressions=expressions, 2902 source=self._parse_table(), 2903 ) 2904 2905 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2906 comments = [] 2907 hint = self._parse_hint() 2908 overwrite = self._match(TokenType.OVERWRITE) 2909 ignore = self._match(TokenType.IGNORE) 2910 local = self._match_text_seq("LOCAL") 2911 alternative = None 2912 is_function = None 2913 2914 if self._match_text_seq("DIRECTORY"): 2915 this: t.Optional[exp.Expression] = self.expression( 2916 exp.Directory, 2917 this=self._parse_var_or_string(), 2918 local=local, 2919 row_format=self._parse_row_format(match_row=True), 2920 ) 2921 else: 2922 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2923 comments += ensure_list(self._prev_comments) 2924 return self._parse_multitable_inserts(comments) 2925 2926 if self._match(TokenType.OR): 2927 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2928 2929 self._match(TokenType.INTO) 2930 comments += ensure_list(self._prev_comments) 2931 self._match(TokenType.TABLE) 2932 is_function = self._match(TokenType.FUNCTION) 2933 2934 this = ( 2935 self._parse_table(schema=True, parse_partition=True) 2936 if not is_function 2937 else self._parse_function() 2938 ) 2939 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2940 this.set("alias", self._parse_table_alias()) 2941 2942 returning = self._parse_returning() 2943 2944 return self.expression( 2945 
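            # NB: the keyword arguments below are evaluated in source order, so each
            # _match/_parse call consumes tokens in exactly the order the corresponding
            # INSERT clauses may appear (STORED, BY NAME, IF EXISTS, the replace-WHERE,
            # PARTITION, SETTINGS, the source query, ON CONFLICT, RETURNING, ...)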
exp.Insert, 2946 comments=comments, 2947 hint=hint, 2948 is_function=is_function, 2949 this=this, 2950 stored=self._match_text_seq("STORED") and self._parse_stored(), 2951 by_name=self._match_text_seq("BY", "NAME"), 2952 exists=self._parse_exists(), 2953 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2954 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2955 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2956 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2957 conflict=self._parse_on_conflict(), 2958 returning=returning or self._parse_returning(), 2959 overwrite=overwrite, 2960 alternative=alternative, 2961 ignore=ignore, 2962 source=self._match(TokenType.TABLE) and self._parse_table(), 2963 ) 2964 2965 def _parse_kill(self) -> exp.Kill: 2966 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2967 2968 return self.expression( 2969 exp.Kill, 2970 this=self._parse_primary(), 2971 kind=kind, 2972 ) 2973 2974 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2975 conflict = self._match_text_seq("ON", "CONFLICT") 2976 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2977 2978 if not conflict and not duplicate: 2979 return None 2980 2981 conflict_keys = None 2982 constraint = None 2983 2984 if conflict: 2985 if self._match_text_seq("ON", "CONSTRAINT"): 2986 constraint = self._parse_id_var() 2987 elif self._match(TokenType.L_PAREN): 2988 conflict_keys = self._parse_csv(self._parse_id_var) 2989 self._match_r_paren() 2990 2991 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2992 if self._prev.token_type == TokenType.UPDATE: 2993 self._match(TokenType.SET) 2994 expressions = self._parse_csv(self._parse_equality) 2995 else: 2996 expressions = None 2997 2998 return self.expression( 2999 exp.OnConflict, 3000 duplicate=duplicate, 3001 expressions=expressions, 3002 action=action, 3003 conflict_keys=conflict_keys, 3004 constraint=constraint, 3005 where=self._parse_where(), 3006 ) 3007 3008 def _parse_returning(self) -> t.Optional[exp.Returning]: 3009 if not self._match(TokenType.RETURNING): 3010 return None 3011 return self.expression( 3012 exp.Returning, 3013 expressions=self._parse_csv(self._parse_expression), 3014 into=self._match(TokenType.INTO) and self._parse_table_part(), 3015 ) 3016 3017 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3018 if not self._match(TokenType.FORMAT): 3019 return None 3020 return self._parse_row_format() 3021 3022 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3023 index = self._index 3024 with_ = with_ or self._match_text_seq("WITH") 3025 3026 if not self._match(TokenType.SERDE_PROPERTIES): 3027 self._retreat(index) 3028 return None 3029 return self.expression( 3030 exp.SerdeProperties, 3031 **{ # type: ignore 3032 "expressions": self._parse_wrapped_properties(), 3033 "with": with_, 3034 }, 3035 ) 3036 3037 def _parse_row_format( 3038 self, match_row: bool = False 3039 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3040 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3041 return None 3042 3043 if self._match_text_seq("SERDE"): 3044 this = self._parse_string() 3045 3046 serde_properties = self._parse_serde_properties() 3047 3048 return self.expression( 3049 exp.RowFormatSerdeProperty, this=this, 
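                # serde_properties captures Hive's optional
                # WITH SERDEPROPERTIES ('key' = 'value', ...) clause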
serde_properties=serde_properties 3050 ) 3051 3052 self._match_text_seq("DELIMITED") 3053 3054 kwargs = {} 3055 3056 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3057 kwargs["fields"] = self._parse_string() 3058 if self._match_text_seq("ESCAPED", "BY"): 3059 kwargs["escaped"] = self._parse_string() 3060 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3061 kwargs["collection_items"] = self._parse_string() 3062 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3063 kwargs["map_keys"] = self._parse_string() 3064 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3065 kwargs["lines"] = self._parse_string() 3066 if self._match_text_seq("NULL", "DEFINED", "AS"): 3067 kwargs["null"] = self._parse_string() 3068 3069 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3070 3071 def _parse_load(self) -> exp.LoadData | exp.Command: 3072 if self._match_text_seq("DATA"): 3073 local = self._match_text_seq("LOCAL") 3074 self._match_text_seq("INPATH") 3075 inpath = self._parse_string() 3076 overwrite = self._match(TokenType.OVERWRITE) 3077 self._match_pair(TokenType.INTO, TokenType.TABLE) 3078 3079 return self.expression( 3080 exp.LoadData, 3081 this=self._parse_table(schema=True), 3082 local=local, 3083 overwrite=overwrite, 3084 inpath=inpath, 3085 partition=self._parse_partition(), 3086 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3087 serde=self._match_text_seq("SERDE") and self._parse_string(), 3088 ) 3089 return self._parse_as_command(self._prev) 3090 3091 def _parse_delete(self) -> exp.Delete: 3092 # This handles MySQL's "Multiple-Table Syntax" 3093 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3094 tables = None 3095 if not self._match(TokenType.FROM, advance=False): 3096 tables = self._parse_csv(self._parse_table) or None 3097 3098 returning = self._parse_returning() 3099 3100 return self.expression( 3101 exp.Delete, 3102 tables=tables, 3103 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3104 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3105 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3106 where=self._parse_where(), 3107 returning=returning or self._parse_returning(), 3108 limit=self._parse_limit(), 3109 ) 3110 3111 def _parse_update(self) -> exp.Update: 3112 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3113 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3114 returning = self._parse_returning() 3115 return self.expression( 3116 exp.Update, 3117 **{ # type: ignore 3118 "this": this, 3119 "expressions": expressions, 3120 "from": self._parse_from(joins=True), 3121 "where": self._parse_where(), 3122 "returning": returning or self._parse_returning(), 3123 "order": self._parse_order(), 3124 "limit": self._parse_limit(), 3125 }, 3126 ) 3127 3128 def _parse_use(self) -> exp.Use: 3129 return self.expression( 3130 exp.Use, 3131 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3132 this=self._parse_table(schema=False), 3133 ) 3134 3135 def _parse_uncache(self) -> exp.Uncache: 3136 if not self._match(TokenType.TABLE): 3137 self.raise_error("Expecting TABLE after UNCACHE") 3138 3139 return self.expression( 3140 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3141 ) 3142 3143 def _parse_cache(self) -> exp.Cache: 3144 lazy = self._match_text_seq("LAZY") 3145 self._match(TokenType.TABLE) 3146 table = 
self._parse_table(schema=True) 3147 3148 options = [] 3149 if self._match_text_seq("OPTIONS"): 3150 self._match_l_paren() 3151 k = self._parse_string() 3152 self._match(TokenType.EQ) 3153 v = self._parse_string() 3154 options = [k, v] 3155 self._match_r_paren() 3156 3157 self._match(TokenType.ALIAS) 3158 return self.expression( 3159 exp.Cache, 3160 this=table, 3161 lazy=lazy, 3162 options=options, 3163 expression=self._parse_select(nested=True), 3164 ) 3165 3166 def _parse_partition(self) -> t.Optional[exp.Partition]: 3167 if not self._match_texts(self.PARTITION_KEYWORDS): 3168 return None 3169 3170 return self.expression( 3171 exp.Partition, 3172 subpartition=self._prev.text.upper() == "SUBPARTITION", 3173 expressions=self._parse_wrapped_csv(self._parse_assignment), 3174 ) 3175 3176 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3177 def _parse_value_expression() -> t.Optional[exp.Expression]: 3178 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3179 return exp.var(self._prev.text.upper()) 3180 return self._parse_expression() 3181 3182 if self._match(TokenType.L_PAREN): 3183 expressions = self._parse_csv(_parse_value_expression) 3184 self._match_r_paren() 3185 return self.expression(exp.Tuple, expressions=expressions) 3186 3187 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3188 expression = self._parse_expression() 3189 if expression: 3190 return self.expression(exp.Tuple, expressions=[expression]) 3191 return None 3192 3193 def _parse_projections(self) -> t.List[exp.Expression]: 3194 return self._parse_expressions() 3195 3196 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3197 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3198 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3199 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3200 ) 3201 elif self._match(TokenType.FROM): 3202 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3203 # Support parentheses for duckdb FROM-first syntax 3204 select = self._parse_select() 3205 if select: 3206 select.set("from", from_) 3207 this = select 3208 else: 3209 this = exp.select("*").from_(t.cast(exp.From, from_)) 3210 else: 3211 this = ( 3212 self._parse_table(consume_pipe=True) 3213 if table 3214 else self._parse_select(nested=True, parse_set_operation=False) 3215 ) 3216 3217 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3218 # in case a modifier (e.g.
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
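    # Illustrative sketch (not part of the original module): a plain SELECT runs
    # through _parse_select_query and comes back as an exp.Select whose clauses
    # are stored in `args`, e.g.:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #     ast = sqlglot.parse_one("SELECT a FROM t WHERE a > 1")
    #     assert isinstance(ast, exp.Select) and ast.args.get("where") is not None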
    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version first; if that fails, the token is
        # treated as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias
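    # Illustrative sketch (not part of the original module): _parse_with attaches
    # the CTE list to the statement that follows it, e.g.:
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #     assert ast.args["with"].expressions[0].alias == "c"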
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
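    # Illustrative sketch (not part of the original module): on dialects with
    # SUPPORTS_IMPLICIT_UNNEST (e.g. BigQuery), a comma join against a path that
    # refers back to the FROM table is rewritten into an explicit UNNEST:
    #
    #     import sqlglot
    #     sql = sqlglot.transpile("SELECT x FROM t, t.arr", read="bigquery", write="duckdb")[0]
    #     # the `t.arr` comma join is expected to become an UNNEST(...) source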
    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
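    # Illustrative sketch (not part of the original module): optimizer hints
    # arrive as comments attached to the SELECT token and are re-parsed into an
    # exp.Hint via _parse_hint, e.g.:
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("SELECT /*+ BROADCAST(b) */ a FROM b", read="spark")
    #     assert ast.args.get("hint") is not None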
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)
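    # Illustrative sketch (not part of the original module): join modifiers end
    # up as plain-text args on exp.Join, e.g.:
    #
    #     import sqlglot
    #     join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").args["joins"][0]
    #     assert join.side == "LEFT" and join.args.get("on") is not None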
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
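    # Illustrative sketch (not part of the original module): dotted names are
    # split into catalog/db/table parts, e.g.:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #     tbl = sqlglot.parse_one("SELECT * FROM c.s.t").find(exp.Table)
    #     assert (tbl.catalog, tbl.db, tbl.name) == ("c", "s", "t")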
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
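    # Illustrative sketch (not part of the original module): WITH OFFSET grows an
    # `offset` arg on exp.Unnest, e.g.:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #     ast = sqlglot.parse_one(
    #         "SELECT * FROM UNNEST([1, 2]) AS v WITH OFFSET AS pos", read="bigquery"
    #     )
    #     assert ast.find(exp.Unnest).args["offset"].name == "pos"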
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )
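    # Illustrative sketch (not part of the original module): a derived VALUES
    # table keeps its alias on the exp.Values node, e.g.:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #     values = sqlglot.parse_one("SELECT a FROM (VALUES (1), (2)) AS t(a)").find(exp.Values)
    #     assert values.alias == "t"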
    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())
        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result
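    # Illustrative sketch (not part of the original module): the `top=True` path
    # is how T-SQL TOP folds into the same exp.Limit node as LIMIT, e.g.:
    #
    #     import sqlglot
    #     sql = sqlglot.transpile("SELECT TOP 5 * FROM t", read="tsql", write="duckdb")[0]
    #     # expected: "SELECT * FROM t LIMIT 5"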
    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
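    # Illustrative sketch (not part of the original module): a bare UNION picks
    # up the dialect's default set quantifier, e.g.:
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    #     assert ast.args["distinct"] is True  # UNION defaults to DISTINCT here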
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
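    # Illustrative sketch (not part of the original module): the descent chain
    # above (assignment -> disjunction -> conjunction -> ... -> range) encodes
    # operator precedence, e.g.:
    #
    #     import sqlglot
    #     expr = sqlglot.parse_one("SELECT a OR b AND c").expressions[0]
    #     # AND binds tighter than OR: Or(this=a, expression=And(b, c))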
    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
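    # Illustrative sketch (not part of the original module): numeric interval
    # quantities are canonicalized to the string form described above, e.g.:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #     iv = sqlglot.parse_one("SELECT INTERVAL 5 DAY").find(exp.Interval)
    #     assert iv.this.is_string and iv.text("unit") == "DAY"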
self._match(TokenType.INTERVAL) and match_interval: 5062 return None 5063 5064 if self._match(TokenType.STRING, advance=False): 5065 this = self._parse_primary() 5066 else: 5067 this = self._parse_term() 5068 5069 if not this or ( 5070 isinstance(this, exp.Column) 5071 and not this.table 5072 and not this.this.quoted 5073 and this.name.upper() == "IS" 5074 ): 5075 self._retreat(index) 5076 return None 5077 5078 unit = self._parse_function() or ( 5079 not self._match(TokenType.ALIAS, advance=False) 5080 and self._parse_var(any_token=True, upper=True) 5081 ) 5082 5083 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5084 # each INTERVAL expression into this canonical form so it's easy to transpile 5085 if this and this.is_number: 5086 this = exp.Literal.string(this.to_py()) 5087 elif this and this.is_string: 5088 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5089 if parts and unit: 5090 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5091 unit = None 5092 self._retreat(self._index - 1) 5093 5094 if len(parts) == 1: 5095 this = exp.Literal.string(parts[0][0]) 5096 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5097 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5098 unit = self.expression( 5099 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5100 ) 5101 5102 interval = self.expression(exp.Interval, this=this, unit=unit) 5103 5104 index = self._index 5105 self._match(TokenType.PLUS) 5106 5107 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5108 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5109 return self.expression( 5110 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5111 ) 5112 5113 self._retreat(index) 5114 return interval 5115 5116 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5117 this = self._parse_term() 5118 5119 while True: 5120 if self._match_set(self.BITWISE): 5121 this = self.expression( 5122 self.BITWISE[self._prev.token_type], 5123 this=this, 5124 expression=self._parse_term(), 5125 ) 5126 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5127 this = self.expression( 5128 exp.DPipe, 5129 this=this, 5130 expression=self._parse_term(), 5131 safe=not self.dialect.STRICT_STRING_CONCAT, 5132 ) 5133 elif self._match(TokenType.DQMARK): 5134 this = self.expression( 5135 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5136 ) 5137 elif self._match_pair(TokenType.LT, TokenType.LT): 5138 this = self.expression( 5139 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5140 ) 5141 elif self._match_pair(TokenType.GT, TokenType.GT): 5142 this = self.expression( 5143 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5144 ) 5145 else: 5146 break 5147 5148 return this 5149 5150 def _parse_term(self) -> t.Optional[exp.Expression]: 5151 this = self._parse_factor() 5152 5153 while self._match_set(self.TERM): 5154 klass = self.TERM[self._prev.token_type] 5155 comments = self._prev_comments 5156 expression = self._parse_factor() 5157 5158 this = self.expression(klass, this=this, comments=comments, expression=expression) 5159 5160 if isinstance(this, exp.Collate): 5161 expr = this.expression 5162 5163 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5164 # fallback to Identifier / Var 5165 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5166 ident = expr.this 5167 if 
isinstance(ident, exp.Identifier): 5168 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5169 5170 return this 5171 5172 def _parse_factor(self) -> t.Optional[exp.Expression]: 5173 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5174 this = parse_method() 5175 5176 while self._match_set(self.FACTOR): 5177 klass = self.FACTOR[self._prev.token_type] 5178 comments = self._prev_comments 5179 expression = parse_method() 5180 5181 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5182 self._retreat(self._index - 1) 5183 return this 5184 5185 this = self.expression(klass, this=this, comments=comments, expression=expression) 5186 5187 if isinstance(this, exp.Div): 5188 this.args["typed"] = self.dialect.TYPED_DIVISION 5189 this.args["safe"] = self.dialect.SAFE_DIVISION 5190 5191 return this 5192 5193 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5194 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5195 5196 def _parse_unary(self) -> t.Optional[exp.Expression]: 5197 if self._match_set(self.UNARY_PARSERS): 5198 return self.UNARY_PARSERS[self._prev.token_type](self) 5199 return self._parse_at_time_zone(self._parse_type()) 5200 5201 def _parse_type( 5202 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5203 ) -> t.Optional[exp.Expression]: 5204 interval = parse_interval and self._parse_interval() 5205 if interval: 5206 return interval 5207 5208 index = self._index 5209 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5210 5211 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5212 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5213 if isinstance(data_type, exp.Cast): 5214 # This constructor can contain ops directly after it, for instance struct unnesting: 5215 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5216 return self._parse_column_ops(data_type) 5217 5218 if data_type: 5219 index2 = self._index 5220 this = self._parse_primary() 5221 5222 if isinstance(this, exp.Literal): 5223 literal = this.name 5224 this = self._parse_column_ops(this) 5225 5226 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5227 if parser: 5228 return parser(self, this, data_type) 5229 5230 if ( 5231 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5232 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5233 and TIME_ZONE_RE.search(literal) 5234 ): 5235 data_type = exp.DataType.build("TIMESTAMPTZ") 5236 5237 return self.expression(exp.Cast, this=this, to=data_type) 5238 5239 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5240 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5241 # 5242 # If the index difference here is greater than 1, that means the parser itself must have 5243 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5244 # 5245 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5246 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5247 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5248 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5249 # 5250 # In these cases, we don't really want to return the converted type, but instead retreat 5251 # and try to parse a Column or Identifier in the section below.
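# An illustrative walk-through of the two cases described above: for the literal input
# DECIMAL(38, 0), the parser itself consumed the ( 38 , 0 ) tokens, so index2 - index > 1
# and the parsed DataType is kept; for a bare DECIMAL that a TYPE_CONVERTERS callable
# expanded into DECIMAL(38, 0), the difference is exactly 1, so we retreat and try to
# parse a Column / Identifier instead.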
5252 if data_type.expressions and index2 - index > 1: 5253 self._retreat(index2) 5254 return self._parse_column_ops(data_type) 5255 5256 self._retreat(index) 5257 5258 if fallback_to_identifier: 5259 return self._parse_id_var() 5260 5261 this = self._parse_column() 5262 return this and self._parse_column_ops(this) 5263 5264 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5265 this = self._parse_type() 5266 if not this: 5267 return None 5268 5269 if isinstance(this, exp.Column) and not this.table: 5270 this = exp.var(this.name.upper()) 5271 5272 return self.expression( 5273 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5274 ) 5275 5276 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5277 type_name = identifier.name 5278 5279 while self._match(TokenType.DOT): 5280 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5281 5282 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5283 5284 def _parse_types( 5285 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5286 ) -> t.Optional[exp.Expression]: 5287 index = self._index 5288 5289 this: t.Optional[exp.Expression] = None 5290 prefix = self._match_text_seq("SYSUDTLIB", ".") 5291 5292 if not self._match_set(self.TYPE_TOKENS): 5293 identifier = allow_identifiers and self._parse_id_var( 5294 any_token=False, tokens=(TokenType.VAR,) 5295 ) 5296 if isinstance(identifier, exp.Identifier): 5297 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5298 5299 if len(tokens) != 1: 5300 self.raise_error("Unexpected identifier", self._prev) 5301 5302 if tokens[0].token_type in self.TYPE_TOKENS: 5303 self._prev = tokens[0] 5304 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5305 this = self._parse_user_defined_type(identifier) 5306 else: 5307 self._retreat(self._index - 1) 5308 return None 5309 else: 5310 return None 5311 5312 type_token = self._prev.token_type 5313 5314 if type_token == TokenType.PSEUDO_TYPE: 5315 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5316 5317 if type_token == TokenType.OBJECT_IDENTIFIER: 5318 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5319 5320 # https://materialize.com/docs/sql/types/map/ 5321 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5322 key_type = self._parse_types( 5323 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5324 ) 5325 if not self._match(TokenType.FARROW): 5326 self._retreat(index) 5327 return None 5328 5329 value_type = self._parse_types( 5330 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5331 ) 5332 if not self._match(TokenType.R_BRACKET): 5333 self._retreat(index) 5334 return None 5335 5336 return exp.DataType( 5337 this=exp.DataType.Type.MAP, 5338 expressions=[key_type, value_type], 5339 nested=True, 5340 prefix=prefix, 5341 ) 5342 5343 nested = type_token in self.NESTED_TYPE_TOKENS 5344 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5345 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5346 expressions = None 5347 maybe_func = False 5348 5349 if self._match(TokenType.L_PAREN): 5350 if is_struct: 5351 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5352 elif nested: 5353 expressions = self._parse_csv( 5354 lambda: self._parse_types( 5355 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5356 ) 5357 ) 5358 if type_token == TokenType.NULLABLE 
and len(expressions) == 1: 5359 this = expressions[0] 5360 this.set("nullable", True) 5361 self._match_r_paren() 5362 return this 5363 elif type_token in self.ENUM_TYPE_TOKENS: 5364 expressions = self._parse_csv(self._parse_equality) 5365 elif is_aggregate: 5366 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5367 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5368 ) 5369 if not func_or_ident: 5370 return None 5371 expressions = [func_or_ident] 5372 if self._match(TokenType.COMMA): 5373 expressions.extend( 5374 self._parse_csv( 5375 lambda: self._parse_types( 5376 check_func=check_func, 5377 schema=schema, 5378 allow_identifiers=allow_identifiers, 5379 ) 5380 ) 5381 ) 5382 else: 5383 expressions = self._parse_csv(self._parse_type_size) 5384 5385 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5386 if type_token == TokenType.VECTOR and len(expressions) == 2: 5387 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5388 5389 if not expressions or not self._match(TokenType.R_PAREN): 5390 self._retreat(index) 5391 return None 5392 5393 maybe_func = True 5394 5395 values: t.Optional[t.List[exp.Expression]] = None 5396 5397 if nested and self._match(TokenType.LT): 5398 if is_struct: 5399 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5400 else: 5401 expressions = self._parse_csv( 5402 lambda: self._parse_types( 5403 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5404 ) 5405 ) 5406 5407 if not self._match(TokenType.GT): 5408 self.raise_error("Expecting >") 5409 5410 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5411 values = self._parse_csv(self._parse_assignment) 5412 if not values and is_struct: 5413 values = None 5414 self._retreat(self._index - 1) 5415 else: 5416 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5417 5418 if type_token in self.TIMESTAMPS: 5419 if self._match_text_seq("WITH", "TIME", "ZONE"): 5420 maybe_func = False 5421 tz_type = ( 5422 exp.DataType.Type.TIMETZ 5423 if type_token in self.TIMES 5424 else exp.DataType.Type.TIMESTAMPTZ 5425 ) 5426 this = exp.DataType(this=tz_type, expressions=expressions) 5427 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5428 maybe_func = False 5429 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5430 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5431 maybe_func = False 5432 elif type_token == TokenType.INTERVAL: 5433 unit = self._parse_var(upper=True) 5434 if unit: 5435 if self._match_text_seq("TO"): 5436 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5437 5438 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5439 else: 5440 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5441 elif type_token == TokenType.VOID: 5442 this = exp.DataType(this=exp.DataType.Type.NULL) 5443 5444 if maybe_func and check_func: 5445 index2 = self._index 5446 peek = self._parse_string() 5447 5448 if not peek: 5449 self._retreat(index) 5450 return None 5451 5452 self._retreat(index2) 5453 5454 if not this: 5455 if self._match_text_seq("UNSIGNED"): 5456 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5457 if not unsigned_type_token: 5458 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5459 5460 type_token = unsigned_type_token or type_token 5461 5462 this = exp.DataType( 5463 this=exp.DataType.Type[type_token.value], 5464 
expressions=expressions, 5465 nested=nested, 5466 prefix=prefix, 5467 ) 5468 5469 # Empty arrays/structs are allowed 5470 if values is not None: 5471 cls = exp.Struct if is_struct else exp.Array 5472 this = exp.cast(cls(expressions=values), this, copy=False) 5473 5474 elif expressions: 5475 this.set("expressions", expressions) 5476 5477 # https://materialize.com/docs/sql/types/list/#type-name 5478 while self._match(TokenType.LIST): 5479 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5480 5481 index = self._index 5482 5483 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5484 matched_array = self._match(TokenType.ARRAY) 5485 5486 while self._curr: 5487 datatype_token = self._prev.token_type 5488 matched_l_bracket = self._match(TokenType.L_BRACKET) 5489 5490 if (not matched_l_bracket and not matched_array) or ( 5491 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5492 ): 5493 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5494 # not to be confused with the fixed size array parsing 5495 break 5496 5497 matched_array = False 5498 values = self._parse_csv(self._parse_assignment) or None 5499 if ( 5500 values 5501 and not schema 5502 and ( 5503 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5504 ) 5505 ): 5506 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5507 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5508 self._retreat(index) 5509 break 5510 5511 this = exp.DataType( 5512 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5513 ) 5514 self._match(TokenType.R_BRACKET) 5515 5516 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5517 converter = self.TYPE_CONVERTERS.get(this.this) 5518 if converter: 5519 this = converter(t.cast(exp.DataType, this)) 5520 5521 return this 5522 5523 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5524 index = self._index 5525 5526 if ( 5527 self._curr 5528 and self._next 5529 and self._curr.token_type in self.TYPE_TOKENS 5530 and self._next.token_type in self.TYPE_TOKENS 5531 ): 5532 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5533 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5534 this = self._parse_id_var() 5535 else: 5536 this = ( 5537 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5538 or self._parse_id_var() 5539 ) 5540 5541 self._match(TokenType.COLON) 5542 5543 if ( 5544 type_required 5545 and not isinstance(this, exp.DataType) 5546 and not self._match_set(self.TYPE_TOKENS, advance=False) 5547 ): 5548 self._retreat(index) 5549 return self._parse_types() 5550 5551 return self._parse_column_def(this) 5552 5553 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5554 if not self._match_text_seq("AT", "TIME", "ZONE"): 5555 return this 5556 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5557 5558 def _parse_column(self) -> t.Optional[exp.Expression]: 5559 this = self._parse_column_reference() 5560 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5561 5562 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5563 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5564 5565 return column 5566 5567 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5568 this = self._parse_field() 5569 if ( 5570 not this 5571 and self._match(TokenType.VALUES, advance=False) 5572 and self.VALUES_FOLLOWED_BY_PAREN 5573 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5574 ): 5575 this = self._parse_id_var() 5576 5577 if isinstance(this, exp.Identifier): 5578 # We bubble up comments from the Identifier to the Column 5579 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5580 5581 return this 5582 5583 def _parse_colon_as_variant_extract( 5584 self, this: t.Optional[exp.Expression] 5585 ) -> t.Optional[exp.Expression]: 5586 casts = [] 5587 json_path = [] 5588 escape = None 5589 5590 while self._match(TokenType.COLON): 5591 start_index = self._index 5592 5593 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5594 path = self._parse_column_ops( 5595 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5596 ) 5597 5598 # The cast :: operator has a lower precedence than the extraction operator :, so 5599 # we rearrange the AST appropriately to avoid casting the JSON path 5600 while isinstance(path, exp.Cast): 5601 casts.append(path.to) 5602 path = path.this 5603 5604 if casts: 5605 dcolon_offset = next( 5606 i 5607 for i, t in enumerate(self._tokens[start_index:]) 5608 if t.token_type == TokenType.DCOLON 5609 ) 5610 end_token = self._tokens[start_index + dcolon_offset - 1] 5611 else: 5612 end_token = self._prev 5613 5614 if path: 5615 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5616 # it'll roundtrip to a string literal in GET_PATH 5617 if isinstance(path, exp.Identifier) and path.quoted: 5618 escape = True 5619 5620 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5621 5622 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5623 # Databricks transforms it back to the colon/dot notation 5624 if json_path: 5625 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5626 5627 if json_path_expr: 5628 json_path_expr.set("escape", escape) 5629 5630 this = self.expression( 5631 exp.JSONExtract, 5632 this=this, 5633 expression=json_path_expr, 5634 variant_extract=True, 5635 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5636 ) 5637 5638 while casts: 5639 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5640 5641 return this 5642 5643 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5644 return self._parse_types() 5645 5646 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5647 this = self._parse_bracket(this) 5648 5649 while self._match_set(self.COLUMN_OPERATORS): 5650 op_token = self._prev.token_type 5651 op = self.COLUMN_OPERATORS.get(op_token) 5652 5653 if op_token in self.CAST_COLUMN_OPERATORS: 5654 field = self._parse_dcolon() 5655 if not field: 5656 self.raise_error("Expected type") 5657 elif op and self._curr: 5658 field = self._parse_column_reference() or self._parse_bracket() 5659 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5660 field = self._parse_column_ops(field) 5661 else: 5662 field = self._parse_field(any_token=True, anonymous_func=True) 5663 5664 # Function calls can be qualified, e.g., x.y.FOO() 5665 # This converts the final AST to a series of Dots leading to the function call 5666 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5667 if isinstance(field, (exp.Func, exp.Window)) and this: 5668 this = this.transform( 5669 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5670 ) 5671 5672 if op: 5673 this = op(self, this, field) 5674 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5675 this = self.expression( 5676 exp.Column, 5677 comments=this.comments, 5678 this=field, 5679 table=this.this, 5680 db=this.args.get("table"), 5681 catalog=this.args.get("db"), 5682 ) 5683 elif isinstance(field, exp.Window): 5684 # Move the exp.Dot's to the window's function 5685 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5686 field.set("this", window_func) 5687 this = field 5688 else: 5689 this = self.expression(exp.Dot, this=this, expression=field) 5690 5691 if field and field.comments: 5692 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5693 5694 this = self._parse_bracket(this) 5695 5696 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5697 5698 def _parse_paren(self) -> t.Optional[exp.Expression]: 5699 if not self._match(TokenType.L_PAREN): 5700 return None 5701 5702 comments = self._prev_comments 5703 query = self._parse_select() 5704 5705 if query: 5706 expressions = [query] 5707 else: 5708 expressions = self._parse_expressions() 5709 5710 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5711 5712 if not this and self._match(TokenType.R_PAREN, advance=False): 5713 this = self.expression(exp.Tuple) 5714 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5715 this = self._parse_subquery(this=this, parse_alias=False) 5716 elif isinstance(this, exp.Subquery): 5717 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5718 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5719 this = self.expression(exp.Tuple, expressions=expressions) 5720 else: 5721 this = self.expression(exp.Paren, this=this) 5722 5723 if this: 5724 this.add_comments(comments) 5725 5726 self._match_r_paren(expression=this) 5727 return this 5728 5729 def _parse_primary(self) -> t.Optional[exp.Expression]: 5730 if self._match_set(self.PRIMARY_PARSERS): 5731 token_type = self._prev.token_type 5732 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5733 5734 if token_type == TokenType.STRING: 5735 expressions = [primary] 5736 while self._match(TokenType.STRING): 5737 expressions.append(exp.Literal.string(self._prev.text)) 5738 5739 if len(expressions) > 1: 5740 return self.expression(exp.Concat, expressions=expressions) 5741 5742 return primary 5743 5744 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5745 return exp.Literal.number(f"0.{self._prev.text}") 5746 5747 return self._parse_paren() 5748 5749 def _parse_field( 5750 self, 5751 any_token: bool = False, 5752 tokens: t.Optional[t.Collection[TokenType]] = None, 5753 anonymous_func: bool = False, 5754 ) -> t.Optional[exp.Expression]: 5755 if anonymous_func: 5756 field = ( 5757 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5758 or self._parse_primary() 5759 ) 5760 else: 5761 field = self._parse_primary() or self._parse_function( 5762 anonymous=anonymous_func, any_token=any_token 5763 ) 5764 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5765 5766 def _parse_function( 5767 self, 5768 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5769 anonymous: bool = False, 5770 optional_parens: bool = True, 5771 any_token: bool = False, 5772 ) -> t.Optional[exp.Expression]: 5773 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5774 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5775 fn_syntax = False 5776 if ( 5777 self._match(TokenType.L_BRACE, advance=False) 5778 and self._next 5779 and self._next.text.upper() == "FN" 5780 ): 5781 self._advance(2) 5782 fn_syntax = True 5783 5784 func = self._parse_function_call( 5785 functions=functions, 5786 anonymous=anonymous, 5787 optional_parens=optional_parens, 5788 any_token=any_token, 5789 ) 5790 5791 if fn_syntax: 5792 self._match(TokenType.R_BRACE) 5793 5794 return func 5795 5796 def _parse_function_call( 5797 self, 5798 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5799 anonymous: bool = False, 5800 optional_parens: bool = True, 5801 any_token: bool = False, 5802 ) -> t.Optional[exp.Expression]: 5803 if not self._curr: 5804 return None 5805 5806 comments = self._curr.comments 5807 prev = self._prev 5808 token = self._curr 5809 token_type = self._curr.token_type 5810 this = self._curr.text 5811 upper = this.upper() 5812 5813 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5814 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5815 self._advance() 5816 return self._parse_window(parser(self)) 5817 5818 if not self._next or self._next.token_type != TokenType.L_PAREN: 5819 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5820 self._advance() 5821 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5822 5823 return None 5824 5825 if 
any_token: 5826 if token_type in self.RESERVED_TOKENS: 5827 return None 5828 elif token_type not in self.FUNC_TOKENS: 5829 return None 5830 5831 self._advance(2) 5832 5833 parser = self.FUNCTION_PARSERS.get(upper) 5834 if parser and not anonymous: 5835 this = parser(self) 5836 else: 5837 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5838 5839 if subquery_predicate: 5840 expr = None 5841 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5842 expr = self._parse_select() 5843 self._match_r_paren() 5844 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5845 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5846 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5847 self._advance(-1) 5848 expr = self._parse_bitwise() 5849 5850 if expr: 5851 return self.expression(subquery_predicate, comments=comments, this=expr) 5852 5853 if functions is None: 5854 functions = self.FUNCTIONS 5855 5856 function = functions.get(upper) 5857 known_function = function and not anonymous 5858 5859 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5860 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5861 5862 post_func_comments = self._curr and self._curr.comments 5863 if known_function and post_func_comments: 5864 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5865 # call we'll construct it as exp.Anonymous, even if it's "known" 5866 if any( 5867 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5868 for comment in post_func_comments 5869 ): 5870 known_function = False 5871 5872 if alias and known_function: 5873 args = self._kv_to_prop_eq(args) 5874 5875 if known_function: 5876 func_builder = t.cast(t.Callable, function) 5877 5878 if "dialect" in func_builder.__code__.co_varnames: 5879 func = func_builder(args, dialect=self.dialect) 5880 else: 5881 func = func_builder(args) 5882 5883 func = self.validate_expression(func, args) 5884 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5885 func.meta["name"] = this 5886 5887 this = func 5888 else: 5889 if token_type == TokenType.IDENTIFIER: 5890 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5891 5892 this = self.expression(exp.Anonymous, this=this, expressions=args) 5893 this = this.update_positions(token) 5894 5895 if isinstance(this, exp.Expression): 5896 this.add_comments(comments) 5897 5898 self._match_r_paren(this) 5899 return self._parse_window(this) 5900 5901 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5902 return expression 5903 5904 def _kv_to_prop_eq( 5905 self, expressions: t.List[exp.Expression], parse_map: bool = False 5906 ) -> t.List[exp.Expression]: 5907 transformed = [] 5908 5909 for index, e in enumerate(expressions): 5910 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5911 if isinstance(e, exp.Alias): 5912 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5913 5914 if not isinstance(e, exp.PropertyEQ): 5915 e = self.expression( 5916 exp.PropertyEQ, 5917 this=e.this if parse_map else exp.to_identifier(e.this.name), 5918 expression=e.expression, 5919 ) 5920 5921 if isinstance(e.this, exp.Column): 5922 e.this.replace(e.this.this) 5923 else: 5924 e = self._to_prop_eq(e, index) 5925 5926 transformed.append(e) 5927 5928 return transformed 5929 5930 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5931 return self._parse_statement() 5932 5933 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5934 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5935 5936 def _parse_user_defined_function( 5937 self, kind: t.Optional[TokenType] = None 5938 ) -> t.Optional[exp.Expression]: 5939 this = self._parse_table_parts(schema=True) 5940 5941 if not self._match(TokenType.L_PAREN): 5942 return this 5943 5944 expressions = self._parse_csv(self._parse_function_parameter) 5945 self._match_r_paren() 5946 return self.expression( 5947 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5948 ) 5949 5950 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5951 literal = self._parse_primary() 5952 if literal: 5953 return self.expression(exp.Introducer, this=token.text, expression=literal) 5954 5955 return self._identifier_expression(token) 5956 5957 def _parse_session_parameter(self) -> exp.SessionParameter: 5958 kind = None 5959 this = self._parse_id_var() or self._parse_primary() 5960 5961 if this and self._match(TokenType.DOT): 5962 kind = this.name 5963 this = self._parse_var() or self._parse_primary() 5964 5965 return self.expression(exp.SessionParameter, this=this, kind=kind) 5966 5967 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5968 return self._parse_id_var() 5969 5970 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5971 index = self._index 5972 5973 if self._match(TokenType.L_PAREN): 5974 expressions = t.cast( 5975 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5976 ) 5977 5978 if not self._match(TokenType.R_PAREN): 5979 self._retreat(index) 5980 else: 5981 expressions = [self._parse_lambda_arg()] 5982 5983 if self._match_set(self.LAMBDAS): 5984 return self.LAMBDAS[self._prev.token_type](self, expressions) 5985 5986 self._retreat(index) 5987 5988 this: t.Optional[exp.Expression] 5989 5990 if self._match(TokenType.DISTINCT): 5991 this = self.expression( 5992 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5993 ) 5994 else: 5995 this = self._parse_select_or_expression(alias=alias) 5996 5997 return self._parse_limit( 5998 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5999 ) 6000 6001 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6002 index = self._index 6003 if not self._match(TokenType.L_PAREN): 6004 return this 6005 6006 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6007 # expr can be of both types 6008 if self._match_set(self.SELECT_START_TOKENS): 6009 self._retreat(index) 6010 return this 6011 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6012 self._match_r_paren() 6013 return self.expression(exp.Schema, this=this, expressions=args) 6014 6015 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6016 return self._parse_column_def(self._parse_field(any_token=True)) 6017 6018 def _parse_column_def( 6019 self, this: t.Optional[exp.Expression], computed_column: bool = True 6020 ) -> t.Optional[exp.Expression]: 6021 # column defs are not really columns, they're identifiers 6022 if isinstance(this, exp.Column): 6023 this = this.this 6024 6025 if not computed_column: 6026 self._match(TokenType.ALIAS) 6027 6028 kind = self._parse_types(schema=True) 6029 6030 if self._match_text_seq("FOR", "ORDINALITY"): 6031 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6032 6033 constraints: t.List[exp.Expression] = [] 6034 6035 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6036 ("ALIAS", "MATERIALIZED") 6037 ): 6038 persisted = self._prev.text.upper() == "MATERIALIZED" 6039 constraint_kind = exp.ComputedColumnConstraint( 6040 this=self._parse_assignment(), 6041 persisted=persisted or self._match_text_seq("PERSISTED"), 6042 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6043 ) 6044 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6045 elif ( 6046 kind 6047 and self._match(TokenType.ALIAS, advance=False) 6048 and ( 6049 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6050 or (self._next and self._next.token_type == TokenType.L_PAREN) 6051 ) 6052 ): 6053 self._advance() 6054 constraints.append( 6055 self.expression( 6056 exp.ColumnConstraint, 6057 kind=exp.ComputedColumnConstraint( 6058 this=self._parse_disjunction(), 6059 persisted=self._match_texts(("STORED", "VIRTUAL")) 6060 and self._prev.text.upper() == "STORED", 6061 ), 6062 ) 6063 ) 6064 6065 while True: 6066 constraint = self._parse_column_constraint() 6067 if not constraint: 6068 break 6069 constraints.append(constraint) 6070 6071 if not kind and not constraints: 6072 return this 6073 6074 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6075 6076 def _parse_auto_increment( 6077 self, 6078 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6079 start = None 6080 increment = None 6081 order = None 6082 6083 if self._match(TokenType.L_PAREN, advance=False): 6084 args = self._parse_wrapped_csv(self._parse_bitwise) 6085 start = seq_get(args, 0) 6086 increment = seq_get(args, 1) 6087 elif self._match_text_seq("START"): 6088 start = self._parse_bitwise() 6089 self._match_text_seq("INCREMENT") 6090 increment = self._parse_bitwise() 6091 if self._match_text_seq("ORDER"): 6092 order = True 6093 elif self._match_text_seq("NOORDER"): 6094 order = False 6095 6096 if start and increment: 6097 return exp.GeneratedAsIdentityColumnConstraint( 6098 start=start, increment=increment, this=False, order=order 6099 ) 6100 6101 return exp.AutoIncrementColumnConstraint() 6102 6103 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6104 if not self._match_text_seq("REFRESH"): 6105 self._retreat(self._index - 1) 6106 return None 6107 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6108 6109 def _parse_compress(self) -> exp.CompressColumnConstraint: 6110 if 
self._match(TokenType.L_PAREN, advance=False): 6111 return self.expression( 6112 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6113 ) 6114 6115 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6116 6117 def _parse_generated_as_identity( 6118 self, 6119 ) -> ( 6120 exp.GeneratedAsIdentityColumnConstraint 6121 | exp.ComputedColumnConstraint 6122 | exp.GeneratedAsRowColumnConstraint 6123 ): 6124 if self._match_text_seq("BY", "DEFAULT"): 6125 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6126 this = self.expression( 6127 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6128 ) 6129 else: 6130 self._match_text_seq("ALWAYS") 6131 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6132 6133 self._match(TokenType.ALIAS) 6134 6135 if self._match_text_seq("ROW"): 6136 start = self._match_text_seq("START") 6137 if not start: 6138 self._match(TokenType.END) 6139 hidden = self._match_text_seq("HIDDEN") 6140 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6141 6142 identity = self._match_text_seq("IDENTITY") 6143 6144 if self._match(TokenType.L_PAREN): 6145 if self._match(TokenType.START_WITH): 6146 this.set("start", self._parse_bitwise()) 6147 if self._match_text_seq("INCREMENT", "BY"): 6148 this.set("increment", self._parse_bitwise()) 6149 if self._match_text_seq("MINVALUE"): 6150 this.set("minvalue", self._parse_bitwise()) 6151 if self._match_text_seq("MAXVALUE"): 6152 this.set("maxvalue", self._parse_bitwise()) 6153 6154 if self._match_text_seq("CYCLE"): 6155 this.set("cycle", True) 6156 elif self._match_text_seq("NO", "CYCLE"): 6157 this.set("cycle", False) 6158 6159 if not identity: 6160 this.set("expression", self._parse_range()) 6161 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6162 args = self._parse_csv(self._parse_bitwise) 6163 this.set("start", seq_get(args, 0)) 6164 this.set("increment", seq_get(args, 1)) 6165 6166 self._match_r_paren() 6167 6168 return this 6169 6170 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6171 self._match_text_seq("LENGTH") 6172 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6173 6174 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6175 if self._match_text_seq("NULL"): 6176 return self.expression(exp.NotNullColumnConstraint) 6177 if self._match_text_seq("CASESPECIFIC"): 6178 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6179 if self._match_text_seq("FOR", "REPLICATION"): 6180 return self.expression(exp.NotForReplicationColumnConstraint) 6181 6182 # Unconsume the `NOT` token 6183 self._retreat(self._index - 1) 6184 return None 6185 6186 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6187 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6188 6189 procedure_option_follows = ( 6190 self._match(TokenType.WITH, advance=False) 6191 and self._next 6192 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6193 ) 6194 6195 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6196 return self.expression( 6197 exp.ColumnConstraint, 6198 this=this, 6199 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6200 ) 6201 6202 return this 6203 6204 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6205 if not self._match(TokenType.CONSTRAINT): 6206 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6207 6208 return self.expression( 6209 exp.Constraint, 6210 this=self._parse_id_var(), 6211 expressions=self._parse_unnamed_constraints(), 6212 ) 6213 6214 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6215 constraints = [] 6216 while True: 6217 constraint = self._parse_unnamed_constraint() or self._parse_function() 6218 if not constraint: 6219 break 6220 constraints.append(constraint) 6221 6222 return constraints 6223 6224 def _parse_unnamed_constraint( 6225 self, constraints: t.Optional[t.Collection[str]] = None 6226 ) -> t.Optional[exp.Expression]: 6227 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6228 constraints or self.CONSTRAINT_PARSERS 6229 ): 6230 return None 6231 6232 constraint = self._prev.text.upper() 6233 if constraint not in self.CONSTRAINT_PARSERS: 6234 self.raise_error(f"No parser found for schema constraint {constraint}.") 6235 6236 return self.CONSTRAINT_PARSERS[constraint](self) 6237 6238 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6239 return self._parse_id_var(any_token=False) 6240 6241 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6242 self._match_texts(("KEY", "INDEX")) 6243 return self.expression( 6244 exp.UniqueColumnConstraint, 6245 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6246 this=self._parse_schema(self._parse_unique_key()), 6247 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6248 on_conflict=self._parse_on_conflict(), 6249 options=self._parse_key_constraint_options(), 6250 ) 6251 6252 def _parse_key_constraint_options(self) -> t.List[str]: 6253 options = [] 6254 while True: 6255 if not self._curr: 6256 break 6257 6258 if self._match(TokenType.ON): 6259 action = None 6260 on = self._advance_any() and self._prev.text 6261 6262 if self._match_text_seq("NO", "ACTION"): 6263 action = "NO ACTION" 6264 elif self._match_text_seq("CASCADE"): 6265 action = "CASCADE" 6266 elif self._match_text_seq("RESTRICT"): 6267 action = "RESTRICT" 6268 elif self._match_pair(TokenType.SET, TokenType.NULL): 6269 action = "SET NULL" 6270 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6271 action = "SET DEFAULT" 6272 else: 6273 self.raise_error("Invalid key constraint") 6274 6275 options.append(f"ON {on} {action}") 6276 else: 6277 var = self._parse_var_from_options( 6278 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6279 ) 6280 if not var: 6281 break 6282 options.append(var.name) 6283 6284 return options 6285 6286 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6287 if match and not self._match(TokenType.REFERENCES): 6288 return None 6289 6290 expressions = None 6291 this = self._parse_table(schema=True) 6292 options = self._parse_key_constraint_options() 6293 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6294 6295 def _parse_foreign_key(self) -> exp.ForeignKey: 6296 expressions = ( 6297 self._parse_wrapped_id_vars() 6298 if not self._match(TokenType.REFERENCES, advance=False) 6299 else None 6300 ) 6301 reference = self._parse_references() 6302 on_options = {} 6303 6304 while self._match(TokenType.ON): 6305 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6306 self.raise_error("Expected DELETE or UPDATE") 6307 6308 kind = self._prev.text.lower() 6309 6310 if self._match_text_seq("NO", "ACTION"): 6311 action = "NO ACTION" 6312 elif self._match(TokenType.SET): 6313 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6314 action = "SET " + self._prev.text.upper() 6315 else: 6316 self._advance() 6317 action = self._prev.text.upper() 6318 6319 on_options[kind] = action 6320 6321 return self.expression( 6322 exp.ForeignKey, 6323 expressions=expressions, 6324 reference=reference, 6325 options=self._parse_key_constraint_options(), 6326 **on_options, # type: ignore 6327 ) 6328 6329 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6330 return self._parse_ordered() or self._parse_field() 6331 6332 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6333 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6334 self._retreat(self._index - 1) 6335 return None 6336 6337 id_vars = self._parse_wrapped_id_vars() 6338 return self.expression( 6339 exp.PeriodForSystemTimeConstraint, 6340 this=seq_get(id_vars, 0), 6341 expression=seq_get(id_vars, 1), 6342 ) 6343 6344 def _parse_primary_key( 6345 self, wrapped_optional: bool = False, in_props: bool = False 6346 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6347 desc = ( 6348 self._match_set((TokenType.ASC, TokenType.DESC)) 6349 and self._prev.token_type == TokenType.DESC 6350 ) 6351 6352 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6353 return self.expression( 6354 exp.PrimaryKeyColumnConstraint, 6355 desc=desc, 6356 options=self._parse_key_constraint_options(), 6357 ) 6358 6359 expressions = self._parse_wrapped_csv( 6360 self._parse_primary_key_part, optional=wrapped_optional 6361 ) 6362 6363 return self.expression( 6364 exp.PrimaryKey, 6365 expressions=expressions, 6366 include=self._parse_index_params(), 6367 options=self._parse_key_constraint_options(), 6368 ) 6369 6370 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6371 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6372 6373 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6374 """ 6375 Parses a datetime column in ODBC format. We parse the column into the corresponding 6376 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6377 same as we did for `DATE('yyyy-mm-dd')`. 
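Likewise, the other ODBC escape forms `{t 'hh:mm:ss'}` and `{ts 'yyyy-mm-dd hh:mm:ss'}`
resolve through ODBC_DATETIME_LITERALS to the corresponding time and timestamp
expressions.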
6378 6379 Reference: 6380 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6381 """ 6382 self._match(TokenType.VAR) 6383 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6384 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6385 if not self._match(TokenType.R_BRACE): 6386 self.raise_error("Expected }") 6387 return expression 6388 6389 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6390 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6391 return this 6392 6393 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6394 map_token = seq_get(self._tokens, self._index - 2) 6395 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6396 else: 6397 parse_map = False 6398 6399 bracket_kind = self._prev.token_type 6400 if ( 6401 bracket_kind == TokenType.L_BRACE 6402 and self._curr 6403 and self._curr.token_type == TokenType.VAR 6404 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6405 ): 6406 return self._parse_odbc_datetime_literal() 6407 6408 expressions = self._parse_csv( 6409 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6410 ) 6411 6412 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6413 self.raise_error("Expected ]") 6414 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6415 self.raise_error("Expected }") 6416 6417 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6418 if bracket_kind == TokenType.L_BRACE: 6419 this = self.expression( 6420 exp.Struct, 6421 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6422 ) 6423 elif not this: 6424 this = build_array_constructor( 6425 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6426 ) 6427 else: 6428 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6429 if constructor_type: 6430 return build_array_constructor( 6431 constructor_type, 6432 args=expressions, 6433 bracket_kind=bracket_kind, 6434 dialect=self.dialect, 6435 ) 6436 6437 expressions = apply_index_offset( 6438 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6439 ) 6440 this = self.expression( 6441 exp.Bracket, 6442 this=this, 6443 expressions=expressions, 6444 comments=this.pop_comments(), 6445 ) 6446 6447 self._add_comments(this) 6448 return self._parse_bracket(this) 6449 6450 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6451 if self._match(TokenType.COLON): 6452 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6453 return this 6454 6455 def _parse_case(self) -> t.Optional[exp.Expression]: 6456 ifs = [] 6457 default = None 6458 6459 comments = self._prev_comments 6460 expression = self._parse_assignment() 6461 6462 while self._match(TokenType.WHEN): 6463 this = self._parse_assignment() 6464 self._match(TokenType.THEN) 6465 then = self._parse_assignment() 6466 ifs.append(self.expression(exp.If, this=this, true=then)) 6467 6468 if self._match(TokenType.ELSE): 6469 default = self._parse_assignment() 6470 6471 if not self._match(TokenType.END): 6472 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6473 default = exp.column("interval") 6474 else: 6475 self.raise_error("Expected END after CASE", self._prev) 6476 6477 return self.expression( 6478 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6479 ) 6480 6481 def _parse_if(self) -> t.Optional[exp.Expression]: 6482 if self._match(TokenType.L_PAREN): 6483 args = self._parse_csv( 6484 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6485 ) 6486 this = self.validate_expression(exp.If.from_arg_list(args), args) 6487 self._match_r_paren() 6488 else: 6489 index = self._index - 1 6490 6491 if self.NO_PAREN_IF_COMMANDS and index == 0: 6492 return self._parse_as_command(self._prev) 6493 6494 condition = self._parse_assignment() 6495 6496 if not condition: 6497 self._retreat(index) 6498 return None 6499 6500 self._match(TokenType.THEN) 6501 true = self._parse_assignment() 6502 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6503 self._match(TokenType.END) 6504 this = self.expression(exp.If, this=condition, true=true, false=false) 6505 6506 return this 6507 6508 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6509 if not self._match_text_seq("VALUE", "FOR"): 6510 self._retreat(self._index - 1) 6511 return None 6512 6513 return self.expression( 6514 exp.NextValueFor, 6515 this=self._parse_column(), 6516 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6517 ) 6518 6519 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6520 this = self._parse_function() or self._parse_var_or_string(upper=True) 6521 6522 if self._match(TokenType.FROM): 6523 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6524 6525 if not self._match(TokenType.COMMA): 6526 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6527 6528 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6529 6530 def _parse_gap_fill(self) -> exp.GapFill: 6531 self._match(TokenType.TABLE) 6532 this = self._parse_table() 6533 6534 self._match(TokenType.COMMA) 6535 args = [this, *self._parse_csv(self._parse_lambda)] 6536 6537 gap_fill = exp.GapFill.from_arg_list(args) 6538 return self.validate_expression(gap_fill, args) 6539 6540 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6541 this = self._parse_assignment() 6542 6543 if not self._match(TokenType.ALIAS): 6544 if self._match(TokenType.COMMA): 6545 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6546 6547 self.raise_error("Expected AS after CAST") 6548 6549 fmt = None 6550 to = self._parse_types() 6551 6552 default = self._match(TokenType.DEFAULT) 6553 if default: 6554 default = self._parse_bitwise() 6555 self._match_text_seq("ON", "CONVERSION", "ERROR") 6556 6557 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6558 fmt_string = self._parse_string() 6559 fmt = self._parse_at_time_zone(fmt_string) 6560 6561 if not to: 6562 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6563 if to.this in exp.DataType.TEMPORAL_TYPES: 6564 this = self.expression( 6565 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6566 this=this, 6567 format=exp.Literal.string( 6568 format_time( 6569 fmt_string.this if fmt_string else "", 6570 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6571 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6572 ) 6573 ), 6574 safe=safe, 6575 ) 6576 6577 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6578 this.set("zone", fmt.args["zone"]) 6579 return this 6580 elif not to: 6581 self.raise_error("Expected TYPE after CAST") 6582 elif isinstance(to, exp.Identifier): 6583 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 6584 elif to.this == exp.DataType.Type.CHAR: 6585 if self._match(TokenType.CHARACTER_SET): 6586 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6587 6588 return self.build_cast( 6589 strict=strict, 6590 this=this, 6591 to=to, 6592 format=fmt, 6593 safe=safe, 6594 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6595 default=default, 6596 ) 6597 6598 def _parse_string_agg(self) -> exp.GroupConcat: 6599 if self._match(TokenType.DISTINCT): 6600 args: t.List[t.Optional[exp.Expression]] = [ 6601 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6602 ] 6603 if self._match(TokenType.COMMA): 6604 args.extend(self._parse_csv(self._parse_assignment)) 6605 else: 6606 args = self._parse_csv(self._parse_assignment) # type: ignore 6607 6608 if self._match_text_seq("ON", "OVERFLOW"): 6609 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6610 if self._match_text_seq("ERROR"): 6611 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6612 else: 6613 self._match_text_seq("TRUNCATE") 6614 on_overflow = self.expression( 6615 exp.OverflowTruncateBehavior, 6616 this=self._parse_string(), 6617 with_count=( 6618 self._match_text_seq("WITH", "COUNT") 6619 or not self._match_text_seq("WITHOUT", "COUNT") 6620 ), 6621 ) 6622 else: 6623 on_overflow = None 6624 6625 index = self._index 6626 if not self._match(TokenType.R_PAREN) and args: 6627 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6628 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6629 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6630 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6631 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6632 6633 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6634 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6635 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
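# An illustrative pair: Postgres STRING_AGG(x, ',' ORDER BY y) and ANSI-style
# LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) both end up as a GroupConcat whose `this`
# carries the ORDER BY, which keeps the GROUP_CONCAT-style transpilation straightforward.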
6636 if not self._match_text_seq("WITHIN", "GROUP"): 6637 self._retreat(index) 6638 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6639 6640 # The corresponding match_r_paren will be called in parse_function (caller) 6641 self._match_l_paren() 6642 6643 return self.expression( 6644 exp.GroupConcat, 6645 this=self._parse_order(this=seq_get(args, 0)), 6646 separator=seq_get(args, 1), 6647 on_overflow=on_overflow, 6648 ) 6649 6650 def _parse_convert( 6651 self, strict: bool, safe: t.Optional[bool] = None 6652 ) -> t.Optional[exp.Expression]: 6653 this = self._parse_bitwise() 6654 6655 if self._match(TokenType.USING): 6656 to: t.Optional[exp.Expression] = self.expression( 6657 exp.CharacterSet, this=self._parse_var() 6658 ) 6659 elif self._match(TokenType.COMMA): 6660 to = self._parse_types() 6661 else: 6662 to = None 6663 6664 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6665 6666 def _parse_xml_table(self) -> exp.XMLTable: 6667 namespaces = None 6668 passing = None 6669 columns = None 6670 6671 if self._match_text_seq("XMLNAMESPACES", "("): 6672 namespaces = self._parse_xml_namespace() 6673 self._match_text_seq(")", ",") 6674 6675 this = self._parse_string() 6676 6677 if self._match_text_seq("PASSING"): 6678 # The BY VALUE keywords are optional and are provided for semantic clarity 6679 self._match_text_seq("BY", "VALUE") 6680 passing = self._parse_csv(self._parse_column) 6681 6682 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6683 6684 if self._match_text_seq("COLUMNS"): 6685 columns = self._parse_csv(self._parse_field_def) 6686 6687 return self.expression( 6688 exp.XMLTable, 6689 this=this, 6690 namespaces=namespaces, 6691 passing=passing, 6692 columns=columns, 6693 by_ref=by_ref, 6694 ) 6695 6696 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6697 namespaces = [] 6698 6699 while True: 6700 if self._match(TokenType.DEFAULT): 6701 uri = self._parse_string() 6702 else: 6703 uri = self._parse_alias(self._parse_string()) 6704 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6705 if not self._match(TokenType.COMMA): 6706 break 6707 6708 return namespaces 6709 6710 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6711 args = self._parse_csv(self._parse_assignment) 6712 6713 if len(args) < 3: 6714 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6715 6716 return self.expression(exp.DecodeCase, expressions=args) 6717 6718 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6719 self._match_text_seq("KEY") 6720 key = self._parse_column() 6721 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6722 self._match_text_seq("VALUE") 6723 value = self._parse_bitwise() 6724 6725 if not key and not value: 6726 return None 6727 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6728 6729 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6730 if not this or not self._match_text_seq("FORMAT", "JSON"): 6731 return this 6732 6733 return self.expression(exp.FormatJson, this=this) 6734 6735 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6736 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6737 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6738 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6739 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6740 else: 6741 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6742 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6743 6744 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6745 6746 if not empty and not error and not null: 6747 return None 6748 6749 return self.expression( 6750 exp.OnCondition, 6751 empty=empty, 6752 error=error, 6753 null=null, 6754 ) 6755 6756 def _parse_on_handling( 6757 self, on: str, *values: str 6758 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6759 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6760 for value in values: 6761 if self._match_text_seq(value, "ON", on): 6762 return f"{value} ON {on}" 6763 6764 index = self._index 6765 if self._match(TokenType.DEFAULT): 6766 default_value = self._parse_bitwise() 6767 if self._match_text_seq("ON", on): 6768 return default_value 6769 6770 self._retreat(index) 6771 6772 return None 6773 6774 @t.overload 6775 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6776 6777 @t.overload 6778 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6779 6780 def _parse_json_object(self, agg=False): 6781 star = self._parse_star() 6782 expressions = ( 6783 [star] 6784 if star 6785 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6786 ) 6787 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6788 6789 unique_keys = None 6790 if self._match_text_seq("WITH", "UNIQUE"): 6791 unique_keys = True 6792 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6793 unique_keys = False 6794 6795 self._match_text_seq("KEYS") 6796 6797 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6798 self._parse_type() 6799 ) 6800 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6801 6802 return self.expression( 6803 exp.JSONObjectAgg if agg else exp.JSONObject, 6804 expressions=expressions, 6805 null_handling=null_handling, 6806 unique_keys=unique_keys, 6807 return_type=return_type, 6808 encoding=encoding, 6809 ) 6810 6811 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6812 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6813 if not self._match_text_seq("NESTED"): 6814 this = self._parse_id_var() 6815 kind = self._parse_types(allow_identifiers=False) 6816 nested = None 6817 else: 6818 this = None 6819 kind = None 6820 nested = True 6821 6822 path = self._match_text_seq("PATH") and self._parse_string() 6823 nested_schema = nested and self._parse_json_schema() 6824 6825 return self.expression( 6826 exp.JSONColumnDef, 6827 this=this, 6828 kind=kind, 6829 path=path, 6830 nested_schema=nested_schema, 6831 ) 6832 6833 def _parse_json_schema(self) -> exp.JSONSchema: 6834 self._match_text_seq("COLUMNS") 6835 return self.expression( 6836 exp.JSONSchema, 6837 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6838 ) 6839 6840 def _parse_json_table(self) -> exp.JSONTable: 6841 this = self._parse_format_json(self._parse_bitwise()) 6842 path = self._match(TokenType.COMMA) and self._parse_string() 6843 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6844 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6845 schema =
self._parse_json_schema() 6846 6847 return exp.JSONTable( 6848 this=this, 6849 schema=schema, 6850 path=path, 6851 error_handling=error_handling, 6852 empty_handling=empty_handling, 6853 ) 6854 6855 def _parse_match_against(self) -> exp.MatchAgainst: 6856 expressions = self._parse_csv(self._parse_column) 6857 6858 self._match_text_seq(")", "AGAINST", "(") 6859 6860 this = self._parse_string() 6861 6862 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6863 modifier = "IN NATURAL LANGUAGE MODE" 6864 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6865 modifier = f"{modifier} WITH QUERY EXPANSION" 6866 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6867 modifier = "IN BOOLEAN MODE" 6868 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6869 modifier = "WITH QUERY EXPANSION" 6870 else: 6871 modifier = None 6872 6873 return self.expression( 6874 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6875 ) 6876 6877 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6878 def _parse_open_json(self) -> exp.OpenJSON: 6879 this = self._parse_bitwise() 6880 path = self._match(TokenType.COMMA) and self._parse_string() 6881 6882 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6883 this = self._parse_field(any_token=True) 6884 kind = self._parse_types() 6885 path = self._parse_string() 6886 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6887 6888 return self.expression( 6889 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6890 ) 6891 6892 expressions = None 6893 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6894 self._match_l_paren() 6895 expressions = self._parse_csv(_parse_open_json_column_def) 6896 6897 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6898 6899 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6900 args = self._parse_csv(self._parse_bitwise) 6901 6902 if self._match(TokenType.IN): 6903 return self.expression( 6904 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6905 ) 6906 6907 if haystack_first: 6908 haystack = seq_get(args, 0) 6909 needle = seq_get(args, 1) 6910 else: 6911 haystack = seq_get(args, 1) 6912 needle = seq_get(args, 0) 6913 6914 return self.expression( 6915 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6916 ) 6917 6918 def _parse_predict(self) -> exp.Predict: 6919 self._match_text_seq("MODEL") 6920 this = self._parse_table() 6921 6922 self._match(TokenType.COMMA) 6923 self._match_text_seq("TABLE") 6924 6925 return self.expression( 6926 exp.Predict, 6927 this=this, 6928 expression=self._parse_table(), 6929 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6930 ) 6931 6932 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6933 args = self._parse_csv(self._parse_table) 6934 return exp.JoinHint(this=func_name.upper(), expressions=args) 6935 6936 def _parse_substring(self) -> exp.Substring: 6937 # Postgres supports the form: substring(string [from int] [for int]) 6938 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6939 6940 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6941 6942 if self._match(TokenType.FROM): 6943 args.append(self._parse_bitwise()) 6944 if self._match(TokenType.FOR): 6945 if len(args) == 1: 6946 args.append(exp.Literal.number(1)) 6947 args.append(self._parse_bitwise()) 6948 6949 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 6950 6951 def _parse_trim(self) -> exp.Trim: 6952 # https://www.w3resource.com/sql/character-functions/trim.php 6953 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6954 6955 position = None 6956 collation = None 6957 expression = None 6958 6959 if self._match_texts(self.TRIM_TYPES): 6960 position = self._prev.text.upper() 6961 6962 this = self._parse_bitwise() 6963 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6964 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6965 expression = self._parse_bitwise() 6966 6967 if invert_order: 6968 this, expression = expression, this 6969 6970 if self._match(TokenType.COLLATE): 6971 collation = self._parse_bitwise() 6972 6973 return self.expression( 6974 exp.Trim, this=this, position=position, expression=expression, collation=collation 6975 ) 6976 6977 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6978 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6979 6980 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6981 return self._parse_window(self._parse_id_var(), alias=True) 6982 6983 def _parse_respect_or_ignore_nulls( 6984 self, this: t.Optional[exp.Expression] 6985 ) -> t.Optional[exp.Expression]: 6986 if self._match_text_seq("IGNORE", "NULLS"): 6987 return self.expression(exp.IgnoreNulls, this=this) 6988 if self._match_text_seq("RESPECT", "NULLS"): 6989 return self.expression(exp.RespectNulls, this=this) 6990 return this 6991 6992 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6993 if self._match(TokenType.HAVING): 6994 self._match_texts(("MAX", "MIN")) 6995 max = self._prev.text.upper() != "MIN" 6996 return self.expression( 6997 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6998 ) 6999 7000 return this 7001 7002 def _parse_window( 7003 self, this: t.Optional[exp.Expression], alias: bool = False 7004 ) -> t.Optional[exp.Expression]: 7005 func = this 7006 comments = func.comments if isinstance(func, exp.Expression) else None 7007 7008 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7009 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7010 if self._match_text_seq("WITHIN", "GROUP"): 7011 order = self._parse_wrapped(self._parse_order) 7012 this = self.expression(exp.WithinGroup, this=this, expression=order) 7013 7014 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7015 self._match(TokenType.WHERE) 7016 this = self.expression( 7017 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7018 ) 7019 self._match_r_paren() 7020 7021 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7022 # Some dialects choose to implement it and some do not. 7023 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7024 7025 # There is some code above in _parse_lambda that handles 7026 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7027 7028 # The below changes handle 7029 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
7030 7031 # Oracle allows both formats 7032 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7033 # and Snowflake chose to do the same for familiarity 7034 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7035 if isinstance(this, exp.AggFunc): 7036 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7037 7038 if ignore_respect and ignore_respect is not this: 7039 ignore_respect.replace(ignore_respect.this) 7040 this = self.expression(ignore_respect.__class__, this=this) 7041 7042 this = self._parse_respect_or_ignore_nulls(this) 7043 7044 # bigquery select from window x AS (partition by ...) 7045 if alias: 7046 over = None 7047 self._match(TokenType.ALIAS) 7048 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7049 return this 7050 else: 7051 over = self._prev.text.upper() 7052 7053 if comments and isinstance(func, exp.Expression): 7054 func.pop_comments() 7055 7056 if not self._match(TokenType.L_PAREN): 7057 return self.expression( 7058 exp.Window, 7059 comments=comments, 7060 this=this, 7061 alias=self._parse_id_var(False), 7062 over=over, 7063 ) 7064 7065 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7066 7067 first = self._match(TokenType.FIRST) 7068 if self._match_text_seq("LAST"): 7069 first = False 7070 7071 partition, order = self._parse_partition_and_order() 7072 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7073 7074 if kind: 7075 self._match(TokenType.BETWEEN) 7076 start = self._parse_window_spec() 7077 self._match(TokenType.AND) 7078 end = self._parse_window_spec() 7079 exclude = ( 7080 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7081 if self._match_text_seq("EXCLUDE") 7082 else None 7083 ) 7084 7085 spec = self.expression( 7086 exp.WindowSpec, 7087 kind=kind, 7088 start=start["value"], 7089 start_side=start["side"], 7090 end=end["value"], 7091 end_side=end["side"], 7092 exclude=exclude, 7093 ) 7094 else: 7095 spec = None 7096 7097 self._match_r_paren() 7098 7099 window = self.expression( 7100 exp.Window, 7101 comments=comments, 7102 this=this, 7103 partition_by=partition, 7104 order=order, 7105 spec=spec, 7106 alias=window_alias, 7107 over=over, 7108 first=first, 7109 ) 7110 7111 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
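# Editor's illustrative sketch (not part of the original source): the
# recursive _parse_window call below nests chained windows, while an
# ordinary frame clause lands in exp.WindowSpec via the kind/start/end
# arguments above. Assuming the public sqlglot API:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> w = sqlglot.parse_one(
#     ...     "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED "
#     ...     "PRECEDING AND CURRENT ROW) FROM t"
#     ... ).find(exp.Window)
#     >>> w.args["spec"].args["kind"]
#     'ROWS'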
7112 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7113 return self._parse_window(window, alias=alias) 7114 7115 return window 7116 7117 def _parse_partition_and_order( 7118 self, 7119 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7120 return self._parse_partition_by(), self._parse_order() 7121 7122 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7123 self._match(TokenType.BETWEEN) 7124 7125 return { 7126 "value": ( 7127 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7128 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7129 or self._parse_bitwise() 7130 ), 7131 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7132 } 7133 7134 def _parse_alias( 7135 self, this: t.Optional[exp.Expression], explicit: bool = False 7136 ) -> t.Optional[exp.Expression]: 7137 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7138 # so this section tries to parse the clause version and if it fails, it treats the token 7139 # as an identifier (alias) 7140 if self._can_parse_limit_or_offset(): 7141 return this 7142 7143 any_token = self._match(TokenType.ALIAS) 7144 comments = self._prev_comments or [] 7145 7146 if explicit and not any_token: 7147 return this 7148 7149 if self._match(TokenType.L_PAREN): 7150 aliases = self.expression( 7151 exp.Aliases, 7152 comments=comments, 7153 this=this, 7154 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7155 ) 7156 self._match_r_paren(aliases) 7157 return aliases 7158 7159 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7160 self.STRING_ALIASES and self._parse_string_as_identifier() 7161 ) 7162 7163 if alias: 7164 comments.extend(alias.pop_comments()) 7165 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7166 column = this.this 7167 7168 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7169 if not this.comments and column and column.comments: 7170 this.comments = column.pop_comments() 7171 7172 return this 7173 7174 def _parse_id_var( 7175 self, 7176 any_token: bool = True, 7177 tokens: t.Optional[t.Collection[TokenType]] = None, 7178 ) -> t.Optional[exp.Expression]: 7179 expression = self._parse_identifier() 7180 if not expression and ( 7181 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7182 ): 7183 quoted = self._prev.token_type == TokenType.STRING 7184 expression = self._identifier_expression(quoted=quoted) 7185 7186 return expression 7187 7188 def _parse_string(self) -> t.Optional[exp.Expression]: 7189 if self._match_set(self.STRING_PARSERS): 7190 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7191 return self._parse_placeholder() 7192 7193 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7194 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7195 if output: 7196 output.update_positions(self._prev) 7197 return output 7198 7199 def _parse_number(self) -> t.Optional[exp.Expression]: 7200 if self._match_set(self.NUMERIC_PARSERS): 7201 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7202 return self._parse_placeholder() 7203 7204 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7205 if self._match(TokenType.IDENTIFIER): 7206 return self._identifier_expression(quoted=True) 7207 return self._parse_placeholder() 7208 7209 def _parse_var( 7210 self, 7211 any_token: bool = False, 7212 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7213 upper: bool = False, 7214 ) -> t.Optional[exp.Expression]: 7215 if ( 7216 (any_token and self._advance_any()) 7217 or self._match(TokenType.VAR) 7218 or (self._match_set(tokens) if tokens else False) 7219 ): 7220 return self.expression( 7221 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7222 ) 7223 return self._parse_placeholder() 7224 7225 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7226 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7227 self._advance() 7228 return self._prev 7229 return None 7230 7231 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7232 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7233 7234 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7235 return self._parse_primary() or self._parse_var(any_token=True) 7236 7237 def _parse_null(self) -> t.Optional[exp.Expression]: 7238 if self._match_set(self.NULL_TOKENS): 7239 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7240 return self._parse_placeholder() 7241 7242 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7243 if self._match(TokenType.TRUE): 7244 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7245 if self._match(TokenType.FALSE): 7246 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7247 return self._parse_placeholder() 7248 7249 def _parse_star(self) -> t.Optional[exp.Expression]: 7250 if self._match(TokenType.STAR): 7251 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7252 return self._parse_placeholder() 7253 7254 def _parse_parameter(self) -> exp.Parameter: 7255 this = self._parse_identifier() or self._parse_primary_or_var() 7256 return self.expression(exp.Parameter, this=this) 7257 7258 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7259 if self._match_set(self.PLACEHOLDER_PARSERS): 7260 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7261 if placeholder: 7262 return placeholder 7263 self._advance(-1) 7264 return None 7265 7266 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7267 if not self._match_texts(keywords): 7268 return None 7269 if self._match(TokenType.L_PAREN, advance=False): 7270 return self._parse_wrapped_csv(self._parse_expression) 7271 7272 expression = self._parse_expression() 7273 return [expression] if expression else None 7274 7275 def _parse_csv( 7276 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7277 ) -> t.List[exp.Expression]: 7278 parse_result = parse_method() 7279 items = [parse_result] if parse_result is not None else [] 7280 7281 while self._match(sep): 7282 self._add_comments(parse_result) 7283 parse_result = parse_method() 7284 if parse_result is not None: 7285 items.append(parse_result) 7286 7287 return items 7288 7289 def _parse_tokens( 7290 self, parse_method: t.Callable, expressions: t.Dict 7291 ) -> t.Optional[exp.Expression]: 7292 this = parse_method() 7293 7294 while self._match_set(expressions): 7295 this = self.expression( 7296 expressions[self._prev.token_type], 7297 this=this, 7298 comments=self._prev_comments, 7299 expression=parse_method(), 7300 ) 7301 7302 return this 7303 7304 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7305 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7306 7307 def _parse_wrapped_csv( 7308 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7309 ) -> t.List[exp.Expression]: 7310 return self._parse_wrapped( 7311 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7312 ) 7313 7314 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7315 wrapped = self._match(TokenType.L_PAREN) 7316 if not wrapped and not optional: 7317 self.raise_error("Expecting (") 7318 parse_result = parse_method() 7319 if wrapped: 7320 self._match_r_paren() 7321 return parse_result 7322 7323 def _parse_expressions(self) -> t.List[exp.Expression]: 7324 return self._parse_csv(self._parse_expression) 7325 7326 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7327 return self._parse_select() or self._parse_set_operations( 7328 self._parse_alias(self._parse_assignment(), explicit=True) 7329 if alias 7330 else self._parse_assignment() 7331 ) 7332 7333 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7334 return self._parse_query_modifiers( 7335 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7336 ) 7337 7338 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7339 this = None 7340 if self._match_texts(self.TRANSACTION_KIND): 7341 this = self._prev.text 7342 7343 self._match_texts(("TRANSACTION", "WORK")) 7344 7345 modes = [] 7346 while True: 7347 mode = [] 7348 while self._match(TokenType.VAR): 7349 mode.append(self._prev.text) 7350 7351 if mode: 7352 modes.append(" ".join(mode)) 7353 if not self._match(TokenType.COMMA): 7354 break 7355 7356 return self.expression(exp.Transaction, this=this, modes=modes) 7357 7358 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7359 chain = None 7360 savepoint = None 7361 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7362 7363 self._match_texts(("TRANSACTION", "WORK")) 7364 7365 if self._match_text_seq("TO"): 7366 self._match_text_seq("SAVEPOINT") 7367 savepoint = self._parse_id_var() 7368 7369 if self._match(TokenType.AND): 7370 chain = not self._match_text_seq("NO") 7371 self._match_text_seq("CHAIN") 7372 7373 if is_rollback: 7374 return self.expression(exp.Rollback, savepoint=savepoint) 7375 7376 return self.expression(exp.Commit, chain=chain) 7377 7378 def _parse_refresh(self) -> exp.Refresh: 7379 self._match(TokenType.TABLE) 7380 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7381 7382 def _parse_column_def_with_exists(self): 7383 start = self._index 7384 self._match(TokenType.COLUMN) 7385 7386 exists_column = self._parse_exists(not_=True) 7387 expression = self._parse_field_def() 7388 7389 if not isinstance(expression, exp.ColumnDef): 7390 self._retreat(start) 7391 return None 7392 7393 expression.set("exists", exists_column) 7394 7395 return expression 7396 7397 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7398 if not self._prev.text.upper() == "ADD": 7399 return None 7400 7401 expression = self._parse_column_def_with_exists() 7402 if not expression: 7403 return None 7404 7405 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7406 if self._match_texts(("FIRST", "AFTER")): 7407 position = self._prev.text 7408 column_position = self.expression( 7409 exp.ColumnPosition, this=self._parse_column(), position=position 7410 ) 7411 expression.set("position", column_position) 7412 7413 return expression 7414 7415 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7416 drop = 
self._match(TokenType.DROP) and self._parse_drop() 7417 if drop and not isinstance(drop, exp.Command): 7418 drop.set("kind", drop.args.get("kind", "COLUMN")) 7419 return drop 7420 7421 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7422 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7423 return self.expression( 7424 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7425 ) 7426 7427 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7428 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7429 self._match_text_seq("ADD") 7430 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7431 return self.expression( 7432 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7433 ) 7434 7435 column_def = self._parse_add_column() 7436 if isinstance(column_def, exp.ColumnDef): 7437 return column_def 7438 7439 exists = self._parse_exists(not_=True) 7440 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7441 return self.expression( 7442 exp.AddPartition, 7443 exists=exists, 7444 this=self._parse_field(any_token=True), 7445 location=self._match_text_seq("LOCATION", advance=False) 7446 and self._parse_property(), 7447 ) 7448 7449 return None 7450 7451 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7452 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7453 or self._match_text_seq("COLUMNS") 7454 ): 7455 schema = self._parse_schema() 7456 7457 return ( 7458 ensure_list(schema) 7459 if schema 7460 else self._parse_csv(self._parse_column_def_with_exists) 7461 ) 7462 7463 return self._parse_csv(_parse_add_alteration) 7464 7465 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7466 if self._match_texts(self.ALTER_ALTER_PARSERS): 7467 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7468 7469 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7470 # keyword after ALTER we default to parsing this statement 7471 self._match(TokenType.COLUMN) 7472 column = self._parse_field(any_token=True) 7473 7474 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7475 return self.expression(exp.AlterColumn, this=column, drop=True) 7476 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7477 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7478 if self._match(TokenType.COMMENT): 7479 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7480 if self._match_text_seq("DROP", "NOT", "NULL"): 7481 return self.expression( 7482 exp.AlterColumn, 7483 this=column, 7484 drop=True, 7485 allow_null=True, 7486 ) 7487 if self._match_text_seq("SET", "NOT", "NULL"): 7488 return self.expression( 7489 exp.AlterColumn, 7490 this=column, 7491 allow_null=False, 7492 ) 7493 7494 if self._match_text_seq("SET", "VISIBLE"): 7495 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7496 if self._match_text_seq("SET", "INVISIBLE"): 7497 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7498 7499 self._match_text_seq("SET", "DATA") 7500 self._match_text_seq("TYPE") 7501 return self.expression( 7502 exp.AlterColumn, 7503 this=column, 7504 dtype=self._parse_types(), 7505 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7506 using=self._match(TokenType.USING) and self._parse_assignment(), 7507 ) 7508 7509 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7510 if 
self._match_texts(("ALL", "EVEN", "AUTO")): 7511 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7512 7513 self._match_text_seq("KEY", "DISTKEY") 7514 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7515 7516 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7517 if compound: 7518 self._match_text_seq("SORTKEY") 7519 7520 if self._match(TokenType.L_PAREN, advance=False): 7521 return self.expression( 7522 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7523 ) 7524 7525 self._match_texts(("AUTO", "NONE")) 7526 return self.expression( 7527 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7528 ) 7529 7530 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7531 index = self._index - 1 7532 7533 partition_exists = self._parse_exists() 7534 if self._match(TokenType.PARTITION, advance=False): 7535 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7536 7537 self._retreat(index) 7538 return self._parse_csv(self._parse_drop_column) 7539 7540 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7541 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7542 exists = self._parse_exists() 7543 old_column = self._parse_column() 7544 to = self._match_text_seq("TO") 7545 new_column = self._parse_column() 7546 7547 if old_column is None or to is None or new_column is None: 7548 return None 7549 7550 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7551 7552 self._match_text_seq("TO") 7553 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7554 7555 def _parse_alter_table_set(self) -> exp.AlterSet: 7556 alter_set = self.expression(exp.AlterSet) 7557 7558 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7559 "TABLE", "PROPERTIES" 7560 ): 7561 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7562 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7563 alter_set.set("expressions", [self._parse_assignment()]) 7564 elif self._match_texts(("LOGGED", "UNLOGGED")): 7565 alter_set.set("option", exp.var(self._prev.text.upper())) 7566 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7567 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7568 elif self._match_text_seq("LOCATION"): 7569 alter_set.set("location", self._parse_field()) 7570 elif self._match_text_seq("ACCESS", "METHOD"): 7571 alter_set.set("access_method", self._parse_field()) 7572 elif self._match_text_seq("TABLESPACE"): 7573 alter_set.set("tablespace", self._parse_field()) 7574 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7575 alter_set.set("file_format", [self._parse_field()]) 7576 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7577 alter_set.set("file_format", self._parse_wrapped_options()) 7578 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7579 alter_set.set("copy_options", self._parse_wrapped_options()) 7580 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7581 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7582 else: 7583 if self._match_text_seq("SERDE"): 7584 alter_set.set("serde", self._parse_field()) 7585 7586 properties = self._parse_wrapped(self._parse_properties, optional=True) 7587 alter_set.set("expressions", [properties]) 7588 7589 return alter_set 
7590 7591 def _parse_alter(self) -> exp.Alter | exp.Command: 7592 start = self._prev 7593 7594 alter_token = self._match_set(self.ALTERABLES) and self._prev 7595 if not alter_token: 7596 return self._parse_as_command(start) 7597 7598 exists = self._parse_exists() 7599 only = self._match_text_seq("ONLY") 7600 this = self._parse_table(schema=True) 7601 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7602 7603 if self._next: 7604 self._advance() 7605 7606 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7607 if parser: 7608 actions = ensure_list(parser(self)) 7609 not_valid = self._match_text_seq("NOT", "VALID") 7610 options = self._parse_csv(self._parse_property) 7611 7612 if not self._curr and actions: 7613 return self.expression( 7614 exp.Alter, 7615 this=this, 7616 kind=alter_token.text.upper(), 7617 exists=exists, 7618 actions=actions, 7619 only=only, 7620 options=options, 7621 cluster=cluster, 7622 not_valid=not_valid, 7623 ) 7624 7625 return self._parse_as_command(start) 7626 7627 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7628 start = self._prev 7629 # https://duckdb.org/docs/sql/statements/analyze 7630 if not self._curr: 7631 return self.expression(exp.Analyze) 7632 7633 options = [] 7634 while self._match_texts(self.ANALYZE_STYLES): 7635 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7636 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7637 else: 7638 options.append(self._prev.text.upper()) 7639 7640 this: t.Optional[exp.Expression] = None 7641 inner_expression: t.Optional[exp.Expression] = None 7642 7643 kind = self._curr and self._curr.text.upper() 7644 7645 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7646 this = self._parse_table_parts() 7647 elif self._match_text_seq("TABLES"): 7648 if self._match_set((TokenType.FROM, TokenType.IN)): 7649 kind = f"{kind} {self._prev.text.upper()}" 7650 this = self._parse_table(schema=True, is_db_reference=True) 7651 elif self._match_text_seq("DATABASE"): 7652 this = self._parse_table(schema=True, is_db_reference=True) 7653 elif self._match_text_seq("CLUSTER"): 7654 this = self._parse_table() 7655 # Try matching inner expr keywords before fallback to parse table. 
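# Editor's note (illustrative, not part of the original source):
# ANALYZE_EXPRESSION_PARSERS maps keywords to the _parse_analyze_*
# helpers defined below. A simple statement round-trips, assuming the
# public sqlglot API (output indicative):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("ANALYZE TABLE tbl").sql()
#     'ANALYZE TABLE tbl'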
7656 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7657 kind = None 7658 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7659 else: 7660 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7661 kind = None 7662 this = self._parse_table_parts() 7663 7664 partition = self._try_parse(self._parse_partition) 7665 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7666 return self._parse_as_command(start) 7667 7668 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7669 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7670 "WITH", "ASYNC", "MODE" 7671 ): 7672 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7673 else: 7674 mode = None 7675 7676 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7677 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7678 7679 properties = self._parse_properties() 7680 return self.expression( 7681 exp.Analyze, 7682 kind=kind, 7683 this=this, 7684 mode=mode, 7685 partition=partition, 7686 properties=properties, 7687 expression=inner_expression, 7688 options=options, 7689 ) 7690 7691 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7692 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7693 this = None 7694 kind = self._prev.text.upper() 7695 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7696 expressions = [] 7697 7698 if not self._match_text_seq("STATISTICS"): 7699 self.raise_error("Expecting token STATISTICS") 7700 7701 if self._match_text_seq("NOSCAN"): 7702 this = "NOSCAN" 7703 elif self._match(TokenType.FOR): 7704 if self._match_text_seq("ALL", "COLUMNS"): 7705 this = "FOR ALL COLUMNS" 7706 if self._match_texts(("COLUMNS",)): 7707 this = "FOR COLUMNS" 7708 expressions = self._parse_csv(self._parse_column_reference) 7709 elif self._match_text_seq("SAMPLE"): 7710 sample = self._parse_number() 7711 expressions = [ 7712 self.expression( 7713 exp.AnalyzeSample, 7714 sample=sample, 7715 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7716 ) 7717 ] 7718 7719 return self.expression( 7720 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7721 ) 7722 7723 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7724 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7725 kind = None 7726 this = None 7727 expression: t.Optional[exp.Expression] = None 7728 if self._match_text_seq("REF", "UPDATE"): 7729 kind = "REF" 7730 this = "UPDATE" 7731 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7732 this = "UPDATE SET DANGLING TO NULL" 7733 elif self._match_text_seq("STRUCTURE"): 7734 kind = "STRUCTURE" 7735 if self._match_text_seq("CASCADE", "FAST"): 7736 this = "CASCADE FAST" 7737 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7738 ("ONLINE", "OFFLINE") 7739 ): 7740 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7741 expression = self._parse_into() 7742 7743 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7744 7745 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7746 this = self._prev.text.upper() 7747 if self._match_text_seq("COLUMNS"): 7748 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7749 return None 7750 7751 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7752 kind =
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7753 if self._match_text_seq("STATISTICS"): 7754 return self.expression(exp.AnalyzeDelete, kind=kind) 7755 return None 7756 7757 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7758 if self._match_text_seq("CHAINED", "ROWS"): 7759 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7760 return None 7761 7762 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7763 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7764 this = self._prev.text.upper() 7765 expression: t.Optional[exp.Expression] = None 7766 expressions = [] 7767 update_options = None 7768 7769 if self._match_text_seq("HISTOGRAM", "ON"): 7770 expressions = self._parse_csv(self._parse_column_reference) 7771 with_expressions = [] 7772 while self._match(TokenType.WITH): 7773 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7774 if self._match_texts(("SYNC", "ASYNC")): 7775 if self._match_text_seq("MODE", advance=False): 7776 with_expressions.append(f"{self._prev.text.upper()} MODE") 7777 self._advance() 7778 else: 7779 buckets = self._parse_number() 7780 if self._match_text_seq("BUCKETS"): 7781 with_expressions.append(f"{buckets} BUCKETS") 7782 if with_expressions: 7783 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7784 7785 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7786 TokenType.UPDATE, advance=False 7787 ): 7788 update_options = self._prev.text.upper() 7789 self._advance() 7790 elif self._match_text_seq("USING", "DATA"): 7791 expression = self.expression(exp.UsingData, this=self._parse_string()) 7792 7793 return self.expression( 7794 exp.AnalyzeHistogram, 7795 this=this, 7796 expressions=expressions, 7797 expression=expression, 7798 update_options=update_options, 7799 ) 7800 7801 def _parse_merge(self) -> exp.Merge: 7802 self._match(TokenType.INTO) 7803 target = self._parse_table() 7804 7805 if target and self._match(TokenType.ALIAS, advance=False): 7806 target.set("alias", self._parse_table_alias()) 7807 7808 self._match(TokenType.USING) 7809 using = self._parse_table() 7810 7811 self._match(TokenType.ON) 7812 on = self._parse_assignment() 7813 7814 return self.expression( 7815 exp.Merge, 7816 this=target, 7817 using=using, 7818 on=on, 7819 whens=self._parse_when_matched(), 7820 returning=self._parse_returning(), 7821 ) 7822 7823 def _parse_when_matched(self) -> exp.Whens: 7824 whens = [] 7825 7826 while self._match(TokenType.WHEN): 7827 matched = not self._match(TokenType.NOT) 7828 self._match_text_seq("MATCHED") 7829 source = ( 7830 False 7831 if self._match_text_seq("BY", "TARGET") 7832 else self._match_text_seq("BY", "SOURCE") 7833 ) 7834 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7835 7836 self._match(TokenType.THEN) 7837 7838 if self._match(TokenType.INSERT): 7839 this = self._parse_star() 7840 if this: 7841 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7842 else: 7843 then = self.expression( 7844 exp.Insert, 7845 this=exp.var("ROW") 7846 if self._match_text_seq("ROW") 7847 else self._parse_value(values=False), 7848 expression=self._match_text_seq("VALUES") and self._parse_value(), 7849 ) 7850 elif self._match(TokenType.UPDATE): 7851 expressions = self._parse_star() 7852 if expressions: 7853 then = self.expression(exp.Update, expressions=expressions) 7854 else: 7855 then = self.expression( 7856 exp.Update, 7857 
expressions=self._match(TokenType.SET) 7858 and self._parse_csv(self._parse_equality), 7859 ) 7860 elif self._match(TokenType.DELETE): 7861 then = self.expression(exp.Var, this=self._prev.text) 7862 else: 7863 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7864 7865 whens.append( 7866 self.expression( 7867 exp.When, 7868 matched=matched, 7869 source=source, 7870 condition=condition, 7871 then=then, 7872 ) 7873 ) 7874 return self.expression(exp.Whens, expressions=whens) 7875 7876 def _parse_show(self) -> t.Optional[exp.Expression]: 7877 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7878 if parser: 7879 return parser(self) 7880 return self._parse_as_command(self._prev) 7881 7882 def _parse_set_item_assignment( 7883 self, kind: t.Optional[str] = None 7884 ) -> t.Optional[exp.Expression]: 7885 index = self._index 7886 7887 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7888 return self._parse_set_transaction(global_=kind == "GLOBAL") 7889 7890 left = self._parse_primary() or self._parse_column() 7891 assignment_delimiter = self._match_texts(("=", "TO")) 7892 7893 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7894 self._retreat(index) 7895 return None 7896 7897 right = self._parse_statement() or self._parse_id_var() 7898 if isinstance(right, (exp.Column, exp.Identifier)): 7899 right = exp.var(right.name) 7900 7901 this = self.expression(exp.EQ, this=left, expression=right) 7902 return self.expression(exp.SetItem, this=this, kind=kind) 7903 7904 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7905 self._match_text_seq("TRANSACTION") 7906 characteristics = self._parse_csv( 7907 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7908 ) 7909 return self.expression( 7910 exp.SetItem, 7911 expressions=characteristics, 7912 kind="TRANSACTION", 7913 **{"global": global_}, # type: ignore 7914 ) 7915 7916 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7917 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7918 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7919 7920 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7921 index = self._index 7922 set_ = self.expression( 7923 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7924 ) 7925 7926 if self._curr: 7927 self._retreat(index) 7928 return self._parse_as_command(self._prev) 7929 7930 return set_ 7931 7932 def _parse_var_from_options( 7933 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7934 ) -> t.Optional[exp.Var]: 7935 start = self._curr 7936 if not start: 7937 return None 7938 7939 option = start.text.upper() 7940 continuations = options.get(option) 7941 7942 index = self._index 7943 self._advance() 7944 for keywords in continuations or []: 7945 if isinstance(keywords, str): 7946 keywords = (keywords,) 7947 7948 if self._match_text_seq(*keywords): 7949 option = f"{option} {' '.join(keywords)}" 7950 break 7951 else: 7952 if continuations or continuations is None: 7953 if raise_unmatched: 7954 self.raise_error(f"Unknown option {option}") 7955 7956 self._retreat(index) 7957 return None 7958 7959 return exp.var(option) 7960 7961 def _parse_as_command(self, start: Token) -> exp.Command: 7962 while self._curr: 7963 self._advance() 7964 text = self._find_sql(start, self._prev) 7965 size = len(start.text) 7966 self._warn_unsupported() 7967 return exp.Command(this=text[:size], 
expression=text[size:]) 7968 7969 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7970 settings = [] 7971 7972 self._match_l_paren() 7973 kind = self._parse_id_var() 7974 7975 if self._match(TokenType.L_PAREN): 7976 while True: 7977 key = self._parse_id_var() 7978 value = self._parse_primary() 7979 if not key and value is None: 7980 break 7981 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7982 self._match(TokenType.R_PAREN) 7983 7984 self._match_r_paren() 7985 7986 return self.expression( 7987 exp.DictProperty, 7988 this=this, 7989 kind=kind.this if kind else None, 7990 settings=settings, 7991 ) 7992 7993 def _parse_dict_range(self, this: str) -> exp.DictRange: 7994 self._match_l_paren() 7995 has_min = self._match_text_seq("MIN") 7996 if has_min: 7997 min = self._parse_var() or self._parse_primary() 7998 self._match_text_seq("MAX") 7999 max = self._parse_var() or self._parse_primary() 8000 else: 8001 max = self._parse_var() or self._parse_primary() 8002 min = exp.Literal.number(0) 8003 self._match_r_paren() 8004 return self.expression(exp.DictRange, this=this, min=min, max=max) 8005 8006 def _parse_comprehension( 8007 self, this: t.Optional[exp.Expression] 8008 ) -> t.Optional[exp.Comprehension]: 8009 index = self._index 8010 expression = self._parse_column() 8011 if not self._match(TokenType.IN): 8012 self._retreat(index - 1) 8013 return None 8014 iterator = self._parse_column() 8015 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8016 return self.expression( 8017 exp.Comprehension, 8018 this=this, 8019 expression=expression, 8020 iterator=iterator, 8021 condition=condition, 8022 ) 8023 8024 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8025 if self._match(TokenType.HEREDOC_STRING): 8026 return self.expression(exp.Heredoc, this=self._prev.text) 8027 8028 if not self._match_text_seq("$"): 8029 return None 8030 8031 tags = ["$"] 8032 tag_text = None 8033 8034 if self._is_connected(): 8035 self._advance() 8036 tags.append(self._prev.text.upper()) 8037 else: 8038 self.raise_error("No closing $ found") 8039 8040 if tags[-1] != "$": 8041 if self._is_connected() and self._match_text_seq("$"): 8042 tag_text = tags[-1] 8043 tags.append("$") 8044 else: 8045 self.raise_error("No closing $ found") 8046 8047 heredoc_start = self._curr 8048 8049 while self._curr: 8050 if self._match_text_seq(*tags, advance=False): 8051 this = self._find_sql(heredoc_start, self._prev) 8052 self._advance(len(tags)) 8053 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8054 8055 self._advance() 8056 8057 self.raise_error(f"No closing {''.join(tags)} found") 8058 return None 8059 8060 def _find_parser( 8061 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8062 ) -> t.Optional[t.Callable]: 8063 if not self._curr: 8064 return None 8065 8066 index = self._index 8067 this = [] 8068 while True: 8069 # The current token might be multiple words 8070 curr = self._curr.text.upper() 8071 key = curr.split(" ") 8072 this.append(curr) 8073 8074 self._advance() 8075 result, trie = in_trie(trie, key) 8076 if result == TrieResult.FAILED: 8077 break 8078 8079 if result == TrieResult.EXISTS: 8080 subparser = parsers[" ".join(this)] 8081 return subparser 8082 8083 self._retreat(index) 8084 return None 8085 8086 def _match(self, token_type, advance=True, expression=None): 8087 if not self._curr: 8088 return None 8089 8090 if self._curr.token_type == token_type: 8091 if advance: 8092 self._advance() 8093 self._add_comments(expression) 8094 return 
True 8095 8096 return None 8097 8098 def _match_set(self, types, advance=True): 8099 if not self._curr: 8100 return None 8101 8102 if self._curr.token_type in types: 8103 if advance: 8104 self._advance() 8105 return True 8106 8107 return None 8108 8109 def _match_pair(self, token_type_a, token_type_b, advance=True): 8110 if not self._curr or not self._next: 8111 return None 8112 8113 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8114 if advance: 8115 self._advance(2) 8116 return True 8117 8118 return None 8119 8120 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8121 if not self._match(TokenType.L_PAREN, expression=expression): 8122 self.raise_error("Expecting (") 8123 8124 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8125 if not self._match(TokenType.R_PAREN, expression=expression): 8126 self.raise_error("Expecting )") 8127 8128 def _match_texts(self, texts, advance=True): 8129 if ( 8130 self._curr 8131 and self._curr.token_type != TokenType.STRING 8132 and self._curr.text.upper() in texts 8133 ): 8134 if advance: 8135 self._advance() 8136 return True 8137 return None 8138 8139 def _match_text_seq(self, *texts, advance=True): 8140 index = self._index 8141 for text in texts: 8142 if ( 8143 self._curr 8144 and self._curr.token_type != TokenType.STRING 8145 and self._curr.text.upper() == text 8146 ): 8147 self._advance() 8148 else: 8149 self._retreat(index) 8150 return None 8151 8152 if not advance: 8153 self._retreat(index) 8154 8155 return True 8156 8157 def _replace_lambda( 8158 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8159 ) -> t.Optional[exp.Expression]: 8160 if not node: 8161 return node 8162 8163 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8164 8165 for column in node.find_all(exp.Column): 8166 typ = lambda_types.get(column.parts[0].name) 8167 if typ is not None: 8168 dot_or_id = column.to_dot() if column.table else column.this 8169 8170 if typ: 8171 dot_or_id = self.expression( 8172 exp.Cast, 8173 this=dot_or_id, 8174 to=typ, 8175 ) 8176 8177 parent = column.parent 8178 8179 while isinstance(parent, exp.Dot): 8180 if not isinstance(parent.parent, exp.Dot): 8181 parent.replace(dot_or_id) 8182 break 8183 parent = parent.parent 8184 else: 8185 if column is node: 8186 node = dot_or_id 8187 else: 8188 column.replace(dot_or_id) 8189 return node 8190 8191 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8192 start = self._prev 8193 8194 # Not to be confused with TRUNCATE(number, decimals) function call 8195 if self._match(TokenType.L_PAREN): 8196 self._retreat(self._index - 2) 8197 return self._parse_function() 8198 8199 # Clickhouse supports TRUNCATE DATABASE as well 8200 is_database = self._match(TokenType.DATABASE) 8201 8202 self._match(TokenType.TABLE) 8203 8204 exists = self._parse_exists(not_=False) 8205 8206 expressions = self._parse_csv( 8207 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8208 ) 8209 8210 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8211 8212 if self._match_text_seq("RESTART", "IDENTITY"): 8213 identity = "RESTART" 8214 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8215 identity = "CONTINUE" 8216 else: 8217 identity = None 8218 8219 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8220 option = self._prev.text 8221 else: 8222 option = None 8223 8224 partition = self._parse_partition() 
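# Editor's note (illustrative, not part of the original source): as in
# several statement parsers above, any tokens left unconsumed at this
# point demote the whole statement to a generic exp.Command below, while
# a fully parsed statement round-trips. Assuming the public sqlglot API:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("TRUNCATE TABLE t1, t2").sql()
#     'TRUNCATE TABLE t1, t2'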
8225 8226 # Fallback case 8227 if self._curr: 8228 return self._parse_as_command(start) 8229 8230 return self.expression( 8231 exp.TruncateTable, 8232 expressions=expressions, 8233 is_database=is_database, 8234 exists=exists, 8235 cluster=cluster, 8236 identity=identity, 8237 option=option, 8238 partition=partition, 8239 ) 8240 8241 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8242 this = self._parse_ordered(self._parse_opclass) 8243 8244 if not self._match(TokenType.WITH): 8245 return this 8246 8247 op = self._parse_var(any_token=True) 8248 8249 return self.expression(exp.WithOperator, this=this, op=op) 8250 8251 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8252 self._match(TokenType.EQ) 8253 self._match(TokenType.L_PAREN) 8254 8255 opts: t.List[t.Optional[exp.Expression]] = [] 8256 option: exp.Expression | None 8257 while self._curr and not self._match(TokenType.R_PAREN): 8258 if self._match_text_seq("FORMAT_NAME", "="): 8259 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8260 option = self._parse_format_name() 8261 else: 8262 option = self._parse_property() 8263 8264 if option is None: 8265 self.raise_error("Unable to parse option") 8266 break 8267 8268 opts.append(option) 8269 8270 return opts 8271 8272 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8273 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8274 8275 options = [] 8276 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8277 option = self._parse_var(any_token=True) 8278 prev = self._prev.text.upper() 8279 8280 # Different dialects might separate options and values by white space, "=" and "AS" 8281 self._match(TokenType.EQ) 8282 self._match(TokenType.ALIAS) 8283 8284 param = self.expression(exp.CopyParameter, this=option) 8285 8286 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8287 TokenType.L_PAREN, advance=False 8288 ): 8289 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8290 param.set("expressions", self._parse_wrapped_options()) 8291 elif prev == "FILE_FORMAT": 8292 # T-SQL's external file format case 8293 param.set("expression", self._parse_field()) 8294 else: 8295 param.set("expression", self._parse_unquoted_field()) 8296 8297 options.append(param) 8298 self._match(sep) 8299 8300 return options 8301 8302 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8303 expr = self.expression(exp.Credentials) 8304 8305 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8306 expr.set("storage", self._parse_field()) 8307 if self._match_text_seq("CREDENTIALS"): 8308 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8309 creds = ( 8310 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8311 ) 8312 expr.set("credentials", creds) 8313 if self._match_text_seq("ENCRYPTION"): 8314 expr.set("encryption", self._parse_wrapped_options()) 8315 if self._match_text_seq("IAM_ROLE"): 8316 expr.set("iam_role", self._parse_field()) 8317 if self._match_text_seq("REGION"): 8318 expr.set("region", self._parse_field()) 8319 8320 return expr 8321 8322 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8323 return self._parse_field() 8324 8325 def _parse_copy(self) -> exp.Copy | exp.Command: 8326 start = self._prev 8327 8328 self._match(TokenType.INTO) 8329 8330 this = ( 8331 self._parse_select(nested=True, parse_subquery_alias=False) 8332 if self._match(TokenType.L_PAREN, advance=False) 8333 else self._parse_table(schema=True) 
8334 ) 8335 8336 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8337 8338 files = self._parse_csv(self._parse_file_location) 8339 credentials = self._parse_credentials() 8340 8341 self._match_text_seq("WITH") 8342 8343 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8344 8345 # Fallback case 8346 if self._curr: 8347 return self._parse_as_command(start) 8348 8349 return self.expression( 8350 exp.Copy, 8351 this=this, 8352 kind=kind, 8353 credentials=credentials, 8354 files=files, 8355 params=params, 8356 ) 8357 8358 def _parse_normalize(self) -> exp.Normalize: 8359 return self.expression( 8360 exp.Normalize, 8361 this=self._parse_bitwise(), 8362 form=self._match(TokenType.COMMA) and self._parse_var(), 8363 ) 8364 8365 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8366 args = self._parse_csv(lambda: self._parse_lambda()) 8367 8368 this = seq_get(args, 0) 8369 decimals = seq_get(args, 1) 8370 8371 return expr_type( 8372 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8373 ) 8374 8375 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8376 star_token = self._prev 8377 8378 if self._match_text_seq("COLUMNS", "(", advance=False): 8379 this = self._parse_function() 8380 if isinstance(this, exp.Columns): 8381 this.set("unpack", True) 8382 return this 8383 8384 return self.expression( 8385 exp.Star, 8386 **{ # type: ignore 8387 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8388 "replace": self._parse_star_op("REPLACE"), 8389 "rename": self._parse_star_op("RENAME"), 8390 }, 8391 ).update_positions(star_token) 8392 8393 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8394 privilege_parts = [] 8395 8396 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8397 # (end of privilege list) or L_PAREN (start of column list) are met 8398 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8399 privilege_parts.append(self._curr.text.upper()) 8400 self._advance() 8401 8402 this = exp.var(" ".join(privilege_parts)) 8403 expressions = ( 8404 self._parse_wrapped_csv(self._parse_column) 8405 if self._match(TokenType.L_PAREN, advance=False) 8406 else None 8407 ) 8408 8409 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8410 8411 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8412 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8413 principal = self._parse_id_var() 8414 8415 if not principal: 8416 return None 8417 8418 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8419 8420 def _parse_grant(self) -> exp.Grant | exp.Command: 8421 start = self._prev 8422 8423 privileges = self._parse_csv(self._parse_grant_privilege) 8424 8425 self._match(TokenType.ON) 8426 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8427 8428 # Attempt to parse the securable e.g. 
MySQL allows names 8429 # such as "foo.*", "*.*" which are not easily parseable yet 8430 securable = self._try_parse(self._parse_table_parts) 8431 8432 if not securable or not self._match_text_seq("TO"): 8433 return self._parse_as_command(start) 8434 8435 principals = self._parse_csv(self._parse_grant_principal) 8436 8437 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8438 8439 if self._curr: 8440 return self._parse_as_command(start) 8441 8442 return self.expression( 8443 exp.Grant, 8444 privileges=privileges, 8445 kind=kind, 8446 securable=securable, 8447 principals=principals, 8448 grant_option=grant_option, 8449 ) 8450 8451 def _parse_overlay(self) -> exp.Overlay: 8452 return self.expression( 8453 exp.Overlay, 8454 **{ # type: ignore 8455 "this": self._parse_bitwise(), 8456 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8457 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8458 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8459 }, 8460 ) 8461 8462 def _parse_format_name(self) -> exp.Property: 8463 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8464 # for FILE_FORMAT = <format_name> 8465 return self.expression( 8466 exp.Property, 8467 this=exp.var("FORMAT_NAME"), 8468 value=self._parse_string() or self._parse_table_parts(), 8469 ) 8470 8471 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8472 args: t.List[exp.Expression] = [] 8473 8474 if self._match(TokenType.DISTINCT): 8475 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8476 self._match(TokenType.COMMA) 8477 8478 args.extend(self._parse_csv(self._parse_assignment)) 8479 8480 return self.expression( 8481 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8482 ) 8483 8484 def _identifier_expression( 8485 self, token: t.Optional[Token] = None, **kwargs: t.Any 8486 ) -> exp.Identifier: 8487 token = token or self._prev 8488 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8489 expression.update_positions(token) 8490 return expression 8491 8492 def _build_pipe_cte( 8493 self, 8494 query: exp.Query, 8495 expressions: t.List[exp.Expression], 8496 alias_cte: t.Optional[exp.TableAlias] = None, 8497 ) -> exp.Select: 8498 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8499 if alias_cte: 8500 new_cte = alias_cte 8501 else: 8502 self._pipe_cte_counter += 1 8503 new_cte = f"__tmp{self._pipe_cte_counter}" 8504 8505 with_ = query.args.get("with") 8506 ctes = with_.pop() if with_ else None 8507 8508 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8509 if ctes: 8510 new_select.set("with", ctes) 8511 8512 return new_select.with_(new_cte, as_=query, copy=False) 8513 8514 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8515 select = self._parse_select(consume_pipe=False) 8516 if not select: 8517 return query 8518 8519 return self._build_pipe_cte( 8520 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8521 ) 8522 8523 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8524 limit = self._parse_limit() 8525 offset = self._parse_offset() 8526 if limit: 8527 curr_limit = query.args.get("limit", limit) 8528 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8529 query.limit(limit, copy=False) 8530 if offset: 8531 curr_offset = query.args.get("offset") 8532 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
8533 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8534 8535 return query 8536 8537 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8538 this = self._parse_assignment() 8539 if self._match_text_seq("GROUP", "AND", advance=False): 8540 return this 8541 8542 this = self._parse_alias(this) 8543 8544 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8545 return self._parse_ordered(lambda: this) 8546 8547 return this 8548 8549 def _parse_pipe_syntax_aggregate_group_order_by( 8550 self, query: exp.Select, group_by_exists: bool = True 8551 ) -> exp.Select: 8552 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8553 aggregates_or_groups, orders = [], [] 8554 for element in expr: 8555 if isinstance(element, exp.Ordered): 8556 this = element.this 8557 if isinstance(this, exp.Alias): 8558 element.set("this", this.args["alias"]) 8559 orders.append(element) 8560 else: 8561 this = element 8562 aggregates_or_groups.append(this) 8563 8564 if group_by_exists: 8565 query.select(*aggregates_or_groups, copy=False).group_by( 8566 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8567 copy=False, 8568 ) 8569 else: 8570 query.select(*aggregates_or_groups, append=False, copy=False) 8571 8572 if orders: 8573 return query.order_by(*orders, append=False, copy=False) 8574 8575 return query 8576 8577 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8578 self._match_text_seq("AGGREGATE") 8579 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8580 8581 if self._match(TokenType.GROUP_BY) or ( 8582 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8583 ): 8584 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8585 8586 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8587 8588 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8589 first_setop = self.parse_set_operation(this=query) 8590 if not first_setop: 8591 return None 8592 8593 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8594 expr = self._parse_paren() 8595 return expr.assert_is(exp.Subquery).unnest() if expr else None 8596 8597 first_setop.this.pop() 8598 8599 setops = [ 8600 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8601 *self._parse_csv(_parse_and_unwrap_query), 8602 ] 8603 8604 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8605 with_ = query.args.get("with") 8606 ctes = with_.pop() if with_ else None 8607 8608 if isinstance(first_setop, exp.Union): 8609 query = query.union(*setops, copy=False, **first_setop.args) 8610 elif isinstance(first_setop, exp.Except): 8611 query = query.except_(*setops, copy=False, **first_setop.args) 8612 else: 8613 query = query.intersect(*setops, copy=False, **first_setop.args) 8614 8615 query.set("with", ctes) 8616 8617 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8618 8619 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8620 join = self._parse_join() 8621 if not join: 8622 return None 8623 8624 if isinstance(query, exp.Select): 8625 return query.join(join, copy=False) 8626 8627 return query 8628 8629 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8630 pivots = self._parse_pivots() 8631 if not pivots: 8632 return query 8633 8634 from_ = query.args.get("from") 8635 if from_: 8636 from_.this.set("pivots", pivots) 8637 8638 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8639 8640 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8641 self._match_text_seq("EXTEND") 8642 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8643 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8644 8645 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8646 sample = self._parse_table_sample() 8647 8648 with_ = query.args.get("with") 8649 if with_: 8650 with_.expressions[-1].this.set("sample", sample) 8651 else: 8652 query.set("sample", sample) 8653 8654 return query 8655 8656 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8657 if isinstance(query, exp.Subquery): 8658 query = exp.select("*").from_(query, copy=False) 8659 8660 if not query.args.get("from"): 8661 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8662 8663 while self._match(TokenType.PIPE_GT): 8664 start = self._curr 8665 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8666 if not parser: 8667 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8668 # keywords, making it tricky to disambiguate them without lookahead. The approach 8669 # here is to try and parse a set operation and if that fails, then try to parse a 8670 # join operator. If that fails as well, then the operator is not supported. 8671 parsed_query = self._parse_pipe_syntax_set_operator(query) 8672 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8673 if not parsed_query: 8674 self._retreat(start) 8675 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8676 break 8677 query = parsed_query 8678 else: 8679 query = parser(self, query) 8680 8681 return query 8682 8683 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8684 vars = self._parse_csv(self._parse_id_var) 8685 if not vars: 8686 return None 8687 8688 return self.expression( 8689 exp.DeclareItem, 8690 this=vars, 8691 kind=self._parse_types(), 8692 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8693 ) 8694 8695 def _parse_declare(self) -> exp.Declare | exp.Command: 8696 start = self._prev 8697 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8698 8699 if not expressions or self._curr: 8700 return self._parse_as_command(start) 8701 8702 return self.expression(exp.Declare, expressions=expressions) 8703 8704 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8705 exp_class = exp.Cast if strict else exp.TryCast 8706 8707 if exp_class == exp.TryCast: 8708 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8709 8710 return self.expression(exp_class, **kwargs) 8711 8712 def _parse_json_value(self) -> exp.JSONValue: 8713 this = self._parse_bitwise() 8714 self._match(TokenType.COMMA) 8715 path = self._parse_bitwise() 8716 8717 returning = self._match(TokenType.RETURNING) and self._parse_type() 8718 8719 return self.expression( 8720 exp.JSONValue, 8721 this=this, 8722 path=self.dialect.to_json_path(path), 8723 returning=returning, 8724 on_condition=self._parse_on_condition(), 8725 )
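As a quick illustration of the pipe syntax machinery above, the sketch below parses a BigQuery-style pipe query; it assumes a sqlglot build where the |> operator (TokenType.PIPE_GT) is wired up for the bigquery dialect, as in this module.

import sqlglot

# Each |> operator is dispatched through _parse_pipe_syntax_query; most
# transforms finish in _build_pipe_cte, which folds the running query into
# a generated __tmpN CTE.
ast = sqlglot.parse_one(
    "FROM orders |> WHERE amount > 10 |> SELECT id, amount",
    read="bigquery",
)
print(ast.sql("bigquery"))  # the pipeline is normalized into plain SELECT/CTE syntax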
Parser consumes a list of tokens produced by the Tokenizer and builds a parsed syntax tree from it.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The number of characters of context to capture from the query string when displaying an error message. Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1574 def __init__( 1575 self, 1576 error_level: t.Optional[ErrorLevel] = None, 1577 error_message_context: int = 100, 1578 max_errors: int = 3, 1579 dialect: DialectType = None, 1580 ): 1581 from sqlglot.dialects import Dialect 1582 1583 self.error_level = error_level or ErrorLevel.IMMEDIATE 1584 self.error_message_context = error_message_context 1585 self.max_errors = max_errors 1586 self.dialect = Dialect.get_or_raise(dialect) 1587 self.reset()
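A minimal construction sketch; most callers go through sqlglot.parse or sqlglot.parse_one instead, which instantiate the right dialect parser for them.

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(
    error_level=ErrorLevel.RAISE,  # collect errors, then raise one merged ParseError
    error_message_context=50,      # show 50 characters of context around each error
    max_errors=5,                  # cap the messages included in that ParseError
    dialect="duckdb",
)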
1600 def parse( 1601 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1602 ) -> t.List[t.Optional[exp.Expression]]: 1603 """ 1604 Parses a list of tokens and returns a list of syntax trees, one tree 1605 per parsed SQL statement. 1606 1607 Args: 1608 raw_tokens: The list of tokens. 1609 sql: The original SQL string, used to produce helpful debug messages. 1610 1611 Returns: 1612 The list of the produced syntax trees. 1613 """ 1614 return self._parse( 1615 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1616 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
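A sketch of the low-level flow this method supports: tokenize first, then hand the token list over. The sqlglot.parse and sqlglot.parse_one helpers wrap exactly this.

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # one syntax tree per statement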
1618 def parse_into( 1619 self, 1620 expression_types: exp.IntoType, 1621 raw_tokens: t.List[Token], 1622 sql: t.Optional[str] = None, 1623 ) -> t.List[t.Optional[exp.Expression]]: 1624 """ 1625 Parses a list of tokens into a given Expression type. If a collection of Expression 1626 types is given instead, this method will try to parse the token list into each one 1627 of them, stopping at the first for which the parsing succeeds. 1628 1629 Args: 1630 expression_types: The expression type(s) to try and parse the token list into. 1631 raw_tokens: The list of tokens. 1632 sql: The original SQL string, used to produce helpful debug messages. 1633 1634 Returns: 1635 The target Expression. 1636 """ 1637 errors = [] 1638 for expression_type in ensure_list(expression_types): 1639 parser = self.EXPRESSION_PARSERS.get(expression_type) 1640 if not parser: 1641 raise TypeError(f"No parser registered for {expression_type}") 1642 1643 try: 1644 return self._parse(parser, raw_tokens, sql) 1645 except ParseError as e: 1646 e.errors[0]["into_expression"] = expression_type 1647 errors.append(e) 1648 1649 raise ParseError( 1650 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1651 errors=merge_errors(errors), 1652 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
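For instance, assuming exp.Select is registered in EXPRESSION_PARSERS (it is in the base parser), a statement can be forced to parse as a SELECT:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(select, exp.Select)  # a non-SELECT statement would raise ParseError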
1692 def check_errors(self) -> None: 1693 """Logs or raises any found errors, depending on the chosen error level setting.""" 1694 if self.error_level == ErrorLevel.WARN: 1695 for error in self.errors: 1696 logger.error(str(error)) 1697 elif self.error_level == ErrorLevel.RAISE and self.errors: 1698 raise ParseError( 1699 concat_messages(self.errors, self.max_errors), 1700 errors=merge_errors(self.errors), 1701 )
Logs or raises any found errors, depending on the chosen error level setting.
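With error_level=ErrorLevel.RAISE, errors accumulate during parsing and this method raises them as a single merged ParseError once the statement is done; a sketch:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # "1 +" is missing its right operand, so validation records an error
    sqlglot.parse("SELECT 1 +", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e.errors)  # the structured errors collected while parsing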
1703 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1704 """ 1705 Appends an error in the list of recorded errors or raises it, depending on the chosen 1706 error level setting. 1707 """ 1708 token = token or self._curr or self._prev or Token.string("") 1709 start = token.start 1710 end = token.end + 1 1711 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1712 highlight = self.sql[start:end] 1713 end_context = self.sql[end : end + self.error_message_context] 1714 1715 error = ParseError.new( 1716 f"{message}. Line {token.line}, Col: {token.col}.\n" 1717 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1718 description=message, 1719 line=token.line, 1720 col=token.col, 1721 start_context=start_context, 1722 highlight=highlight, 1723 end_context=end_context, 1724 ) 1725 1726 if self.error_level == ErrorLevel.IMMEDIATE: 1727 raise error 1728 1729 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
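Dialect parsers typically call this from their own _parse_* hooks. A hypothetical subclass (the method name and clause are illustrative only):

from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_my_clause(self):
        # Hypothetical check: raise_error either raises immediately or
        # appends to self.errors, depending on self.error_level.
        if not self._match_text_seq("MY", "CLAUSE"):
            self.raise_error("Expected MY CLAUSE")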
1731 def expression( 1732 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1733 ) -> E: 1734 """ 1735 Creates a new, validated Expression. 1736 1737 Args: 1738 exp_class: The expression class to instantiate. 1739 comments: An optional list of comments to attach to the expression. 1740 kwargs: The arguments to set for the expression along with their respective values. 1741 1742 Returns: 1743 The target expression. 1744 """ 1745 instance = exp_class(**kwargs) 1746 instance.add_comments(comments) if comments else self._add_comments(instance) 1747 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
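A sketch of building a node through this helper outside of an active parse; any comments pending on the parser are attached automatically before validation runs.

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
column = parser.expression(exp.Column, this=exp.to_identifier("a"))
assert column.sql() == "a"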
1754 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1755 """ 1756 Validates an Expression, making sure that all its mandatory arguments are set. 1757 1758 Args: 1759 expression: The expression to validate. 1760 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1761 1762 Returns: 1763 The validated expression. 1764 """ 1765 if self.error_level != ErrorLevel.IGNORE: 1766 for error_message in expression.error_messages(args): 1767 self.raise_error(error_message) 1768 1769 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
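For example, exp.Cast declares 'to' as a mandatory argument, so validating a Cast without it reports an error, which under the default IMMEDIATE error level is raised on the spot; a sketch:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    # no 'to' type was provided, so validation fails
    Parser().validate_expression(exp.Cast(this=exp.to_identifier("a")))
except ParseError as e:
    print(e)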
4832 def parse_set_operation( 4833 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4834 ) -> t.Optional[exp.Expression]: 4835 start = self._index 4836 _, side_token, kind_token = self._parse_join_parts() 4837 4838 side = side_token.text if side_token else None 4839 kind = kind_token.text if kind_token else None 4840 4841 if not self._match_set(self.SET_OPERATIONS): 4842 self._retreat(start) 4843 return None 4844 4845 token_type = self._prev.token_type 4846 4847 if token_type == TokenType.UNION: 4848 operation: t.Type[exp.SetOperation] = exp.Union 4849 elif token_type == TokenType.EXCEPT: 4850 operation = exp.Except 4851 else: 4852 operation = exp.Intersect 4853 4854 comments = self._prev.comments 4855 4856 if self._match(TokenType.DISTINCT): 4857 distinct: t.Optional[bool] = True 4858 elif self._match(TokenType.ALL): 4859 distinct = False 4860 else: 4861 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4862 if distinct is None: 4863 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4864 4865 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4866 "STRICT", "CORRESPONDING" 4867 ) 4868 if self._match_text_seq("CORRESPONDING"): 4869 by_name = True 4870 if not side and not kind: 4871 kind = "INNER" 4872 4873 on_column_list = None 4874 if by_name and self._match_texts(("ON", "BY")): 4875 on_column_list = self._parse_wrapped_csv(self._parse_column) 4876 4877 expression = self._parse_select( 4878 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4879 ) 4880 4881 return self.expression( 4882 operation, 4883 comments=comments, 4884 this=this, 4885 distinct=distinct, 4886 by_name=by_name, 4887 expression=expression, 4888 side=side, 4889 kind=kind, 4890 on=on_column_list, 4891 )
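The resulting node records the operator's modifiers, e.g. distinctness; a quick sketch of what UNION ALL yields:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
assert isinstance(union, exp.Union)
assert union.args["distinct"] is False  # ALL was matched, so not distinct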