Commit f200db9

Add support for DuckDB underscore numeric literals
- Add numberRegex option to TokenizerOptions to allow custom number patterns
- Update Tokenizer to use the custom number regex when provided
- Configure the DuckDB formatter to support underscore separators in numbers (1_000_000)
- Add a test for underscore numeric literals in DuckDB
1 parent 9c3feb4 commit f200db9
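
For context, a minimal usage sketch of what this commit enables. It assumes sql-formatter's public format entry point with its language: 'duckdb' option; the expected output mirrors the new test added below.

import { format } from 'sql-formatter';

// With this change, underscore digit separators are kept as part of a single
// number token instead of splitting the literal apart.
const result = format('SELECT 1_000_000, 0x1A_2B_3C;', { language: 'duckdb' });
console.log(result);
// Expected (per the test below):
// SELECT
//   1_000_000,
//   0x1A_2B_3C;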

4 files changed, +17 −0 lines changed

src/languages/duckdb/duckdb.formatter.ts

Lines changed: 3 additions & 0 deletions
@@ -155,6 +155,9 @@ export const duckdb: DialectOptions = {
     reservedFunctionNames: functions,
     nestedBlockComments: true,
     extraParens: ['[]', '{}'],
+    // Support underscore separators in numeric literals (e.g., 1_000_000)
+    numberRegex:
+      /(?:0x[0-9a-fA-F_]+|0b[01_]+|(?:-\s*)?(?:[0-9_]*\.[0-9_]+|[0-9_]+(?:\.[0-9_]*)?)(?:[eE][-+]?[0-9_]+(?:\.[0-9_]+)?)?)(?![\w\p{Alphabetic}])/uy,
     stringTypes: [
       '$$',
       "''-qq",

src/lexer/Tokenizer.ts

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ export default class Tokenizer {
     {
       type: TokenType.NUMBER,
       regex:
+        cfg.numberRegex ??
         /(?:0x[0-9a-fA-F]+|0b[01]+|(?:-\s*)?(?:[0-9]*\.[0-9]+|[0-9]+(?:\.[0-9]*)?)(?:[eE][-+]?[0-9]+(?:\.[0-9]+)?)?)(?![\w\p{Alphabetic}])/uy,
     },
     // RESERVED_PHRASE is matched before all other keyword tokens

src/lexer/TokenizerOptions.ts

Lines changed: 2 additions & 0 deletions
@@ -100,6 +100,8 @@ export interface TokenizerOptions {
   propertyAccessOperators?: string[];
   // Enables PostgreSQL-specific OPERATOR(...) syntax
   operatorKeyword?: boolean;
+  // Custom regex pattern for number tokens (defaults to standard SQL number pattern)
+  numberRegex?: RegExp;
   // Allows custom modifications on the token array.
   // Called after the whole input string has been split into tokens.
   // The result of this will be the output of the tokenizer.
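
Because the new option is generic, any dialect could supply its own pattern the same way. The following is a hypothetical sketch, not part of this commit; only the numberRegex field comes from the diff above, everything else is illustrative.

// Hypothetical TokenizerOptions fragment for a dialect that also accepts a
// trailing "L" integer suffix (illustrative only; not in this commit).
const customNumberTokenizerOptions = {
  // ...the dialect's other TokenizerOptions fields...
  numberRegex: /(?:[0-9]+L?|[0-9]*\.[0-9]+)(?![\w\p{Alphabetic}])/uy,
};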

test/duckdb.test.ts

Lines changed: 11 additions & 0 deletions
@@ -214,4 +214,15 @@ describe('DuckDBFormatter', () => {
       1 IS NOT NULL;
     `);
   });
+
+  it('supports underscore separators in numeric literals', () => {
+    expect(format('SELECT 1_000_000, 3.14_159, 0x1A_2B_3C, 0b1010_0001, 1.5e+1_0;')).toBe(dedent`
+      SELECT
+        1_000_000,
+        3.14_159,
+        0x1A_2B_3C,
+        0b1010_0001,
+        1.5e+1_0;
+    `);
+  });
 });
