Skip to content

Commit 6c4221d

Browse files
authored
fix: support hexadecimal numbers and words with underscores (#57)
1 parent 095cd2a commit 6c4221d

File tree

5 files changed

+144
-10
lines changed

5 files changed

+144
-10
lines changed

src/ast.rs

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,31 @@ impl<'a> From<Value<'a>> for serde_json::Value {
6868
}
6969
Value::BooleanLit(b) => serde_json::Value::Bool(b.value),
7070
Value::NullKeyword(_) => serde_json::Value::Null,
71-
Value::NumberLit(num) => match serde_json::Number::from_str(num.value) {
72-
Ok(number) => serde_json::Value::Number(number),
73-
Err(_) => serde_json::Value::String(num.value.to_string()),
74-
},
71+
Value::NumberLit(num) => {
72+
// Check if this is a hexadecimal literal (0x or 0X prefix)
73+
let num_str = num.value.trim_start_matches('-');
74+
if num_str.len() > 2 && (num_str.starts_with("0x") || num_str.starts_with("0X")) {
75+
// Parse hexadecimal and convert to decimal
76+
let hex_part = &num_str[2..];
77+
match i64::from_str_radix(hex_part, 16) {
78+
Ok(decimal_value) => {
79+
let final_value = if num.value.starts_with('-') {
80+
-decimal_value
81+
} else {
82+
decimal_value
83+
};
84+
serde_json::Value::Number(serde_json::Number::from(final_value))
85+
}
86+
Err(_) => serde_json::Value::String(num.value.to_string()),
87+
}
88+
} else {
89+
// Standard decimal number
90+
match serde_json::Number::from_str(num.value) {
91+
Ok(number) => serde_json::Value::Number(number),
92+
Err(_) => serde_json::Value::String(num.value.to_string()),
93+
}
94+
}
95+
}
7596
Value::Object(obj) => {
7697
let mut map = serde_json::map::Map::new();
7798
for prop in obj.properties {

src/cst/mod.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,10 +1442,30 @@ impl CstNumberLit {
14421442
/// Converts this number literal into a `serde_json::Value`.
///
/// Literals whose text (after any leading `-`) starts with `0x`/`0X` are parsed as a
/// base-16 `i64` and emitted as a decimal JSON number, negated when the raw text starts
/// with `-`. All other literals are parsed with `serde_json::Number::from_str`.
/// Text that fails to parse is returned as a `Value::String` holding the raw text, so
/// every path visible here returns `Some`.
///
/// NOTE(review): the hexadecimal path goes through `i64`, so a literal whose magnitude
/// exceeds `i64::MAX` fails `from_str_radix` and is returned as a string rather than a
/// number.
pub fn to_serde_value(&self) -> Option<serde_json::Value> {
14431443
use std::str::FromStr;
14441444
let raw = self.0.borrow().value.clone();
1445-
match serde_json::Number::from_str(&raw) {
1446-
Ok(number) => Some(serde_json::Value::Number(number)),
1447-
// If the number is invalid, return it as a string (same behavior as AST conversion)
1448-
Err(_) => Some(serde_json::Value::String(raw)),
1445+
1446+
// check if this is a hexadecimal literal (0x or 0X prefix)
// `trim_start_matches` removes every leading '-', so the prefix test below sees the unsigned text
1447+
let num_str = raw.trim_start_matches('-');
1448+
if num_str.len() > 2 && (num_str.starts_with("0x") || num_str.starts_with("0X")) {
1449+
// parse hexadecimal and convert to decimal
// skip the two-byte ASCII prefix; the length check above guarantees at least one byte follows
1450+
let hex_part = &num_str[2..];
1451+
match i64::from_str_radix(hex_part, 16) {
1452+
Ok(decimal_value) => {
// re-apply the sign that was trimmed off before parsing
1453+
let final_value = if raw.starts_with('-') {
1454+
-decimal_value
1455+
} else {
1456+
decimal_value
1457+
};
1458+
Some(serde_json::Value::Number(serde_json::Number::from(final_value)))
1459+
}
// not representable as an i64 in base 16: fall back to the raw text as a string
1460+
Err(_) => Some(serde_json::Value::String(raw)),
1461+
}
1462+
} else {
1463+
// standard decimal number
1464+
match serde_json::Number::from_str(&raw) {
1465+
Ok(number) => Some(serde_json::Value::Number(number)),
1466+
// if the number is invalid, return it as a string (same behavior as AST conversion)
1467+
Err(_) => Some(serde_json::Value::String(raw)),
1468+
}
14491469
}
14501470
}
14511471
}

src/parse_to_ast.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,4 +573,33 @@ mod tests {
573573
fn error_correct_line_column_unicode_width() {
574574
assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 10");
575575
}
576+
577+
// Regression test: an object whose unquoted key contains underscores and whose value is a
// hexadecimal literal, followed by a trailing comma and a line comment, must parse with
// default options.
#[test]
578+
fn it_should_parse_unquoted_keys_with_hex_and_trailing_comma() {
579+
let text = r#"{
580+
CP_CanFuncReqId: 0x7DF, // 2015
581+
}"#;
582+
// AST view: the number literal keeps its original source text
{
583+
let parse_result = parse_to_ast(text, &Default::default(), &Default::default()).unwrap();
584+
585+
let value = parse_result.value.unwrap();
586+
let obj = value.as_object().unwrap();
587+
assert_eq!(obj.properties.len(), 1);
588+
assert_eq!(obj.properties[0].name.as_str(), "CP_CanFuncReqId");
589+
590+
let number_value = obj.properties[0].value.as_number_lit().unwrap();
591+
assert_eq!(number_value.value, "0x7DF");
592+
}
// serde view: only compiled when the `serde` feature is enabled
593+
#[cfg(feature = "serde")]
594+
{
595+
let value = crate::parse_to_serde_value(text, &Default::default()).unwrap().unwrap();
596+
// hexadecimal numbers are converted to decimal in serde output
597+
assert_eq!(
598+
value,
599+
serde_json::json!({
600+
"CP_CanFuncReqId": 2015
601+
})
602+
);
603+
}
604+
}
576605
}

src/scanner.rs

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,23 @@ impl<'a> Scanner<'a> {
160160

161161
if self.is_zero() {
162162
self.move_next_char();
163+
164+
// check for hexadecimal literal (0x or 0X)
165+
if matches!(self.current_char(), Some('x') | Some('X')) {
166+
self.move_next_char();
167+
168+
// must have at least one hex digit
169+
if !self.is_hex_digit() {
170+
return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit));
171+
}
172+
173+
while self.is_hex_digit() {
174+
self.move_next_char();
175+
}
176+
177+
let end_byte_index = self.byte_index;
178+
return Ok(Token::Number(&self.file_text[start_byte_index..end_byte_index]));
179+
}
163180
} else if self.is_one_nine() {
164181
self.move_next_char();
165182
while self.is_digit() {
@@ -288,10 +305,12 @@ impl<'a> Scanner<'a> {
288305
let start_byte_index = self.byte_index;
289306

290307
while let Some(current_char) = self.current_char() {
291-
if current_char.is_whitespace() || current_char == '\r' || current_char == '\n' || current_char == ':' {
308+
// check for word terminators
309+
if current_char.is_whitespace() || current_char == ':' {
292310
break;
293311
}
294-
if !current_char.is_alphanumeric() && current_char != '-' {
312+
// validate that the character is allowed in a word literal
313+
if !current_char.is_alphanumeric() && current_char != '-' && current_char != '_' {
295314
return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken));
296315
}
297316

@@ -382,6 +401,13 @@ impl<'a> Scanner<'a> {
382401
self.is_one_nine() || self.is_zero()
383402
}
384403

404+
/// Returns `true` when the current character is an ASCII hexadecimal digit
/// (`0-9`, `a-f`, `A-F`), and `false` otherwise, including when there is no
/// current character.
fn is_hex_digit(&self) -> bool {
405+
match self.current_char() {
406+
Some(current_char) => current_char.is_ascii_hexdigit(),
407+
_ => false,
408+
}
409+
}
410+
385411
/// Returns `true` when the current character is `'0'`.
fn is_zero(&self) -> bool {
386412
self.current_char() == Some('0')
387413
}
@@ -496,6 +522,24 @@ mod tests {
496522
);
497523
}
498524

525+
// Checks that comma-separated hexadecimal literals are each scanned as a single
// `Token::Number` carrying the original text. The inputs cover uppercase and
// lowercase digits and both the `0x` and `0X` prefixes.
#[test]
526+
fn it_tokenizes_hexadecimal_numbers() {
527+
assert_has_tokens(
528+
"0x7DF, 0xFF, 0x123ABC, 0xabc, 0X1F",
529+
vec![
530+
Token::Number("0x7DF"),
531+
Token::Comma,
532+
Token::Number("0xFF"),
533+
Token::Comma,
534+
Token::Number("0x123ABC"),
535+
Token::Comma,
536+
Token::Number("0xabc"),
537+
Token::Comma,
538+
Token::Number("0X1F"),
539+
],
540+
);
541+
}
542+
499543
#[test]
500544
fn it_errors_invalid_exponent() {
501545
assert_has_error(

src/serde.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,24 @@ mod tests {
8484

8585
assert_eq!(result, Some(SerdeValue::Object(expected_value)));
8686
}
87+
88+
// Checks that hexadecimal literals in an object are exposed as decimal JSON numbers
// by `parse_to_serde_value` when called with default options.
#[test]
89+
fn it_should_parse_hexadecimal_numbers_to_decimal() {
90+
let result = parse_to_serde_value(
91+
r#"{
92+
"hex1": 0x7DF,
93+
"hex2": 0xFF,
94+
"hex3": 0x10
95+
}"#,
96+
&Default::default(),
97+
)
98+
.unwrap();
99+
100+
// expected decimal equivalents: 0x7DF = 2015, 0xFF = 255, 0x10 = 16
let mut expected_value = serde_json::map::Map::new();
101+
expected_value.insert("hex1".to_string(), SerdeValue::Number(serde_json::Number::from(2015)));
102+
expected_value.insert("hex2".to_string(), SerdeValue::Number(serde_json::Number::from(255)));
103+
expected_value.insert("hex3".to_string(), SerdeValue::Number(serde_json::Number::from(16)));
104+
105+
assert_eq!(result, Some(SerdeValue::Object(expected_value)));
106+
}
87107
}

0 commit comments

Comments
 (0)