feat: add hex and Unicode escape sequences to lexer

Support \xNN (hex) and \u{NNNN} / \uNNNN (Unicode) escape sequences in strings.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 03:53:49 -05:00
parent f33ea5f7f7
commit 6511c289f0

View File

@@ -493,6 +493,77 @@ impl<'a> Lexer<'a> {
Some('"') => '"',
Some('0') => '\0',
Some('\'') => '\'',
Some('x') => {
// Hex escape \xNN
let h1 = self.advance().and_then(|c| c.to_digit(16));
let h2 = self.advance().and_then(|c| c.to_digit(16));
match (h1, h2) {
(Some(d1), Some(d2)) => {
let byte = (d1 * 16 + d2) as u8;
byte as char
}
_ => {
return Err(LexError {
message: "Invalid hex escape sequence: expected \\xNN".into(),
span: Span::new(escape_start - 1, self.pos),
});
}
}
}
Some('u') => {
// Unicode escape \u{NNNN} or \uNNNN
if self.peek() == Some('{') {
self.advance(); // consume '{'
let mut hex = String::new();
while let Some(c) = self.peek() {
if c == '}' {
self.advance();
break;
}
if c.is_ascii_hexdigit() {
hex.push(c);
self.advance();
} else {
return Err(LexError {
message: format!("Invalid unicode escape: expected hex digit, got '{}'", c),
span: Span::new(escape_start - 1, self.pos),
});
}
}
match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) {
Some(c) => c,
None => {
return Err(LexError {
message: format!("Invalid unicode escape: \\u{{{}}}", hex),
span: Span::new(escape_start - 1, self.pos),
});
}
}
} else {
// \uNNNN format (4 hex digits)
let mut hex = String::new();
for _ in 0..4 {
match self.advance() {
Some(c) if c.is_ascii_hexdigit() => hex.push(c),
_ => {
return Err(LexError {
message: "Invalid unicode escape: expected 4 hex digits".into(),
span: Span::new(escape_start - 1, self.pos),
});
}
}
}
match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) {
Some(c) => c,
None => {
return Err(LexError {
message: format!("Invalid unicode escape: \\u{}", hex),
span: Span::new(escape_start - 1, self.pos),
});
}
}
}
}
Some(c) => {
return Err(LexError {
message: format!("Invalid escape sequence: \\{}", c),