feat: add unicode escape sequences to lexer
Support \xNN (two-digit hex) and \u{NNNN} / \uNNNN (unicode) escape sequences in strings.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
71
src/lexer.rs
71
src/lexer.rs
@@ -493,6 +493,77 @@ impl<'a> Lexer<'a> {
|
||||
Some('"') => '"',
|
||||
Some('0') => '\0',
|
||||
Some('\'') => '\'',
|
||||
Some('x') => {
|
||||
// Hex escape \xNN
|
||||
let h1 = self.advance().and_then(|c| c.to_digit(16));
|
||||
let h2 = self.advance().and_then(|c| c.to_digit(16));
|
||||
match (h1, h2) {
|
||||
(Some(d1), Some(d2)) => {
|
||||
let byte = (d1 * 16 + d2) as u8;
|
||||
byte as char
|
||||
}
|
||||
_ => {
|
||||
return Err(LexError {
|
||||
message: "Invalid hex escape sequence: expected \\xNN".into(),
|
||||
span: Span::new(escape_start - 1, self.pos),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Some('u') => {
|
||||
// Unicode escape \u{NNNN} or \uNNNN
|
||||
if self.peek() == Some('{') {
|
||||
self.advance(); // consume '{'
|
||||
let mut hex = String::new();
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '}' {
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
if c.is_ascii_hexdigit() {
|
||||
hex.push(c);
|
||||
self.advance();
|
||||
} else {
|
||||
return Err(LexError {
|
||||
message: format!("Invalid unicode escape: expected hex digit, got '{}'", c),
|
||||
span: Span::new(escape_start - 1, self.pos),
|
||||
});
|
||||
}
|
||||
}
|
||||
match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Err(LexError {
|
||||
message: format!("Invalid unicode escape: \\u{{{}}}", hex),
|
||||
span: Span::new(escape_start - 1, self.pos),
|
||||
});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// \uNNNN format (4 hex digits)
|
||||
let mut hex = String::new();
|
||||
for _ in 0..4 {
|
||||
match self.advance() {
|
||||
Some(c) if c.is_ascii_hexdigit() => hex.push(c),
|
||||
_ => {
|
||||
return Err(LexError {
|
||||
message: "Invalid unicode escape: expected 4 hex digits".into(),
|
||||
span: Span::new(escape_start - 1, self.pos),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Err(LexError {
|
||||
message: format!("Invalid unicode escape: \\u{}", hex),
|
||||
span: Span::new(escape_start - 1, self.pos),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(c) => {
|
||||
return Err(LexError {
|
||||
message: format!("Invalid escape sequence: \\{}", c),
|
||||
|
||||
Reference in New Issue
Block a user