diff --git a/src/lexer.rs b/src/lexer.rs index 1e370b8..db0c98f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -493,6 +493,77 @@ impl<'a> Lexer<'a> { Some('"') => '"', Some('0') => '\0', Some('\'') => '\'', + Some('x') => { + // Hex escape \xNN + let h1 = self.advance().and_then(|c| c.to_digit(16)); + let h2 = self.advance().and_then(|c| c.to_digit(16)); + match (h1, h2) { + (Some(d1), Some(d2)) => { + let byte = (d1 * 16 + d2) as u8; + byte as char + } + _ => { + return Err(LexError { + message: "Invalid hex escape sequence: expected \\xNN".into(), + span: Span::new(escape_start - 1, self.pos), + }); + } + } + } + Some('u') => { + // Unicode escape \u{NNNN} or \uNNNN + if self.peek() == Some('{') { + self.advance(); // consume '{' + let mut hex = String::new(); + while let Some(c) = self.peek() { + if c == '}' { + self.advance(); + break; + } + if c.is_ascii_hexdigit() { + hex.push(c); + self.advance(); + } else { + return Err(LexError { + message: format!("Invalid unicode escape: expected hex digit, got '{}'", c), + span: Span::new(escape_start - 1, self.pos), + }); + } + } + match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) { + Some(c) => c, + None => { + return Err(LexError { + message: format!("Invalid unicode escape: \\u{{{}}}", hex), + span: Span::new(escape_start - 1, self.pos), + }); + } + } + } else { + // \uNNNN format (4 hex digits) + let mut hex = String::new(); + for _ in 0..4 { + match self.advance() { + Some(c) if c.is_ascii_hexdigit() => hex.push(c), + _ => { + return Err(LexError { + message: "Invalid unicode escape: expected 4 hex digits".into(), + span: Span::new(escape_start - 1, self.pos), + }); + } + } + } + match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) { + Some(c) => c, + None => { + return Err(LexError { + message: format!("Invalid unicode escape: \\u{}", hex), + span: Span::new(escape_start - 1, self.pos), + }); + } + } + } + } Some(c) => { return Err(LexError { message: format!("Invalid escape sequence: \\{}", c),