feat: implement better error messages with 'Did you mean?' suggestions
Add Levenshtein distance-based similarity matching for undefined variables, unknown types, unknown effects, and unknown traits. When a name is not found, the error now suggests similar names within edit distance 2.

Changes:
- Add levenshtein_distance() function to diagnostics module
- Add find_similar_names() and format_did_you_mean() helpers
- Update typechecker to suggest similar names for:
  - Undefined variables
  - Unknown types
  - Unknown effects
  - Unknown traits
- Add 17 new tests for similarity matching

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -84,6 +84,80 @@ impl Diagnostic {
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the Levenshtein edit distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions, and substitutions needed to turn `a` into `b`. Characters are
/// compared as Rust `char`s (Unicode scalar values), not bytes, so a
/// multi-byte character counts as a single edit.
///
/// Runs in `O(len(a) * len(b))` time and keeps only one row of the dynamic
/// programming table, so extra memory is proportional to `len(b)`.
pub fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    // Turning an empty string into the other one takes one insertion per char.
    if a_chars.is_empty() {
        return b_chars.len();
    }
    if b_chars.is_empty() {
        return a_chars.len();
    }

    // `row[j]` holds the distance between the prefix of `a` processed so far
    // and the first `j` characters of `b`. It starts as the distances from
    // the empty prefix: `j` insertions.
    let mut row: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, &a_ch) in a_chars.iter().enumerate() {
        // Value from the previous row, one column to the left (the diagonal).
        let mut diagonal = row[0];
        // Deleting the first `i + 1` characters of `a` yields the empty string.
        row[0] = i + 1;

        for (j, &b_ch) in b_chars.iter().enumerate() {
            let cost = usize::from(a_ch != b_ch);
            // `row[j + 1]` still holds the previous row's value at this column.
            let above = row[j + 1];

            row[j + 1] = (above + 1) // deletion
                .min(row[j] + 1) // insertion
                .min(diagonal + cost); // substitution

            diagonal = above;
        }
    }

    row[b_chars.len()]
}
|
||||
|
||||
/// Find similar names from a list of candidates
|
||||
/// Returns names within the given edit distance, sorted by similarity
|
||||
pub fn find_similar_names<'a>(
|
||||
target: &str,
|
||||
candidates: impl IntoIterator<Item = &'a str>,
|
||||
max_distance: usize,
|
||||
) -> Vec<String> {
|
||||
let mut matches: Vec<(String, usize)> = candidates
|
||||
.into_iter()
|
||||
.filter(|&c| c != target) // Don't suggest the same name
|
||||
.map(|c| (c.to_string(), levenshtein_distance(target, c)))
|
||||
.filter(|(_, dist)| *dist <= max_distance && *dist > 0)
|
||||
.collect();
|
||||
|
||||
// Sort by distance (closest first), then alphabetically
|
||||
matches.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));
|
||||
|
||||
// Return just the names, limited to top 3
|
||||
matches.into_iter().take(3).map(|(name, _)| name).collect()
|
||||
}
|
||||
|
||||
/// Format a "Did you mean?" hint from a list of suggested names.
///
/// Returns `None` when `suggestions` is empty. Otherwise the hint quotes each
/// name in single quotes: one name alone, two names joined by "or", or three
/// names as a comma-separated list ending in ", or". Only the first three
/// suggestions are used; any further entries are ignored.
pub fn format_did_you_mean(suggestions: &[String]) -> Option<String> {
    // Slice patterns keep the match exhaustive without indexing.
    match suggestions {
        [] => None,
        [only] => Some(format!("Did you mean '{}'?", only)),
        [first, second] => Some(format!("Did you mean '{}' or '{}'?", first, second)),
        [first, second, third, ..] => Some(format!(
            "Did you mean '{}', '{}', or '{}'?",
            first, second, third
        )),
    }
}
|
||||
|
||||
/// Converts byte offset to (line, column) - 1-indexed
|
||||
pub fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
|
||||
let mut line = 1;
|
||||
@@ -425,4 +499,76 @@ mod tests {
|
||||
assert!(output.contains("+"));
|
||||
assert!(output.contains("^"));
|
||||
}
|
||||
|
||||
#[test]
fn test_levenshtein_distance_identical() {
    // Comparing a string with itself must report zero edits.
    let distance = super::levenshtein_distance("hello", "hello");
    assert_eq!(distance, 0);
}
|
||||
|
||||
#[test]
fn test_levenshtein_distance_one_char() {
    // Each pair differs by exactly one substituted character.
    for (left, right) in [("hello", "hallo"), ("cat", "car")] {
        assert_eq!(super::levenshtein_distance(left, right), 1);
    }
}
|
||||
|
||||
#[test]
fn test_levenshtein_distance_insertion() {
    // The second string has one extra trailing character.
    let distance = super::levenshtein_distance("cat", "cats");
    assert_eq!(distance, 1);
}
|
||||
|
||||
#[test]
fn test_levenshtein_distance_deletion() {
    // The second string is the first with its trailing character removed.
    let distance = super::levenshtein_distance("cats", "cat");
    assert_eq!(distance, 1);
}
|
||||
|
||||
#[test]
fn test_levenshtein_distance_empty() {
    // Against an empty string, the distance is the other string's length,
    // regardless of argument order.
    let from_empty = super::levenshtein_distance("", "hello");
    let to_empty = super::levenshtein_distance("hello", "");
    assert_eq!(from_empty, 5);
    assert_eq!(to_empty, 5);
}
|
||||
|
||||
#[test]
fn test_find_similar_names_basic() {
    // "prnt" is one insertion away from "print", so it must be suggested.
    let pool = vec!["println", "print", "printf", "sprint"];
    let suggestions = super::find_similar_names("prnt", pool, 2);
    assert!(suggestions.iter().any(|name| name == "print"));
}
|
||||
|
||||
#[test]
fn test_find_similar_names_no_match() {
    // None of the candidates is within two edits of "xyz".
    let pool = vec!["apple", "banana", "cherry"];
    let suggestions = super::find_similar_names("xyz", pool, 2);
    assert!(suggestions.is_empty());
}
|
||||
|
||||
#[test]
fn test_find_similar_names_excludes_exact() {
    // The target itself appears among the candidates but must not be echoed
    // back; the near miss "hallo" must still be offered.
    let pool = vec!["hello", "hallo", "world"];
    let suggestions = super::find_similar_names("hello", pool, 2);
    let suggested = |name: &str| suggestions.iter().any(|entry| entry == name);

    assert!(!suggested("hello"));
    assert!(suggested("hallo"));
}
|
||||
|
||||
#[test]
fn test_format_did_you_mean_none() {
    // With no suggestions there is no hint to show.
    let hint = super::format_did_you_mean(&[]);
    assert_eq!(hint, None);
}
|
||||
|
||||
#[test]
fn test_format_did_you_mean_one() {
    // A single suggestion is quoted on its own.
    let suggestions = [String::from("print")];
    let expected = String::from("Did you mean 'print'?");
    assert_eq!(super::format_did_you_mean(&suggestions), Some(expected));
}
|
||||
|
||||
#[test]
fn test_format_did_you_mean_two() {
    // Two suggestions are joined with "or".
    let suggestions = [String::from("print"), String::from("println")];
    let expected = String::from("Did you mean 'print' or 'println'?");
    assert_eq!(super::format_did_you_mean(&suggestions), Some(expected));
}
|
||||
|
||||
#[test]
fn test_format_did_you_mean_three() {
    // Three suggestions form a comma-separated list ending in ", or".
    let suggestions = [String::from("a"), String::from("b"), String::from("c")];
    let expected = String::from("Did you mean 'a', 'b', or 'c'?");
    assert_eq!(super::format_did_you_mean(&suggestions), Some(expected));
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user