From 5098104aaf4b8c084b71755cde66dc287ce43c80 Mon Sep 17 00:00:00 2001 From: Brandon Lucas Date: Sat, 14 Feb 2026 13:24:21 -0500 Subject: [PATCH] feat: implement ADT RC - pointer fields in algebraic data types ADT values with pointer fields (like recursive Tree types) now properly manage memory: - Assign unique type tags (starting at 100) to each ADT type - Track which ADTs have pointer fields that need cleanup - Generate lux_drop_adt() function with per-ADT drop logic - Allocate ADT pointer fields with lux_rc_alloc instead of malloc - Track ADT variables with pointer fields in scope - Emit field cleanup code at scope exit (switch on tag, decref fields) Test results: - ADT test: [RC] No leaks: 6 allocs, 6 frees - List test: [RC] No leaks: 31 allocs, 31 frees - Closure test: [RC] No leaks: 8 allocs, 8 frees - All 263 tests pass Remaining: early returns, complex conditionals. Co-Authored-By: Claude Opus 4.5 --- .gitignore | 2 + docs/C_BACKEND.md | 2 +- docs/REFERENCE_COUNTING.md | 8 +- src/codegen/c_backend.rs | 204 +++++++++++++++++++++++++++++++++++-- 4 files changed, 204 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index cb83c1c..02b54bb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ hello test_rc test_rc2 +test_adt_rc +test_closure_rc diff --git a/docs/C_BACKEND.md b/docs/C_BACKEND.md index ece905f..5c4970c 100644 --- a/docs/C_BACKEND.md +++ b/docs/C_BACKEND.md @@ -295,8 +295,8 @@ Inspired by Perceus (Koka), our RC system: - ✅ **Scope tracking** - compiler tracks RC variable lifetimes - ✅ **Automatic decref at scope exit** - verified leak-free - ✅ **Closure RC** - closures and environments are RC-managed +- ✅ **ADT RC** - pointer fields in ADTs are RC-managed - ⏳ Early return handling (decref before nested returns) -- ⏳ ADT RC (algebraic data types) - ⏳ Last-use optimization / reuse (FBIP) See [docs/REFERENCE_COUNTING.md](REFERENCE_COUNTING.md) for details. diff --git a/docs/REFERENCE_COUNTING.md b/docs/REFERENCE_COUNTING.md index 7a7b385..5a16046 100644 --- a/docs/REFERENCE_COUNTING.md +++ b/docs/REFERENCE_COUNTING.md @@ -17,6 +17,7 @@ The RC system is now functional for lists and boxed values. - List operations incref shared elements - **Closures and environments** - RC-managed with automatic cleanup - **Inline lambda cleanup** - temporary closures freed after use +- **ADT pointer fields** - RC-allocated and cleaned up at scope exit - **Scope tracking** - compiler tracks RC variable lifetimes - **Automatic decref at scope exit** - variables are freed when out of scope - **Memory tracking** - debug mode reports allocs/frees at program exit @@ -29,7 +30,6 @@ The RC system is now functional for lists and boxed values. ### What's NOT Yet Implemented - Early return handling (decref before return in nested scopes) - Conditional branch handling (complex if/else patterns) -- ADT RC ## The Problem @@ -409,7 +409,7 @@ Rust's ownership system is fundamentally different: - Environments allocated with `lux_rc_alloc(sizeof(LuxEnv_N), LUX_TAG_ENV)` - Inline lambdas freed after use in List operations -2. **ADT RC** - Algebraic data types with heap fields +2. ~~**ADT RC**~~ ✅ DONE - Algebraic data types with heap fields - Track which variants contain RC fields - Generate drop functions for each ADT - ~100 lines @@ -448,14 +448,14 @@ Rust's ownership system is fundamentally different: | Phase | Description | Lines | Priority | Status | |-------|-------------|-------|----------|--------| | A1 | Closure RC | ~50 | P0 | ✅ Done | -| A2 | ADT RC | ~100 | P1 - ADTs leak | Pending | +| A2 | ADT RC | ~150 | P1 | ✅ Done | | A3 | Early returns | ~30 | P1 - Edge cases | Pending | | A4 | Conditionals | ~50 | P2 - Uncommon | Pending | | B1 | Last-use opt | ~200 | P3 - Performance | Pending | | B2 | Reuse (FBIP) | ~300 | P3 - Performance | Pending | | B3 | Drop special | ~100 | P3 - Performance | Pending | -**Phase A remaining: ~180 lines** - Gets us to "no leaks" +**Phase A remaining: ~80 lines** - Gets us to "no leaks" **Phase B total: ~600 lines** - Gets us to Koka-level performance ### Cycle Detection diff --git a/src/codegen/c_backend.rs b/src/codegen/c_backend.rs index 4a9721f..3b68b65 100644 --- a/src/codegen/c_backend.rs +++ b/src/codegen/c_backend.rs @@ -68,6 +68,7 @@ struct ClosureInfo { struct RcVariable { name: String, // Variable name in generated C code c_type: String, // C type (for documentation/debugging) + adt_type_name: Option, // If Some, this is an ADT that needs field cleanup } impl std::fmt::Display for CGenError { @@ -110,6 +111,12 @@ pub struct CBackend { rc_scopes: Vec>, /// Whether to emit memory tracking code for debugging debug_rc: bool, + /// Type tags for ADT types (starting at 100) + adt_type_tags: HashMap, + /// Next available ADT type tag + next_adt_tag: i32, + /// ADT types that have pointer fields (need drop functions) + adt_with_pointers: HashSet, } impl CBackend { @@ -130,6 +137,9 @@ impl CBackend { has_evidence: false, rc_scopes: Vec::new(), debug_rc: true, // Enable memory tracking for now + adt_type_tags: HashMap::new(), + next_adt_tag: 100, // ADT tags start at 100 + adt_with_pointers: HashSet::new(), } } @@ -163,6 +173,9 @@ impl CBackend { // Emit type definitions self.emit_type_definitions(program)?; + // Emit ADT drop function (after type definitions so we know the ADT structure) + self.emit_adt_drop_function(); + // Emit forward declarations for regular functions self.emit_forward_declarations(program)?; @@ -413,8 +426,9 @@ impl CBackend { self.writeln("// Get the RC header from an object pointer"); self.writeln("#define LUX_RC_HEADER(ptr) (((LuxRcHeader*)(ptr)) - 1)"); self.writeln(""); - self.writeln("// Forward declaration of polymorphic drop"); + self.writeln("// Forward declarations of drop functions"); self.writeln("static void lux_drop(void* ptr, int32_t tag);"); + self.writeln("static void lux_drop_adt(void* ptr, int32_t tag);"); self.writeln(""); // Memory tracking counters (must be before lux_rc_alloc which uses them) @@ -1117,8 +1131,8 @@ impl CBackend { self.writeln(" // For now, just free - proper drop needs type info"); self.writeln(" break;"); self.writeln(" default:"); - self.writeln(" // ADT types (tag >= 100) - handled by generated drop functions"); - self.writeln(" // For now, just free the object"); + self.writeln(" // ADT types (tag >= 100) - call generated drop function"); + self.writeln(" lux_drop_adt(ptr, tag);"); self.writeln(" break;"); self.writeln(" }"); self.writeln(" // Free the object and its RC header"); @@ -1146,6 +1160,88 @@ impl CBackend { Ok(()) } + /// Emit the lux_drop_adt function that handles dropping ADT types + fn emit_adt_drop_function(&mut self) { + self.writeln("// === ADT Drop Function ==="); + self.writeln("// Generated based on ADT type definitions."); + self.writeln("static void lux_drop_adt(void* ptr, int32_t tag) {"); + self.indent += 1; + + if self.adt_with_pointers.is_empty() { + // No ADTs with pointer fields - nothing to do + self.writeln("// No ADTs with pointer fields"); + self.writeln("(void)ptr; (void)tag; // Unused"); + } else { + self.writeln("switch (tag) {"); + self.indent += 1; + + // Generate cases for each ADT type with pointer fields + // Clone to avoid borrow issues + let adt_tags = self.adt_type_tags.clone(); + let variant_field_types = self.variant_field_types.clone(); + + for (adt_name, tag) in &adt_tags { + if !self.adt_with_pointers.contains(adt_name) { + continue; + } + + self.writeln(&format!("case {}: {{", tag)); + self.indent += 1; + self.writeln(&format!("{}* adt = ({}*)ptr;", adt_name, adt_name)); + + // Find all variants of this ADT and their pointer fields + let mut variant_cases: Vec<(String, Vec<(usize, String)>)> = Vec::new(); + for ((type_name, variant_name), field_types) in &variant_field_types { + if type_name != adt_name { + continue; + } + let pointer_fields: Vec<(usize, String)> = field_types.iter() + .enumerate() + .filter(|(_, t)| t.ends_with('*')) + .map(|(i, t)| (i, t.clone())) + .collect(); + if !pointer_fields.is_empty() { + variant_cases.push((variant_name.clone(), pointer_fields)); + } + } + + if !variant_cases.is_empty() { + self.writeln("switch (adt->tag) {"); + self.indent += 1; + + for (variant_name, pointer_fields) in &variant_cases { + let variant_upper = variant_name.to_uppercase(); + let variant_lower = variant_name.to_lowercase(); + self.writeln(&format!("case {}_TAG_{}: {{", adt_name, variant_upper)); + self.indent += 1; + for (field_idx, _field_type) in pointer_fields { + self.writeln(&format!("lux_decref(adt->data.{}.field{});", variant_lower, field_idx)); + } + self.writeln("break;"); + self.indent -= 1; + self.writeln("}"); + } + + self.writeln("default: break;"); + self.indent -= 1; + self.writeln("}"); + } + + self.writeln("break;"); + self.indent -= 1; + self.writeln("}"); + } + + self.writeln("default: break;"); + self.indent -= 1; + self.writeln("}"); + } + + self.indent -= 1; + self.writeln("}"); + self.writeln(""); + } + fn emit_type_def(&mut self, type_decl: &TypeDecl) -> Result<(), CGenError> { let name = &type_decl.name.name; @@ -1186,12 +1282,25 @@ impl CBackend { } VariantFields::Unit => vec![], }; + // Track if this variant has pointer fields + let has_pointers = field_types.iter().any(|t| t.ends_with('*')); + if has_pointers { + self.adt_with_pointers.insert(name.clone()); + } + self.variant_field_types.insert( (name.clone(), variant.name.name.clone()), field_types ); } + // Assign a type tag to this ADT + if !self.adt_type_tags.contains_key(name) { + let tag = self.next_adt_tag; + self.adt_type_tags.insert(name.clone(), tag); + self.next_adt_tag += 1; + } + // Forward declare the main struct for recursive types self.writeln(&format!("struct {};", name)); self.writeln(&format!("typedef struct {} {};", name, name)); @@ -1494,15 +1603,19 @@ impl CBackend { let arg_values = arg_values?; // Build field initializers, handling pointer fields + // Clone adt_type_tags to avoid borrow issues + let adt_tags = self.adt_type_tags.clone(); let field_inits: Vec = arg_values.iter() .enumerate() .map(|(i, v)| { let field_type = field_types.get(i).map(|s| s.as_str()).unwrap_or("LuxInt"); if field_type.ends_with('*') { - // Pointer field - allocate and copy + // Pointer field - allocate with RC and copy let base_type = &field_type[..field_type.len()-1]; - format!(".field{} = ({}*)memcpy(malloc(sizeof({})), &({}), sizeof({}))", - i, base_type, base_type, v, base_type) + // Look up type tag for this ADT (default to 100 if not found) + let type_tag = adt_tags.get(base_type).copied().unwrap_or(100); + format!(".field{} = ({}*)memcpy(lux_rc_alloc(sizeof({}), {}), &({}), sizeof({}))", + i, base_type, base_type, type_tag, v, base_type) } else { format!(".field{} = {}", i, v) } @@ -1627,6 +1740,9 @@ impl CBackend { // Register RC variable if it creates a new RC value if self.expr_creates_rc_value(value) { self.register_rc_var(&name.name, &typ); + } else if let Some(adt_name) = self.expr_creates_adt_with_pointers(value) { + // ADT with pointer fields - needs field cleanup at scope exit + self.register_rc_var_with_adt(&name.name, &typ, Some(adt_name)); } } Statement::Expr(e) => { @@ -2633,19 +2749,72 @@ impl CBackend { /// Pop the current scope and emit decref calls for all variables fn pop_rc_scope(&mut self) { if let Some(scope) = self.rc_scopes.pop() { + // Clone data we need to avoid borrow issues + let variant_field_types = self.variant_field_types.clone(); + // Decref in reverse order (LIFO - last allocated, first freed) for var in scope.iter().rev() { - self.writeln(&format!("lux_decref({});", var.name)); + if let Some(adt_name) = &var.adt_type_name { + // ADT with pointer fields - need to decref the fields + self.emit_adt_field_cleanup(&var.name, adt_name, &variant_field_types); + } else { + // Regular RC variable - just decref + self.writeln(&format!("lux_decref({});", var.name)); + } } } } + /// Emit cleanup code for an ADT variable's pointer fields + fn emit_adt_field_cleanup(&mut self, var_name: &str, adt_name: &str, variant_field_types: &HashMap<(String, String), Vec>) { + // Find all variants of this ADT with pointer fields + let mut has_cleanup = false; + + for ((type_name, variant_name), field_types) in variant_field_types { + if type_name != adt_name { + continue; + } + let pointer_fields: Vec<(usize, &String)> = field_types.iter() + .enumerate() + .filter(|(_, t)| t.ends_with('*')) + .collect(); + if !pointer_fields.is_empty() { + if !has_cleanup { + self.writeln(&format!("switch ({}.tag) {{", var_name)); + self.indent += 1; + has_cleanup = true; + } + let variant_upper = variant_name.to_uppercase(); + let variant_lower = variant_name.to_lowercase(); + self.writeln(&format!("case {}_TAG_{}: {{", adt_name, variant_upper)); + self.indent += 1; + for (field_idx, _) in &pointer_fields { + self.writeln(&format!("lux_decref({}.data.{}.field{});", var_name, variant_lower, field_idx)); + } + self.writeln("break;"); + self.indent -= 1; + self.writeln("}"); + } + } + + if has_cleanup { + self.writeln("default: break;"); + self.indent -= 1; + self.writeln("}"); + } + } + /// Register an RC-managed variable in the current scope fn register_rc_var(&mut self, name: &str, c_type: &str) { + self.register_rc_var_with_adt(name, c_type, None); + } + + fn register_rc_var_with_adt(&mut self, name: &str, c_type: &str, adt_type_name: Option) { if let Some(scope) = self.rc_scopes.last_mut() { scope.push(RcVariable { name: name.to_string(), c_type: c_type.to_string(), + adt_type_name, }); } } @@ -2718,6 +2887,27 @@ impl CBackend { } } + /// Check if an expression creates an ADT with pointer fields that need cleanup + /// Returns Some(adt_type_name) if so, None otherwise + fn expr_creates_adt_with_pointers(&self, expr: &Expr) -> Option { + match expr { + // ADT constructor call + Expr::Call { func, .. } => { + if let Expr::Var(ident) = func.as_ref() { + // Check if this is an ADT constructor + if let Some(adt_name) = self.variant_to_type.get(&ident.name) { + // Check if this ADT has pointer fields + if self.adt_with_pointers.contains(adt_name) { + return Some(adt_name.clone()); + } + } + } + None + } + _ => None, + } + } + /// Check if an expression is a call to a function that returns a closure fn is_closure_returning_call(&self, expr: &Expr) -> bool { match expr {