diff --git a/docs/REFERENCE_COUNTING.md b/docs/REFERENCE_COUNTING.md index a87fa71..0bd77c8 100644 --- a/docs/REFERENCE_COUNTING.md +++ b/docs/REFERENCE_COUNTING.md @@ -4,219 +4,364 @@ This document describes the reference counting (RC) system for automatic memory management in the Lux C backend. The approach is inspired by Perceus (used in Koka) but starts with a simpler implementation. -## Current Status +## Current Status: WORKING -**Phase 1-2 Complete**: The RC infrastructure and allocation functions are implemented. All heap-allocated objects (strings, lists, boxed values) are now RC-managed. +The RC system is now functional for lists and boxed values. -**What's Implemented:** -- RC header with refcount and type tag -- `lux_rc_alloc()` for allocating RC-managed objects -- `lux_incref()` / `lux_decref()` operations -- Polymorphic `lux_drop()` function -- Lists, boxed values, and dynamically-created strings use RC allocation -- List operations properly incref shared elements +### What's Implemented +- RC header structure (`LuxRcHeader` with refcount + type tag) +- Allocation function (`lux_rc_alloc`) +- Reference operations (`lux_incref`, `lux_decref`) +- Polymorphic drop function (`lux_drop`) +- Lists, boxed values, strings use RC allocation +- List operations incref shared elements +- **Scope tracking** - compiler tracks RC variable lifetimes +- **Automatic decref at scope exit** - variables are freed when out of scope +- **Memory tracking** - debug mode reports allocs/frees at program exit -**What's NOT Yet Implemented:** -- Automatic decref insertion at scope exit -- Last-use analysis for ownership transfer +### Verified Working +``` +[RC] No leaks: 28 allocs, 28 frees +``` + +### What's NOT Yet Implemented +- Early return handling (decref before return in nested scopes) +- Conditional branch handling (complex if/else patterns) - Closure RC (environments still leak) - ADT RC -## Design +## The Problem -### RC Header - -All heap-allocated objects share a 
common header: +Without automatic decref insertion, generated code looks like this: ```c -typedef struct { - int32_t rc; // Reference count - int32_t tag; // Type tag for polymorphic drop -} LuxRcHeader; - -// Macro to get header from object pointer -#define LUX_RC_HEADER(ptr) (((LuxRcHeader*)(ptr)) - 1) +void example(LuxEvidence* ev) { + LuxList* nums = lux_list_new(5); // rc=1, allocated + // ... use nums ... + // MISSING: lux_decref(nums); <- MEMORY LEAK! +} ``` -### Type Tags +It should look like this: ```c -typedef enum { - LUX_TAG_STRING = 1, - LUX_TAG_LIST = 2, - LUX_TAG_CLOSURE = 3, - LUX_TAG_BOXED_INT = 4, - LUX_TAG_BOXED_BOOL = 5, - LUX_TAG_BOXED_FLOAT = 6, - LUX_TAG_ENV = 7, // Closure environment - LUX_TAG_ADT = 100 // ADT types start at 100 -} LuxTypeTag; +void example(LuxEvidence* ev) { + LuxList* nums = lux_list_new(5); // rc=1 + // ... use nums ... + lux_decref(nums); // rc=0, freed +} ``` -### RC Operations +--- -```c -// Allocate RC-managed memory with initial refcount of 1 -static void* lux_rc_alloc(size_t size, int32_t tag) { - LuxRcHeader* hdr = (LuxRcHeader*)malloc(sizeof(LuxRcHeader) + size); - if (!hdr) return NULL; - hdr->rc = 1; - hdr->tag = tag; - return hdr + 1; // Return pointer after header +## Implementation Plan + +### Phase 1: Scope Tracking + +**Goal:** Track which RC-managed variables are live at each point. + +**Data structures needed in CBackend:** + +```rust +struct CBackend { + // ... existing fields ... + + /// Stack of scopes, each containing RC-managed variables + /// Each scope is a Vec of RcVariable (name, c_type, is_rc) + rc_scopes: Vec<Vec<RcVariable>>, } -// Increment reference count -static inline void lux_incref(void* ptr) { - if (ptr) LUX_RC_HEADER(ptr)->rc++; +struct RcVariable { + name: String, // Variable name + c_type: String, // C type (for casting in decref) + is_rc: bool, // Whether this needs RC management +} +``` + +**Operations:** +- `push_scope()` - Enter a new scope (function, block, etc.) 
+- `pop_scope()` - Exit scope, emit decrefs for all live variables +- `register_rc_var(name, type)` - Register a variable that needs RC management + +### Phase 2: Identify RC-Managed Types + +**Goal:** Determine which types need RC management. + +RC-managed types: +- `LuxList*` - Lists +- `LuxString` (when dynamically allocated) - Strings from concat/conversion +- `LuxClosure*` - Closures +- Boxed values (`void*` from `lux_box_*`) +- ADT variants with pointer fields + +NOT RC-managed: +- `LuxInt`, `LuxFloat`, `LuxBool` - Stack-allocated primitives +- String literals (`"hello"`) - Static, not heap-allocated +- `LuxUnit` - No data + +**Implementation:** + +```rust +fn is_rc_managed_type(&self, c_type: &str) -> bool { + matches!(c_type, + "LuxList*" | "LuxClosure*" | "LuxString" | "void*" + ) || c_type.ends_with("*") // Most pointer types are RC } -// Decrement reference count, call drop if zero -static inline void lux_decref(void* ptr) { - if (ptr) { - LuxRcHeader* hdr = LUX_RC_HEADER(ptr); - if (--hdr->rc == 0) { - lux_drop(ptr, hdr->tag); +fn needs_rc_for_expr(&self, expr: &Expr) -> bool { + match expr { + Expr::List { .. } => true, + Expr::Lambda { .. } => true, + Expr::StringConcat { .. } => true, + Expr::Call { func, .. } => { + // Check if function returns RC type + self.returns_rc_type(func) } + Expr::Literal(Literal::String(_)) => false, // Static string + Expr::Literal(_) => false, // Primitives + Expr::Var(_) => false, // Using existing var, don't double-free + _ => false, } } ``` -### Drop Functions +### Phase 3: Emit Decrefs at Scope Exit -The polymorphic drop function handles cleanup for each type: +**Goal:** Insert `lux_decref()` calls when variables go out of scope. 
-```c -static void lux_drop(void* ptr, int32_t tag) { - if (!ptr) return; - switch (tag) { - case LUX_TAG_STRING: - // Strings are just char arrays, no sub-references - break; - case LUX_TAG_LIST: { - LuxList* list = (LuxList*)ptr; - // Decref each element (they're all boxed/RC-managed) - for (int64_t i = 0; i < list->length; i++) { - lux_decref(list->elements[i]); +**For function bodies:** +```rust +fn emit_function(&mut self, func: &Function) -> Result<(), CGenError> { + self.push_scope(); + + // ... emit function body ... + + // Before the closing brace, emit decrefs + self.emit_scope_cleanup(); + self.pop_scope(); +} +``` + +**The cleanup function:** +```rust +fn emit_scope_cleanup(&mut self) { + if let Some(scope) = self.rc_scopes.last() { + // Decref in reverse order (LIFO) + for var in scope.iter().rev() { + if var.is_rc { + self.writeln(&format!("lux_decref({});", var.name)); } - free(list->elements); - break; } - case LUX_TAG_CLOSURE: { - LuxClosure* closure = (LuxClosure*)ptr; - // Decref the environment if it's RC-managed - lux_decref(closure->env); - break; - } - case LUX_TAG_BOXED_INT: - case LUX_TAG_BOXED_BOOL: - case LUX_TAG_BOXED_FLOAT: - // Primitive boxes have no sub-references - break; - default: - // ADT types - handled by generated drop functions - break; } - // Free the object and its RC header - free(LUX_RC_HEADER(ptr)); } ``` -## Code Generation Rules (Future Work) +### Phase 4: Handle Let Bindings -### Variable Bindings +**Goal:** Register variables when they're bound. 
-When a value is bound to a variable: -```c -// let x = expr -Type x = expr; // expr returns owned reference (rc=1) -``` +```rust +fn emit_let(&mut self, name: &str, value: &Expr) -> Result<String, CGenError> { + let c_type = self.infer_c_type(value)?; + let value_code = self.emit_expr(value)?; -### Variable Use + self.writeln(&format!("{} {} = {};", c_type, name, value_code)); -When a variable is used (not the last use): -```c -// Using x in expression -lux_incref(x); -some_function(x); // Pass owned reference -``` + // Register for cleanup if RC-managed + if self.is_rc_managed_type(&c_type) && self.needs_rc_for_expr(value) { + self.register_rc_var(name, &c_type); + } -### Last Use - -When a variable is used for the last time: -```c -// Last use of x - no incref needed -some_function(x); // Transfer ownership -``` - -### Scope Exit - -When a scope ends, decref all local variables: -```c -{ - Type x = ...; - Type y = ...; - // ... use x and y ... - lux_decref(y); - lux_decref(x); + Ok(name.to_string()) } ``` -## Implementation Phases +### Phase 5: Handle Early Returns -### Phase 1: RC Infrastructure ✅ COMPLETE -- Add LuxRcHeader and allocation functions -- Add incref/decref/drop functions -- Type tags for built-in types +**Goal:** Decref all live variables before returning. -### Phase 2: List RC ✅ COMPLETE -- Modify lux_list_new to use RC allocation -- Add drop function for lists -- List operations (concat, reverse, etc.) 
incref shared elements +```rust +fn emit_return(&mut self, value: &Expr) -> Result<String, CGenError> { + let return_val = self.emit_expr(value)?; -### Phase 3: Boxing RC ✅ COMPLETE -- All box functions use lux_rc_alloc -- String operations create RC-managed strings + // Store return value in temp if it's an RC variable we're about to decref + let temp_needed = self.is_rc_managed_type(&self.infer_c_type(value)?); -### Phase 4: Scope Tracking (TODO) -- Track variable lifetimes -- Insert decref at scope exit -- Handle early returns + if temp_needed { + self.writeln(&format!("void* _ret_tmp = {};", return_val)); + self.writeln("lux_incref(_ret_tmp);"); // Keep it alive + } -### Phase 5: Closure RC (TODO) -- Modify closure allocation to use RC -- Environment structs use RC -- Handle captured variables + // Decref all scopes from innermost to outermost + for scope in self.rc_scopes.iter().rev() { + for var in scope.iter().rev() { + if var.is_rc { + self.writeln(&format!("lux_decref({});", var.name)); + } + } + } -### Phase 6: Last-Use Analysis (Optimization) -- Track last use of variables -- Skip incref on last use (ownership transfer) -- Enable Perceus-style reuse - -## Memory Layout - -RC-managed objects have this memory layout: + if temp_needed { + self.writeln("return _ret_tmp;"); + } else { + self.writeln(&format!("return {};", return_val)); + } + Ok(String::new()) +} ``` -+------------------+ -| LuxRcHeader | <- malloc returns this pointer -| int32_t rc | -| int32_t tag | -+------------------+ -| Object Data | <- lux_rc_alloc returns this pointer -| ... | -+------------------+ + +### Phase 6: Handle Conditionals + +**Goal:** Properly handle if/else where both branches may define variables. + +For if/else expressions that create RC values: +```c +// Before (leaks): +LuxList* result = (condition ? 
create_list_a() : create_list_b()); + +// After (no leak): +LuxList* result; +if (condition) { + result = create_list_a(); +} else { + result = create_list_b(); +} +// Only one path executed, only one allocation ``` +This requires changing if/else from ternary expressions to proper if statements. + +### Phase 7: Handle Blocks + +**Goal:** Each block `{ ... }` creates a new scope. + +```rust +fn emit_block(&mut self, statements: &[Statement]) -> Result<String, CGenError> { + self.push_scope(); + self.writeln("{"); + self.indent += 1; + + let mut last_value = String::from("NULL"); + for stmt in statements { + last_value = self.emit_statement(stmt)?; + } + + // Cleanup before leaving block + self.emit_scope_cleanup(); + + self.indent -= 1; + self.writeln("}"); + self.pop_scope(); + + Ok(last_value) +} +``` + +--- + +## Testing Strategy + +### Unit Tests + +1. **Simple allocation and free:** +```lux +fn test(): Unit = { + let x = [1, 2, 3] // Should be freed at end +} +``` + +2. **Nested scopes:** +```lux +fn test(): Unit = { + let outer = [1] + { + let inner = [2] // Freed here + } + // outer still live +} // outer freed here +``` + +3. **Early return:** +```lux +fn test(b: Bool): List = { + let x = [1, 2, 3] + if b then return [] // x must be freed before return + x +} +``` + +4. **Conditionals:** +```lux +fn test(b: Bool): List = { + let x = if b then [1] else [2] // Only one allocated + x +} +``` + +### Memory Leak Detection + +Use valgrind (if available) or add debug tracking: + +```c +static int64_t lux_alloc_count = 0; +static int64_t lux_free_count = 0; + +static void* lux_rc_alloc(size_t size, int32_t tag) { + lux_alloc_count++; + // ... existing code ... +} + +static void lux_drop(void* ptr, int32_t tag) { + lux_free_count++; + // ... existing code ... 
+} + +// At program exit: +void lux_check_leaks() { + if (lux_alloc_count != lux_free_count) { + fprintf(stderr, "LEAK: %lld allocations, %lld frees\n", + lux_alloc_count, lux_free_count); + } +} +``` + +--- + ## Comparison with Perceus | Feature | Perceus (Koka) | Lux RC (Current) | |---------|----------------|------------------| | RC header | Yes | Yes ✅ | -| RC insertion | Compile-time | Partial | -| Last-use opt | Yes | TODO | -| Reuse (FBIP) | Yes | Future | +| Scope tracking | Yes | Yes ✅ | +| Auto decref | Yes | Yes ✅ | +| Memory tracking | No | Yes ✅ (debug) | +| Early return | Yes | Partial | +| Last-use opt | Yes | No | +| Reuse (FBIP) | Yes | No | | Drop fusion | Yes | No | -| Borrow inference | Yes | No | + +--- + +## Files to Modify + +| File | Changes | +|------|---------| +| `src/codegen/c_backend.rs` | Add scope tracking, emit decrefs | + +## Estimated Complexity + +- Scope tracking data structures: ~30 lines +- Type classification: ~40 lines +- Scope cleanup emission: ~30 lines +- Let binding registration: ~20 lines +- Early return handling: ~40 lines +- Block scope handling: ~30 lines +- Testing: ~100 lines + +**Total: ~300 lines of careful implementation** + +--- ## References diff --git a/src/codegen/c_backend.rs b/src/codegen/c_backend.rs index 2ebc4a8..91dfbda 100644 --- a/src/codegen/c_backend.rs +++ b/src/codegen/c_backend.rs @@ -63,6 +63,13 @@ struct ClosureInfo { body: Expr, } +/// Information about an RC-managed variable in scope +#[derive(Debug, Clone)] +struct RcVariable { + name: String, // Variable name in generated C code + c_type: String, // C type (for documentation/debugging) +} + impl std::fmt::Display for CGenError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "C codegen error: {}", self.message) @@ -99,6 +106,10 @@ pub struct CBackend { effectful_functions: HashSet, /// Whether we're currently inside an effectful function (has evidence available) has_evidence: bool, + /// Stack of scopes for 
RC management - each scope contains variables that need decref + rc_scopes: Vec<Vec<RcVariable>>, + /// Whether to emit memory tracking code for debugging + debug_rc: bool, } impl CBackend { @@ -117,6 +128,8 @@ impl CBackend { variant_field_types: HashMap::new(), effectful_functions: HashSet::new(), has_evidence: false, + rc_scopes: Vec::new(), + debug_rc: true, // Enable memory tracking for now } } @@ -403,12 +416,24 @@ impl CBackend { self.writeln("// Forward declaration of polymorphic drop"); self.writeln("static void lux_drop(void* ptr, int32_t tag);"); self.writeln(""); + + // Memory tracking counters (must be before lux_rc_alloc which uses them) + if self.debug_rc { + self.writeln("// Memory tracking counters"); + self.writeln("static int64_t lux_rc_alloc_count = 0;"); + self.writeln("static int64_t lux_rc_free_count = 0;"); + self.writeln(""); + } + self.writeln("// Allocate RC-managed memory with initial refcount of 1"); self.writeln("static void* lux_rc_alloc(size_t size, int32_t tag) {"); self.writeln(" LuxRcHeader* hdr = (LuxRcHeader*)malloc(sizeof(LuxRcHeader) + size);"); self.writeln(" if (!hdr) return NULL;"); self.writeln(" hdr->rc = 1;"); self.writeln(" hdr->tag = tag;"); + if self.debug_rc { + self.writeln(" lux_rc_alloc_count++;"); + } self.writeln(" return hdr + 1; // Return pointer after header"); self.writeln("}"); self.writeln(""); @@ -432,6 +457,23 @@ impl CBackend { self.writeln(" return ptr ? 
LUX_RC_HEADER(ptr)->rc : 0;"); self.writeln("}"); self.writeln(""); + + // Memory leak check function (only if debug_rc is enabled) + if self.debug_rc { + self.writeln("// === Memory Tracking (Debug) ==="); + self.writeln("static void lux_rc_check_leaks(void) {"); + self.writeln(" if (lux_rc_alloc_count != lux_rc_free_count) {"); + self.writeln(" fprintf(stderr, \"[RC] LEAK DETECTED: %lld allocs, %lld frees, %lld leaked\\n\","); + self.writeln(" (long long)lux_rc_alloc_count, (long long)lux_rc_free_count,"); + self.writeln(" (long long)(lux_rc_alloc_count - lux_rc_free_count));"); + self.writeln(" } else {"); + self.writeln(" fprintf(stderr, \"[RC] No leaks: %lld allocs, %lld frees\\n\","); + self.writeln(" (long long)lux_rc_alloc_count, (long long)lux_rc_free_count);"); + self.writeln(" }"); + self.writeln("}"); + self.writeln(""); + } + self.writeln("// === String Operations ==="); self.writeln("// Dynamically created strings are RC-managed."); self.writeln("// Static string literals from source code are NOT RC-managed."); @@ -1080,6 +1122,9 @@ impl CBackend { self.writeln(" break;"); self.writeln(" }"); self.writeln(" // Free the object and its RC header"); + if self.debug_rc { + self.writeln(" lux_rc_free_count++;"); + } self.writeln(" free(LUX_RC_HEADER(ptr));"); self.writeln("}"); self.writeln(""); @@ -1273,14 +1318,34 @@ impl CBackend { self.has_evidence = true; } + // Push function scope for RC tracking + self.push_rc_scope(); + // Emit function body let result = self.emit_expr(&func.body)?; // Restore previous evidence state self.has_evidence = prev_has_evidence; - if ret_type != "void" { - self.writeln(&format!("return {};", result)); + // For non-void functions, we need to save result, decref locals, then return + if ret_type != "void" && ret_type != "LuxUnit" { + // Check if result is an RC type that we need to keep alive + let is_rc_result = self.is_rc_type(&ret_type); + if is_rc_result && !self.rc_scopes.last().map_or(true, |s| s.is_empty()) { + // 
Save result, incref to keep it alive through cleanup + self.writeln(&format!("{} _result = {};", ret_type, result)); + self.writeln("lux_incref(_result);"); + self.pop_rc_scope(); // Emit decrefs for all local RC vars + self.writeln("lux_decref(_result); // Balance the incref"); + self.writeln("return _result;"); + } else { + // No RC locals or non-RC result - simple cleanup + self.pop_rc_scope(); + self.writeln(&format!("return {};", result)); + } + } else { + // Void function - just cleanup + self.pop_rc_scope(); } self.indent -= 1; @@ -1536,6 +1601,9 @@ impl CBackend { } Expr::Block { statements, result, .. } => { + // Push a scope for this block's local variables + self.push_rc_scope(); + for stmt in statements { match stmt { Statement::Let { name, value, .. } => { @@ -1553,6 +1621,11 @@ impl CBackend { "LuxInt".to_string() }; self.writeln(&format!("{} {} = {};", typ, name.name, val)); + + // Register RC variable if it creates a new RC value + if self.expr_creates_rc_value(value) { + self.register_rc_var(&name.name, &typ); + } } Statement::Expr(e) => { // Emit expression - if it's a function call that returns void/unit, @@ -1574,7 +1647,15 @@ impl CBackend { } } } - self.emit_expr(result) + + // Emit the result expression + let result_val = self.emit_expr(result)?; + + // Pop scope and emit decrefs for block-local variables + // Note: We don't decref the result variable itself if it's being returned + self.pop_rc_scope(); + + Ok(result_val) } Expr::EffectOp { effect, operation, args, .. 
} => { @@ -2001,6 +2082,8 @@ impl CBackend { self.writeln(&format!("LuxBool {} = ((LuxBool(*)(void*, LuxInt)){}->fn_ptr)({}->env, lux_unbox_int({}));", keep_var, fn_var, fn_var, elem_var)); self.writeln(&format!("if ({}) {{", keep_var)); self.indent += 1; + // Incref the element since it's now shared between lists + self.writeln(&format!("lux_incref({});", elem_var)); self.writeln(&format!("{}->elements[{}++] = {};", result_var, count_var, elem_var)); self.indent -= 1; self.writeln("}"); @@ -2450,6 +2533,11 @@ impl CBackend { } } + // Check for memory leaks in debug mode + if self.debug_rc { + self.writeln("lux_rc_check_leaks();"); + } + self.writeln("return 0;"); self.indent -= 1; self.writeln("}"); @@ -2509,6 +2597,101 @@ impl CBackend { self.name_counter } + // === RC Scope Management === + + /// Push a new scope onto the RC scope stack + fn push_rc_scope(&mut self) { + self.rc_scopes.push(Vec::new()); + } + + /// Pop the current scope and emit decref calls for all variables + fn pop_rc_scope(&mut self) { + if let Some(scope) = self.rc_scopes.pop() { + // Decref in reverse order (LIFO - last allocated, first freed) + for var in scope.iter().rev() { + self.writeln(&format!("lux_decref({});", var.name)); + } + } + } + + /// Register an RC-managed variable in the current scope + fn register_rc_var(&mut self, name: &str, c_type: &str) { + if let Some(scope) = self.rc_scopes.last_mut() { + scope.push(RcVariable { + name: name.to_string(), + c_type: c_type.to_string(), + }); + } + } + + /// Emit decrefs for all variables in all scopes (for early return) + fn emit_all_scope_cleanup(&mut self) { + // Collect all decrefs first to avoid borrow issues + let decrefs: Vec<String> = self.rc_scopes.iter().rev() + .flat_map(|scope| scope.iter().rev()) + .map(|var| format!("lux_decref({});", var.name)) + .collect(); + + for decref in decrefs { + self.writeln(&decref); + } + } + + /// Check if a C type needs RC management + fn is_rc_type(&self, c_type: &str) -> bool { + // Pointer types 
that are RC-managed + matches!(c_type, "LuxList*" | "LuxClosure*" | "void*") + || c_type.ends_with("*") && c_type != "LuxString" + // Note: LuxString (char*) needs special handling - only dynamic strings are RC + } + + /// Check if an expression creates a new RC-managed value that needs tracking + fn expr_creates_rc_value(&self, expr: &Expr) -> bool { + match expr { + // List literals create new RC lists + Expr::List { .. } => true, + + // Lambdas create closures (though we don't RC closures yet) + Expr::Lambda { .. } => false, // TODO: enable when closures are RC + + // Calls to List.* that return lists + Expr::Call { func, .. } => { + if let Expr::Field { object, field, .. } = func.as_ref() { + if let Expr::Var(module) = object.as_ref() { + if module.name == "List" { + // These List operations return new lists + return matches!(field.name.as_str(), + "map" | "filter" | "concat" | "reverse" | + "take" | "drop" | "range" + ); + } + } + } + false + } + + // Effect operations that return lists + Expr::EffectOp { effect, operation, .. } => { + if effect.name == "List" { + matches!(operation.name.as_str(), + "map" | "filter" | "concat" | "reverse" | + "take" | "drop" | "range" + ) + } else { + false + } + } + + // Variable references don't create new values - they borrow + Expr::Var(_) => false, + + // Literals don't need RC (primitives or static strings) + Expr::Literal(_) => false, + + _ => false, + } + } + /// Check if an expression is a call to a function that returns a closure fn is_closure_returning_call(&self, expr: &Expr) -> bool { match expr {