wip: compile to c

This commit is contained in:
2026-02-14 00:12:28 -05:00
parent d9be70b021
commit 9a42a7f540
3 changed files with 979 additions and 0 deletions

624
src/codegen/c_backend.rs Normal file
View File

@@ -0,0 +1,624 @@
//! C code generation backend for Lux
//!
//! Compiles Lux programs to C code that can be compiled with GCC/Clang.
//! Inspired by Koka's approach: effects compile to evidence passing,
//! no garbage collector needed with Perceus-style reference counting.
use crate::ast::*;
use std::collections::{HashMap, HashSet};
use std::fmt::Write;
/// C code generation errors
///
/// Returned by every fallible step of [`CBackend`]. The `Display` output is the
/// message prefixed with `"C codegen error: "`; the span is not included in it.
#[derive(Debug, Clone)]
pub struct CGenError {
/// Human-readable description of what could not be generated.
pub message: String,
/// Source location the error refers to, when one is known.
/// Every construction site in this file currently passes `None`.
pub span: Option<Span>,
}
impl std::fmt::Display for CGenError {
    /// Renders the error as `C codegen error: <message>`.
    ///
    /// Only the message is printed; the optional span is not part of the text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("C codegen error: ")?;
        f.write_str(&self.message)
    }
}

/// `CGenError` has no underlying cause, so the default `Error` methods suffice.
impl std::error::Error for CGenError {}
/// The C backend code generator
///
/// Accumulates the generated C text in `output`. `generate` is the entry point;
/// the remaining fields are bookkeeping shared by the emit helpers.
pub struct CBackend {
/// Generated C code
output: String,
/// Current indentation level; `writeln` prefixes each line with this many indent units
indent: usize,
/// Known function names for forward declarations.
/// Filled by the first pass of `generate`; nothing in this file reads it yet.
functions: HashSet<String>,
/// Type definitions we've emitted (by name), so each type is written at most once
types_emitted: HashSet<String>,
/// Counter for generating unique names (see `fresh_name`)
name_counter: usize,
/// Effects used in the program (for evidence struct).
/// Recorded when an effect operation is lowered; nothing in this file reads it yet.
effects_used: HashSet<String>,
}
impl CBackend {
pub fn new() -> Self {
Self {
output: String::new(),
indent: 0,
functions: HashSet::new(),
types_emitted: HashSet::new(),
name_counter: 0,
effects_used: HashSet::new(),
}
}
/// Generate C code from a Lux program
pub fn generate(&mut self, program: &Program) -> Result<String, CGenError> {
self.output.clear();
self.emit_prelude();
// First pass: collect all function names and types
for decl in &program.declarations {
match decl {
Declaration::Function(f) => {
self.functions.insert(f.name.name.clone());
}
Declaration::Type(t) => {
self.collect_type(t)?;
}
_ => {}
}
}
// Emit type definitions
self.emit_type_definitions(program)?;
// Emit forward declarations
self.emit_forward_declarations(program)?;
// Emit function definitions
for decl in &program.declarations {
match decl {
Declaration::Function(f) => {
self.emit_function(f)?;
}
Declaration::Let { name, value, .. } => {
self.emit_global_let(name, value)?;
}
_ => {}
}
}
// Emit main wrapper if there's a main function or top-level expressions
self.emit_main_wrapper(program)?;
Ok(self.output.clone())
}
/// Writes the fixed runtime prelude: the C includes, the `Lux*` typedefs,
/// the string helpers, the built-in console print routine, and the
/// "Forward Declarations" section heading.
fn emit_prelude(&mut self) {
    // One entry per output line, in emission order.
    const PRELUDE_LINES: &[&str] = &[
        "// Generated by Lux compiler",
        "// Do not edit - regenerate from .lux source",
        "",
        "#include <stdint.h>",
        "#include <stdbool.h>",
        "#include <stdio.h>",
        "#include <stdlib.h>",
        "#include <string.h>",
        "",
        "// === Lux Runtime Types ===",
        "",
        "typedef int64_t LuxInt;",
        "typedef double LuxFloat;",
        "typedef bool LuxBool;",
        "typedef char* LuxString;",
        "typedef void* LuxUnit;",
        "",
        "// === String Operations ===",
        "",
        "static LuxString lux_string_concat(LuxString a, LuxString b) {",
        " size_t len_a = strlen(a);",
        " size_t len_b = strlen(b);",
        " LuxString result = malloc(len_a + len_b + 1);",
        " memcpy(result, a, len_a);",
        " memcpy(result + len_a, b, len_b + 1);",
        " return result;",
        "}",
        "",
        "static LuxString lux_int_to_string(LuxInt n) {",
        " char buffer[32];",
        " snprintf(buffer, sizeof(buffer), \"%lld\", (long long)n);",
        " return strdup(buffer);",
        "}",
        "",
        "static LuxBool lux_string_eq(LuxString a, LuxString b) {",
        " return strcmp(a, b) == 0;",
        "}",
        "",
        "static LuxBool lux_string_contains(LuxString haystack, LuxString needle) {",
        " return strstr(haystack, needle) != NULL;",
        "}",
        "",
        "// === Console Effect (built-in) ===",
        "",
        "static void lux_console_print(LuxString msg) {",
        " printf(\"%s\\n\", msg);",
        "}",
        "",
        "// === Forward Declarations ===",
        "",
    ];

    for text in PRELUDE_LINES {
        self.writeln(text);
    }
}
/// First-pass hook for type declarations.
///
/// Currently a placeholder: it records nothing and always succeeds.
fn collect_type(&mut self, _type_decl: &TypeDecl) -> Result<(), CGenError> {
    // Nothing is collected yet; the definitions are emitted later straight from the AST.
    let outcome: Result<(), CGenError> = Ok(());
    outcome
}
/// Emits a C definition for every type declaration in the program,
/// in declaration order, stopping at the first error.
fn emit_type_definitions(&mut self, program: &Program) -> Result<(), CGenError> {
    program
        .declarations
        .iter()
        .filter_map(|decl| match decl {
            Declaration::Type(t) => Some(t),
            _ => None,
        })
        .try_for_each(|type_decl| self.emit_type_def(type_decl))
}
/// Emits the C definition for one Lux type declaration, at most once per type name.
///
/// * Records become a `typedef struct` with one member per field.
/// * ADTs become a `<Name>_Tag` enum, one `<Name>_<Variant>_Data` struct per
///   variant that carries fields, and a `<Name>` struct holding the tag plus a
///   `data` union of those payload structs.
/// * Aliases emit nothing here.
///
/// # Errors
/// Propagates any error from converting a field type to its C spelling.
fn emit_type_def(&mut self, type_decl: &TypeDecl) -> Result<(), CGenError> {
    let name = &type_decl.name.name;

    // `insert` returns false when the name was already present: already emitted.
    if !self.types_emitted.insert(name.clone()) {
        return Ok(());
    }

    match &type_decl.definition {
        TypeDefinition::Record(fields) => {
            self.writeln(&format!("typedef struct {} {{", name));
            self.indent += 1;
            for field in fields {
                let c_type = self.type_to_c(&field.field_type)?;
                self.writeln(&format!("{} {};", c_type, field.name.name));
            }
            self.indent -= 1;
            self.writeln(&format!("}} {};", name));
            self.writeln("");
        }
        TypeDefinition::Adt(variants) => {
            // Emit tag enum
            self.writeln(&format!("typedef enum {}_Tag {{", name));
            self.indent += 1;
            for (i, variant) in variants.iter().enumerate() {
                // No trailing comma after the last enumerator.
                let comma = if i + 1 < variants.len() { "," } else { "" };
                self.writeln(&format!(
                    "{}_TAG_{}{}",
                    name,
                    variant.name.name.to_uppercase(),
                    comma
                ));
            }
            self.indent -= 1;
            self.writeln(&format!("}} {}_Tag;", name));
            self.writeln("");

            // Emit variant structs (only for variants that carry data)
            for variant in variants {
                if variant.fields.is_empty() {
                    continue;
                }
                self.writeln(&format!(
                    "typedef struct {}_{}_Data {{",
                    name, variant.name.name
                ));
                self.indent += 1;
                for (i, field) in variant.fields.iter().enumerate() {
                    let c_type = self.type_to_c(field)?;
                    self.writeln(&format!("{} field{};", c_type, i));
                }
                self.indent -= 1;
                self.writeln(&format!("}} {}_{}_Data;", name, variant.name.name));
                self.writeln("");
            }

            // Emit main union struct
            self.writeln(&format!("typedef struct {} {{", name));
            self.indent += 1;
            self.writeln(&format!("{}_Tag tag;", name));

            // ISO C requires a union to have at least one member, so the `data`
            // union is only emitted when some variant actually has a payload.
            let has_payload = variants.iter().any(|v| !v.fields.is_empty());
            if has_payload {
                self.writeln("union {");
                self.indent += 1;
                for variant in variants {
                    if !variant.fields.is_empty() {
                        self.writeln(&format!(
                            "{}_{}_Data {};",
                            name,
                            variant.name.name,
                            variant.name.name.to_lowercase()
                        ));
                    }
                }
                self.indent -= 1;
                self.writeln("} data;");
            }

            self.indent -= 1;
            self.writeln(&format!("}} {};", name));
            self.writeln("");
        }
        TypeDefinition::Alias(_) => {
            // Type aliases are handled during type resolution
        }
    }
    Ok(())
}
/// Writes one C prototype per Lux function, followed by a blank line.
///
/// # Errors
/// Propagates failures from rendering the return type or the parameter list.
fn emit_forward_declarations(&mut self, program: &Program) -> Result<(), CGenError> {
    let functions = program.declarations.iter().filter_map(|decl| match decl {
        Declaration::Function(f) => Some(f),
        _ => None,
    });

    for func in functions {
        let return_c_type = self.type_expr_to_c(&func.return_type)?;
        let param_list = self.emit_params(&func.params)?;
        let prototype = format!("{} {}({});", return_c_type, func.name.name, param_list);
        self.writeln(&prototype);
    }

    self.writeln("");
    Ok(())
}
/// Emits the C definition of one Lux function.
///
/// The body is lowered with `emit_expr`, which may write statements of its own
/// before returning the expression for the function's value. Non-`void` functions
/// `return` that expression. `void` functions evaluate it as an expression
/// statement, so that a body such as a plain call to another function is not lost.
///
/// # Errors
/// Propagates failures from rendering the signature or lowering the body.
fn emit_function(&mut self, func: &FunctionDecl) -> Result<(), CGenError> {
    let ret_type = self.type_expr_to_c(&func.return_type)?;
    let params = self.emit_params(&func.params)?;

    self.writeln(&format!("{} {}({}) {{", ret_type, func.name.name, params));
    self.indent += 1;

    // Emit function body
    let result = self.emit_expr(&func.body)?;

    if ret_type != "void" {
        self.writeln(&format!("return {};", result));
    } else if result != "NULL" {
        // "NULL" is the placeholder for a unit value whose effects were already
        // written as statements. Anything else still has to be evaluated.
        // The `(void)` cast keeps compilers from warning about an unused value.
        self.writeln(&format!("(void)({});", result));
    }

    self.indent -= 1;
    self.writeln("}");
    self.writeln("");
    Ok(())
}
/// Renders a C parameter list (without the surrounding parentheses).
///
/// An empty list renders as `void`, the C spelling of "takes no arguments".
/// A parameter whose type maps to `void` (Lux `Unit`) is declared as `LuxUnit`,
/// because C does not allow a named parameter of type `void`.
///
/// # Errors
/// Propagates failures from converting a parameter type.
fn emit_params(&self, params: &[Parameter]) -> Result<String, CGenError> {
    if params.is_empty() {
        return Ok("void".to_string());
    }

    // A plain loop keeps the error type concrete instead of leaving it to be
    // inferred through `?` inside a closure.
    let mut rendered = Vec::with_capacity(params.len());
    for p in params {
        let mut c_type = self.type_expr_to_c(&p.param_type)?;
        if c_type == "void" {
            c_type = "LuxUnit".to_string();
        }
        rendered.push(format!("{} {}", c_type, p.name.name));
    }
    Ok(rendered.join(", "))
}
/// Lowers a Lux expression and returns the C expression text for its value.
///
/// Forms that cannot be written as a single C expression (`let`, blocks, `match`,
/// the built-in `Console.print`) write statements to the output buffer as a side
/// effect; the returned text then names the resulting value. `"NULL"` is returned
/// for a unit value whose work has already been written out.
///
/// NOTE(review): several placeholders remain from the original and are kept as is:
/// * every local is declared as `LuxInt`;
/// * `Let` renames the binding but only substitutes it when the body is exactly
///   that variable (see `emit_expr_with_substitution`);
/// * `If` lowers to `?:`, so statements written while lowering either branch are
///   emitted before the conditional rather than inside it;
/// * `Constructor` produces a comment, not a real value.
///
/// # Errors
/// Returns `CGenError` for unsupported operators, non-direct calls, a
/// `Console.print` without an argument, and any expression kind not listed here.
fn emit_expr(&mut self, expr: &Expr) -> Result<String, CGenError> {
    match expr {
        Expr::Literal(lit) => self.emit_literal(lit),
        Expr::Var(ident) => Ok(ident.name.clone()),
        Expr::BinaryOp { op, left, right, .. } => {
            let l = self.emit_expr(left)?;
            let r = self.emit_expr(right)?;
            let op_str = match op {
                BinaryOp::Add => "+",
                BinaryOp::Sub => "-",
                BinaryOp::Mul => "*",
                BinaryOp::Div => "/",
                BinaryOp::Mod => "%",
                BinaryOp::Eq => "==",
                BinaryOp::Ne => "!=",
                BinaryOp::Lt => "<",
                BinaryOp::Le => "<=",
                BinaryOp::Gt => ">",
                BinaryOp::Ge => ">=",
                BinaryOp::And => "&&",
                BinaryOp::Or => "||",
                BinaryOp::Concat => {
                    return Ok(format!("lux_string_concat({}, {})", l, r));
                }
                _ => {
                    return Err(CGenError {
                        message: format!("Unsupported binary operator: {:?}", op),
                        span: None,
                    })
                }
            };
            Ok(format!("({} {} {})", l, op_str, r))
        }
        Expr::UnaryOp { op, operand, .. } => {
            let val = self.emit_expr(operand)?;
            let op_str = match op {
                UnaryOp::Neg => "-",
                UnaryOp::Not => "!",
            };
            Ok(format!("({}{})", op_str, val))
        }
        Expr::If { condition, then_branch, else_branch, .. } => {
            let cond = self.emit_expr(condition)?;
            let then_val = self.emit_expr(then_branch)?;
            let else_val = self.emit_expr(else_branch)?;
            Ok(format!("({} ? {} : {})", cond, then_val, else_val))
        }
        Expr::Let { name, value, body, .. } => {
            let val = self.emit_expr(value)?;
            // A numeric suffix keeps nested or shadowing lets from colliding in C.
            let var_name = format!("{}_{}", name.name, self.fresh_name());
            self.writeln(&format!("LuxInt {} = {};", var_name, val));
            self.emit_expr_with_substitution(body, &name.name, &var_name)
        }
        Expr::Call { func, args, .. } => {
            let func_name = match func.as_ref() {
                Expr::Var(ident) => ident.name.clone(),
                _ => {
                    return Err(CGenError {
                        message: "Only direct function calls supported".to_string(),
                        span: None,
                    })
                }
            };
            let mut rendered_args = Vec::with_capacity(args.len());
            for a in args.iter() {
                rendered_args.push(self.emit_expr(a)?);
            }
            Ok(format!("{}({})", func_name, rendered_args.join(", ")))
        }
        Expr::Block { statements, result, .. } => {
            for stmt in statements {
                match stmt {
                    Statement::Let { name, value, .. } => {
                        let val = self.emit_expr(value)?;
                        self.writeln(&format!("LuxInt {} = {};", name.name, val));
                    }
                    Statement::Expr(e) => {
                        let val = self.emit_expr(e)?;
                        // The expression must still be evaluated for its side
                        // effects (e.g. a plain function call). "NULL" means the
                        // work was already written out as statements.
                        if val != "NULL" {
                            self.writeln(&format!("(void)({});", val));
                        }
                    }
                }
            }
            self.emit_expr(result)
        }
        Expr::EffectOp { effect, operation, args, .. } => {
            self.effects_used.insert(effect.name.clone());

            // Built-in effects
            if effect.name == "Console" && operation.name == "print" {
                // Report a missing argument as an error rather than panicking on `args[0]`.
                let first = args.first().ok_or_else(|| CGenError {
                    message: "Console.print expects one argument".to_string(),
                    span: None,
                })?;
                let arg = self.emit_expr(first)?;
                self.writeln(&format!("lux_console_print({});", arg));
                return Ok("NULL".to_string());
            }

            // For other effects, emit evidence-passing call
            let mut rendered_args = Vec::with_capacity(args.len());
            for a in args.iter() {
                rendered_args.push(self.emit_expr(a)?);
            }
            Ok(format!(
                "ev_{}__{}({})",
                effect.name.to_lowercase(),
                operation.name,
                rendered_args.join(", ")
            ))
        }
        Expr::Record { fields, .. } => {
            let mut initializers = Vec::with_capacity(fields.len());
            for (name, val) in fields.iter() {
                let v = self.emit_expr(val)?;
                initializers.push(format!(".{} = {}", name.name, v));
            }
            Ok(format!("{{ {} }}", initializers.join(", ")))
        }
        Expr::Field { object, field, .. } => {
            let obj = self.emit_expr(object)?;
            Ok(format!("{}.{}", obj, field.name))
        }
        Expr::Match { expr, arms, .. } => self.emit_match(expr, arms),
        Expr::Constructor { name, args, .. } => {
            // ADT constructor - need to determine the type
            // For now, assume it's a simple constructor call
            if args.is_empty() {
                Ok(format!("/* {} */ 0", name.name))
            } else {
                let mut rendered_args = Vec::with_capacity(args.len());
                for a in args.iter() {
                    rendered_args.push(self.emit_expr(a)?);
                }
                Ok(format!("/* {}({}) */", name.name, rendered_args.join(", ")))
            }
        }
        _ => Err(CGenError {
            message: "Unsupported expression type in C backend".to_string(),
            span: None,
        }),
    }
}
/// Lowers `expr`, replacing it with `to` when it is exactly the variable `from`.
///
/// The replacement is shallow: only a top-level variable reference is rewritten.
/// Any other expression is lowered unchanged by `emit_expr`, so occurrences of
/// `from` nested inside it are not renamed.
fn emit_expr_with_substitution(&mut self, expr: &Expr, from: &str, to: &str) -> Result<String, CGenError> {
    if let Expr::Var(ident) = expr {
        if ident.name == from {
            return Ok(to.to_string());
        }
    }
    self.emit_expr(expr)
}
fn emit_match(&mut self, expr: &Expr, arms: &[MatchArm]) -> Result<String, CGenError> {
let scrutinee = self.emit_expr(expr)?;
let result_var = format!("match_result_{}", self.fresh_name());
self.writeln(&format!("LuxInt {};", result_var));
for (i, arm) in arms.iter().enumerate() {
let condition = self.pattern_to_condition(&arm.pattern, &scrutinee)?;
if i == 0 {
self.writeln(&format!("if ({}) {{", condition));
} else {
self.writeln(&format!("}} else if ({}) {{", condition));
}
self.indent += 1;
let body = self.emit_expr(&arm.body)?;
self.writeln(&format!("{} = {};", result_var, body));
self.indent -= 1;
}
self.writeln("}");
Ok(result_var)
}
fn pattern_to_condition(&self, pattern: &Pattern, scrutinee: &str) -> Result<String, CGenError> {
match pattern {
Pattern::Wildcard(_) => Ok("1".to_string()),
Pattern::Var(ident, _) => Ok(format!("(1) /* bind {} = {} */", ident.name, scrutinee)),
Pattern::Literal(lit, _) => {
let lit_val = self.emit_literal_value(lit)?;
Ok(format!("{} == {}", scrutinee, lit_val))
}
Pattern::Constructor { name, .. } => {
Ok(format!("{}.tag == TAG_{}", scrutinee, name.name.to_uppercase()))
}
_ => Ok("1".to_string()),
}
}
/// Renders a literal expression by delegating to `emit_literal_value` on its kind.
fn emit_literal(&self, lit: &Literal) -> Result<String, CGenError> {
    let kind = &lit.kind;
    self.emit_literal_value(kind)
}
/// Renders a literal value as C source text.
///
/// * Integers and booleans are printed as is (`true` / `false` come from `<stdbool.h>`).
/// * Floats always carry a decimal point, so `1.0` is not emitted as the integer
///   literal `1` (which would turn `1.0 / 2.0` into integer division in C).
/// * Strings and chars are escaped for C: backslash, the enclosing quote, newline,
///   carriage return, tab and other ASCII control characters.
/// * Unit is `NULL`.
///
/// NOTE(review): this assumes string and char literals arrive with their Lux escape
/// sequences already decoded (the original's escaping of `"` implies the same).
/// Non-ASCII characters are passed through unchanged, as before.
fn emit_literal_value(&self, kind: &LiteralKind) -> Result<String, CGenError> {
    /// Appends `ch` to `out`, escaped for use inside a C literal delimited by `quote`.
    fn push_c_escaped(out: &mut String, ch: char, quote: char) {
        match ch {
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if c == quote => {
                out.push('\\');
                out.push(c);
            }
            // Three-digit octal is the longest octal escape C accepts, so a
            // following digit can never be absorbed into it.
            c if c.is_ascii_control() => out.push_str(&format!("\\{:03o}", c as u32)),
            c => out.push(c),
        }
    }

    match kind {
        LiteralKind::Int(n) => Ok(n.to_string()),
        LiteralKind::Float(f) => {
            let mut text = f.to_string();
            // Only digits and a sign: the value printed like an integer.
            let looks_integral = text.chars().all(|ch| ch.is_ascii_digit() || ch == '-');
            if looks_integral {
                text.push_str(".0");
            }
            Ok(text)
        }
        LiteralKind::Bool(b) => Ok(if *b { "true" } else { "false" }.to_string()),
        LiteralKind::String(s) => {
            let mut out = String::with_capacity(s.len() + 2);
            out.push('"');
            for ch in s.chars() {
                push_c_escaped(&mut out, ch, '"');
            }
            out.push('"');
            Ok(out)
        }
        LiteralKind::Char(c) => {
            let mut out = String::from("'");
            push_c_escaped(&mut out, *c, '\'');
            out.push('\'');
            Ok(out)
        }
        LiteralKind::Unit => Ok("NULL".to_string()),
    }
}
/// Emits a top-level `let` as a `static LuxInt` definition followed by a blank line.
///
/// The initializer is whatever `emit_expr` returns for `value`; any statements
/// that lowering writes are emitted before the definition.
///
/// # Errors
/// Propagates failures from lowering `value`.
fn emit_global_let(&mut self, name: &Ident, value: &Expr) -> Result<(), CGenError> {
    let initializer = self.emit_expr(value)?;
    let definition = format!("static LuxInt {} = {};", name.name, initializer);
    self.writeln(&definition);
    self.writeln("");
    Ok(())
}
/// Emits the C `main` entry point when the program declares a function named
/// `main` or has a top-level `let` whose value is a `run` expression.
///
/// Inside the wrapper, each top-level `let x = run f(...)` whose target is a
/// direct call to a named function becomes the statement `f();` (the call's
/// arguments are not emitted). The wrapper always ends with `return 0;`.
///
/// NOTE(review): a Lux function named `main` is also emitted under that same C
/// name by `emit_function`, and is not called from here — confirm the intended
/// naming scheme.
fn emit_main_wrapper(&mut self, program: &Program) -> Result<(), CGenError> {
    // Decide whether a wrapper is needed at all.
    let mut has_main = false;
    let mut has_run = false;
    for decl in &program.declarations {
        match decl {
            Declaration::Function(f) if f.name.name == "main" => has_main = true,
            Declaration::Let { value, .. } if matches!(value.as_ref(), Expr::Run { .. }) => {
                has_run = true
            }
            _ => {}
        }
    }

    if !(has_main || has_run) {
        return Ok(());
    }

    self.writeln("int main(int argc, char** argv) {");
    self.indent += 1;

    // Execute top-level let bindings with run expressions
    for decl in &program.declarations {
        // Drill down: let -> run -> call -> named callee.
        let callee = match decl {
            Declaration::Let { value, .. } => match value.as_ref() {
                Expr::Run { expr, .. } => match expr.as_ref() {
                    Expr::Call { func, .. } => match func.as_ref() {
                        Expr::Var(fn_name) => Some(fn_name),
                        _ => None,
                    },
                    _ => None,
                },
                _ => None,
            },
            _ => None,
        };

        if let Some(fn_name) = callee {
            self.writeln(&format!("{}();", fn_name.name));
        }
    }

    self.writeln("return 0;");
    self.indent -= 1;
    self.writeln("}");

    Ok(())
}
/// Alias for `type_expr_to_c`; returns the C spelling of a Lux type expression.
fn type_to_c(&self, type_expr: &TypeExpr) -> Result<String, CGenError> {
    let c_type = self.type_expr_to_c(type_expr)?;
    Ok(c_type)
}
/// Maps a Lux type expression to the C type name used in generated code.
///
/// Built-in named types map to the prelude typedefs (`Unit` maps to `void`);
/// any other named type is emitted under its own name. Every non-named type
/// expression (generics, functions, tuples, records, ...) is represented as
/// `void*` for now. This function currently never fails.
fn type_expr_to_c(&self, type_expr: &TypeExpr) -> Result<String, CGenError> {
    let c_name: &str = match type_expr {
        TypeExpr::Named(ident) => match ident.name.as_str() {
            "Int" => "LuxInt",
            "Float" => "LuxFloat",
            "Bool" => "LuxBool",
            "String" => "LuxString",
            "Unit" => "void",
            user_defined => user_defined,
        },
        // Everything else is an opaque pointer for now.
        _ => "void*",
    };
    Ok(c_name.to_string())
}
/// Advances the name counter and returns its new value, so the first call
/// after construction yields 1.
fn fresh_name(&mut self) -> usize {
    let next = self.name_counter + 1;
    self.name_counter = next;
    next
}
/// Appends `line` to the output, prefixed with one indent unit per current
/// indentation level and terminated by a newline.
fn writeln(&mut self, line: &str) {
    for _ in 0..self.indent {
        self.output.push_str(" ");
    }
    writeln!(self.output, "{}", line).unwrap();
}
}
impl Default for CBackend {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::Parser;

    /// Parses `source` and runs it through a fresh backend.
    /// Panics with "Parse error" if the source does not parse.
    fn generate(source: &str) -> Result<String, CGenError> {
        let program = Parser::parse_source(source).expect("Parse error");
        CBackend::new().generate(&program)
    }

    /// Asserts that every fragment occurs somewhere in the generated C code.
    fn assert_contains_all(c_code: &str, fragments: &[&str]) {
        for fragment in fragments.iter() {
            assert!(c_code.contains(*fragment));
        }
    }

    /// A two-argument Int function keeps its signature and returns the sum.
    #[test]
    fn test_simple_function() {
        let source = r#"
fn add(a: Int, b: Int): Int = a + b
"#;
        let c_code = generate(source).unwrap();
        assert_contains_all(&c_code, &["LuxInt add(LuxInt a, LuxInt b)", "return (a + b)"]);
    }

    /// A recursive function calls itself with the decremented argument.
    #[test]
    fn test_factorial() {
        let source = r#"
fn factorial(n: Int): Int =
if n <= 1 then 1 else n * factorial(n - 1)
"#;
        let c_code = generate(source).unwrap();
        assert_contains_all(&c_code, &["LuxInt factorial(LuxInt n)", "factorial((n - 1))"]);
    }

    /// `Console.print` lowers to the built-in runtime print routine.
    #[test]
    fn test_console_effect() {
        let source = r#"
fn greet(): Unit with {Console} = Console.print("Hello")
"#;
        let c_code = generate(source).unwrap();
        assert_contains_all(&c_code, &["lux_console_print"]);
    }
}