From 7e110a95600a8189b3bdb299463520f84ce88a9d Mon Sep 17 00:00:00 2001 From: Logan Date: Fri, 25 Oct 2024 01:14:44 -0500 Subject: [PATCH] finished variable and function hoisting --- demo.hal | 6 +- src/frontend.rs | 20 +--- src/ir.rs | 236 ++++++++++++++++++++++++++++++++++++--- src/main.rs | 6 +- src/parse/expression.rs | 13 ++- src/parse/statement.rs | 14 +++ src/semantic/analyzer.rs | 85 ++++++++++---- src/semantic/mod.rs | 43 ++++--- src/semantic/types.rs | 37 +++++- 9 files changed, 377 insertions(+), 83 deletions(-) diff --git a/demo.hal b/demo.hal index d51f4dd..62aaf05 100644 --- a/demo.hal +++ b/demo.hal @@ -1,3 +1,3 @@ -bar :: (a: integer) { - local := a; -} +10 + 2 + 3 * 4; + + diff --git a/src/frontend.rs b/src/frontend.rs index 7e3bd0c..143d688 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -1,17 +1,17 @@ use std::path::Path; use crate::{ + Parser, Tokenizer, err::*, + ir::{Compiler, IR}, semantic::{self}, - Parser, Statement, Tokenizer, }; #[derive(Debug, Clone)] pub struct Module { file_name: String, source: String, - pub program: Vec, - errors: Vec, + pub program: Vec, } impl Module { @@ -31,20 +31,12 @@ impl Module { let tokens = Tokenizer::new(source.chars()).filter(|t| t.0.is_meaningful()); let statements = Parser::new(tokens); let program = semantic::Analyzer::typecheck(statements.collect()); - let mut errors = vec![]; + let mut compiler = Compiler::new(); + compiler.compile(program); Self { file_name, source: source.into(), - program, - errors, + program: compiler.ir, } } - - pub fn errors(&self) -> &[Diagnostic] { - &self.errors - } - - pub fn ok(&self) -> bool { - self.errors.len() == 0 - } } diff --git a/src/ir.rs b/src/ir.rs index aed22fe..3313fa8 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -1,5 +1,6 @@ use crate::{ - BinaryOp, Expression, ExpressionKind, Immediate, UnaryOp, + BinaryOp, Expression, ExpressionKind, Immediate, Statement, StatementKind, + UnaryOp, semantic::{Type, VarKind, uid}, }; @@ -15,43 +16,246 @@ pub enum IR { AssignGlobal { uid: uid }, GetGlobal { uid: uid }, StartFunc { uid: uid }, - NewParam { uid: uid, type_: Type }, EndFunc, + ReturnType { type_: Type }, + NewParam { uid: uid, type_: Type }, + Return, + Call { uid: uid }, + Drop, +} + +impl std::fmt::Display for IR { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use IR::*; + match self { + BinOp { op, type_ } => write!(f, "{op} ({type_})"), + UnOp { op, type_ } => write!(f, "{op}, {type_}"), + Imm(immediate) => write!(f, "push {immediate}"), + NewLocal { uid, type_ } => write!(f, "local ${uid} = {type_}"), + AssignLocal { uid } => write!(f, "pop local ${uid}"), + GetLocal { uid } => write!(f, "push local ${uid}"), + NewGlobal { uid, type_ } => write!(f, "global ${uid} = {type_}"), + AssignGlobal { uid } => write!(f, "pop global ${uid}"), + GetGlobal { uid } => write!(f, "push global ${uid}"), + StartFunc { uid } => write!(f, ""), + EndFunc => write!(f, ""), + NewParam { uid, type_ } => write!(f, "param ${uid} = {type_}"), + Return => write!(f, "return"), + Call { uid } => write!(f, "call ${uid}"), + Drop => write!(f, "pop"), + ReturnType { type_ } => write!(f, "result {type_}"), + } + } } pub struct Compiler { - ir: Vec, + pub ir: Vec, } impl Compiler { + pub fn new() -> Self { + Self { ir: vec![] } + } + + pub fn compile(&mut self, block: Vec) { + for s in block { + self.statement(s); + } + self.ir = self.hoist_functions(); + } + + fn statement(&mut self, statement: Statement) { + use StatementKind::*; + match statement.kind { + Declaration { + type_actual, + value, + varkind, + .. + } => { + match varkind { + VarKind::Global(uid) => self.ir.push(IR::NewGlobal { + uid, + type_: type_actual, + }), + VarKind::Local(uid) => self.ir.push(IR::NewLocal { + uid, + type_: type_actual, + }), + _ => {}, + } + self.expression(value); + match varkind { + VarKind::Global(uid) => self.ir.push(IR::AssignGlobal { uid }), + VarKind::Local(uid) => self.ir.push(IR::AssignLocal { uid }), + _ => {}, + }; + }, + Assignment { + name, + value, + varkind, + } => { + self.expression(value); + match varkind { + VarKind::Global(uid) => self.ir.push(IR::AssignGlobal { uid }), + VarKind::Local(uid) => self.ir.push(IR::AssignLocal { uid }), + _ => {}, + } + }, + If { + predicate, + block, + else_, + } => todo!(), + While { predicate, block } => todo!(), + Print(expression) => todo!(), + Expression(expression) => { + if expression.type_ == Type::Nothing { + self.expression(expression); + } else { + self.expression(expression); + self.ir.push(IR::Drop); + } + }, + Block(statements) => { + for s in statements { + self.statement(s); + } + }, + Error(diagnostic) => { + panic!("{}", diagnostic); + }, + Return(expression) => { + if let Some(e) = expression { + self.expression(e); + } + self.ir.push(IR::Return); + }, + } + } + fn expression(&mut self, expression: Expression) { use ExpressionKind::*; match expression.kind { Immediate(immediate) => { self.ir.push(IR::Imm(immediate)); }, - Identifier(name, var_kind) => match var_kind { + Identifier(_, var_kind) => match var_kind { VarKind::Global(uid) => self.ir.push(IR::GetGlobal { uid }), - VarKind::Local(uid) | VarKind::Param(uid) => { - self.ir.push(IR::GetLocal { uid }) - }, - VarKind::Function(_) => todo!(), - VarKind::Undefined => todo!(), + VarKind::Local(uid) => self.ir.push(IR::GetLocal { uid }), + VarKind::Function(_) => {}, + VarKind::Undefined => panic!("Undefined var not caught by typecheck"), + }, + Binary { + op, + mut left, + mut right, + } => { + left.type_ = Type::coerce(&expression.type_, &left.type_).unwrap(); + right.type_ = Type::coerce(&expression.type_, &right.type_).unwrap(); + assert!(&left.type_ == &right.type_); + self.expression(*left.clone()); + self.expression(*right); + self.ir.push(IR::BinOp { + op, + type_: expression.type_, + }); + }, + Unary { op, mut child } => { + child.type_ = Type::coerce(&expression.type_, &child.type_).unwrap(); + self.expression(*child); + self.ir.push(IR::UnOp { + op, + type_: expression.type_, + }) + }, + Parenthesis(mut e) => { + e.type_ = Type::coerce(&expression.type_, &e.type_).unwrap(); + self.expression(*e); }, - Binary { op, left, right } => todo!(), - Unary { op, child } => todo!(), - Parenthesis(expression) => todo!(), FunctionDef { params, - returns_str, returns_actual, body, id, - } => todo!(), - FunctionCall { callee, args } => todo!(), - StructDef(vec) => todo!(), + .. + } => { + self.ir.push(IR::StartFunc { uid: id }); + for (i, p) in params.iter().enumerate() { + self.ir.push(IR::NewParam { + uid: i as uid, + type_: p.type_actual.clone(), + }) + } + self.ir.push(IR::ReturnType { + type_: returns_actual, + }); + for s in body { + self.statement(s); + } + self.ir.push(IR::EndFunc); + }, + FunctionCall { callee, args } => { + let Type::FunctionDef { id, .. } = callee.type_ else { + panic!() + }; + for arg in args { + self.expression(arg); + } + self.ir.push(IR::Call { uid: id }); + }, + StructDef(_) => {}, StructLiteral { name, args } => todo!(), Field { namespace, field } => todo!(), } } + + fn hoist_functions(&self) -> Vec { + let mut functions = vec![(vec![], vec![])]; + let mut result = vec![]; + for index in 0..self.ir.len() { + let ir = self.ir.get(index).unwrap(); + match ir { + IR::StartFunc { .. } => { + functions.push((vec![], vec![])); + }, + IR::EndFunc => { + let (inits, instr) = functions.pop().unwrap(); + for ir in inits { + result.push(ir); + } + for ir in instr { + result.push(ir); + } + result.push(IR::EndFunc); + continue; + }, + _ => {}, + } + // Push instruction to correct stack + let (inits, instr) = functions.last_mut().unwrap(); + match ir { + IR::NewLocal { .. } + | IR::NewGlobal { .. } + | IR::NewParam { .. } + | IR::StartFunc { .. } => { + inits.push(ir.clone()); + }, + _ => instr.push(ir.clone()), + } + } + // Initialize globals + let (inits, instr) = functions.pop().unwrap(); + for ir in inits { + result.push(ir); + } + // The main function (index 0) + result.push(IR::StartFunc { uid: 0 }); + for ir in instr { + result.push(ir); + } + result.push(IR::EndFunc); + result + } } diff --git a/src/main.rs b/src/main.rs index ad802b0..22b5eb6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -86,12 +86,10 @@ fn prints(st: &Statement) { } fn main() -> Result<()> { - test_expression("asdf.asdf()"); - /* + //test_expression("a.b.c()"); let module = frontend::Module::from_file("./demo.hal")?; for s in &module.program { - prints(s); + println!("{s}"); } - */ Ok(()) } diff --git a/src/parse/expression.rs b/src/parse/expression.rs index a5b07d4..3d2e9c2 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -23,6 +23,17 @@ pub enum Immediate { Boolean(bool), } +impl std::fmt::Display for Immediate { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Immediate::Integer(i) => write!(f, "{i}"), + Immediate::Real(r) => write!(f, "{r}"), + Immediate::String(s) => write!(f, "{s}"), + Immediate::Boolean(b) => write!(f, "{b}"), + } + } +} + #[derive(Clone)] pub enum ExpressionKind { Immediate(Immediate), @@ -99,7 +110,7 @@ impl std::fmt::Debug for ExpressionKind { write!(f, "({token:?} {child:?})") }, e::Identifier(i, _) => write!(f, "{i}"), - e::FunctionCall { callee, args } => { + e::FunctionCall { callee, args, .. } => { write!(f, "({callee:?} call {args:?})") }, e::Field { namespace, field } => { diff --git a/src/parse/statement.rs b/src/parse/statement.rs index 7d1d060..8e76ae4 100644 --- a/src/parse/statement.rs +++ b/src/parse/statement.rs @@ -29,6 +29,7 @@ pub enum StatementKind { Print(Expression), Expression(Expression), Block(Vec), + Return(Option), Error(Diagnostic), } @@ -161,6 +162,19 @@ impl> Parser { span, }); }, + // Return + (Token(t::Return, span2), _) => { + span = span + span2; + self.skip(1); + let expr = self.expression(0).ok(); + if let Some(expr) = &expr { + span = span + expr.span; + } + Statement { + span, + kind: s::Return(expr), + } + }, // Expression (Token(_, span2), _) => { span = span + span2; diff --git a/src/semantic/analyzer.rs b/src/semantic/analyzer.rs index 288c9b1..de2ecce 100644 --- a/src/semantic/analyzer.rs +++ b/src/semantic/analyzer.rs @@ -1,3 +1,5 @@ +use std::any::Any; + use crate::{ BinaryOp, Expression, ExpressionKind, Immediate, Parameter, Statement, StatementKind, UnaryOp, @@ -52,7 +54,12 @@ impl Analyzer { } else { Type::Ambiguous }; - let mut value = self.expression(value.into())?; + let type_hint = if let Type::Ambiguous = type_lhs { + None + } else { + Some(type_lhs.clone()) + }; + let mut value = self.expression(value.into(), type_hint)?; let type_actual = Type::coerce(&type_lhs, &value.type_) .reason(format!( "Expected type '{:?}', found type '{:?}'", @@ -82,7 +89,8 @@ impl Analyzer { .reason(format!("Cannot assign to immutable '{}'", name)) .span(&stmt.span); } - let mut value = *self.expression(value.into())?; + let mut value = + *self.expression(value.into(), Some(symbol.type_.clone()))?; let type_actual = Type::coerce(&symbol.type_, &value.type_).span(&stmt.span)?; value.type_ = type_actual; @@ -98,7 +106,8 @@ impl Analyzer { else_, } => { self.table.start_block(); - let predicate = *self.expression(predicate.into())?; + let predicate = + *self.expression(predicate.into(), Some(Type::Prim(p::boolean)))?; Type::coerce(&Type::Prim(p::boolean), &predicate.type_) .span(&predicate.span)?; let block = self.block(block); @@ -116,7 +125,8 @@ impl Analyzer { }, s::While { predicate, block } => { self.table.start_block(); - let predicate = *self.expression(predicate.into())?; + let predicate = + *self.expression(predicate.into(), Some(Type::Prim(p::boolean)))?; Type::coerce(&Type::Prim(p::boolean), &predicate.type_) .span(&predicate.span)?; let block = self.block(block); @@ -124,10 +134,13 @@ impl Analyzer { self.table.end_block(); }, s::Print(e) => { - stmt.kind = s::Print(*self.expression(e.into())?); + stmt.kind = s::Print(*self.expression(e.into(), None)?); }, s::Expression(e) => { - stmt.kind = s::Expression(*self.expression(e.into())?); + let mut expr = *self.expression(e.into(), None)?; + expr.type_ = + Type::coerce(&Type::Ambiguous, &expr.type_).span(&expr.span)?; + stmt.kind = s::Expression(expr); }, s::Block(block) => { self.table.start_block(); @@ -136,6 +149,20 @@ impl Analyzer { self.table.end_block(); }, s::Error(e) => return Err(e), + s::Return(mut expression) => { + let return_type = self.table.get_return_type().span(&stmt.span)?; + let type_ = match expression { + Some(e) => { + let e = self.expression(e.into(), Some(return_type.clone()))?; + let type_ = e.type_.clone(); + expression = Some(*e); + type_ + }, + None => Type::Nothing, + }; + Type::coerce(&return_type, &type_).span(&stmt.span)?; + stmt.kind = s::Return(expression); + }, } Ok(stmt) } @@ -143,7 +170,9 @@ impl Analyzer { fn expression( &mut self, mut expr: Box, + type_hint: Option, ) -> Result> { + // TODO implement type hinting use ExpressionKind as e; use Immediate as i; use Primitive as p; @@ -160,20 +189,20 @@ impl Analyzer { self.table.find_symbol(i)?.type_ }, e::Binary { op, left, right } => { - let left = self.expression(left)?; - let right = self.expression(right)?; + let left = self.expression(left, type_hint.clone())?; + let right = self.expression(right, type_hint.clone())?; let type_ = Type::binary_op(&left.type_, op, &right.type_)?; expr.kind = e::Binary { left, right, op }; type_ }, e::Unary { op, child } => { - let child = self.expression(child)?; + let child = self.expression(child, type_hint.clone())?; let type_ = Type::unary_op(op, &child.type_)?; expr.kind = e::Unary { child, op }; type_ }, e::Parenthesis(inner) => { - let inner = self.expression(inner)?; + let inner = self.expression(inner, type_hint.clone())?; let type_ = inner.type_.clone(); expr.kind = e::Parenthesis(inner); type_ @@ -183,19 +212,19 @@ impl Analyzer { returns_str, mut returns_actual, body, - id, + id: _, } => { - self.table.start_func(); + returns_actual = match &returns_str { + Some(s) => self.table.get_type(s).span(&expr.span)?, + None => Type::Nothing, + }; + let id = self.table.start_func(returns_actual.clone()); for p in &mut params { p.type_actual = self.table.get_type(&p.type_str).span(&expr.span)?; self .table .define_param(p.name.clone(), p.type_actual.clone())?; } - returns_actual = match &returns_str { - Some(s) => self.table.get_type(s).span(&expr.span)?, - None => Type::Nothing, - }; let body = self.block(body); self.table.end_func(); expr.kind = e::FunctionDef { @@ -205,17 +234,20 @@ impl Analyzer { body, id, }; - Type::Function { + Type::FunctionDef { params: params.into_iter().map(|p| p.type_actual).collect(), returns: returns_actual.into(), + id, } }, e::FunctionCall { callee, mut args } => { - let callee = self.expression(callee)?; + let callee = self.expression(callee, None)?; // Check that this is actually a function - let Type::Function { + // TODO allow function references to be called + let Type::FunctionDef { ref params, ref returns, + .. } = callee.type_ else { return error() @@ -234,7 +266,8 @@ impl Analyzer { } // Check for correct arg types for (expect, actual) in params.iter().zip(args.iter_mut()) { - *actual = *self.expression(actual.clone().into())?; + *actual = + *self.expression(actual.clone().into(), Some(expect.clone()))?; let coerced_type = Type::coerce(expect, &actual.type_); if let Ok(t) = coerced_type { actual.type_ = t; @@ -296,7 +329,7 @@ impl Analyzer { } let argspan = argexpr.span; let mut arg = *self - .expression(argexpr.clone().into()) + .expression(argexpr.clone().into(), Some(ptype.clone())) .trace_span(expr.span, "while parsing struct literal")?; let coerced_type = Type::coerce(ptype, &arg.type_); if let Ok(t) = coerced_type { @@ -318,7 +351,7 @@ impl Analyzer { Type::Struct(params) }, e::Field { namespace, field } => { - let namespace = self.expression(namespace)?; + let namespace = self.expression(namespace, None)?; // Check that namespace is struct // TODO: fields in other types let Type::Struct(ref params) = namespace.type_ else { @@ -350,6 +383,14 @@ impl Analyzer { type_ }, }; + /* + if let Some(expect) = &type_hint { + if let Type::Ambiguous = expect { + } else { + expr.type_ = Type::coerce(expect, &expr.type_)?; + } + } + */ expr.type_ = type_; Ok(expr) } diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index d6ae31b..770a64b 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -14,7 +14,6 @@ pub type uid = u32; pub enum VarKind { Global(uid), Local(uid), - Param(uid), Function(uid), Undefined, } @@ -22,10 +21,7 @@ pub enum VarKind { impl VarKind { pub fn unwrap(self) -> uid { match self { - VarKind::Global(i) - | VarKind::Local(i) - | VarKind::Param(i) - | VarKind::Function(i) => i, + VarKind::Global(i) | VarKind::Local(i) | VarKind::Function(i) => i, VarKind::Undefined => unreachable!("Failed unwrapping uid"), } } @@ -43,7 +39,7 @@ pub struct Symbol { pub enum Definition { Symbol(Symbol), BlockStart, - FuncStart, + FuncStart(Type), } fn next(array: &mut [uid]) -> uid { @@ -59,7 +55,7 @@ pub struct SymbolTable { nesting: usize, local_varno: Vec, global_varno: Vec, - funcno: Vec, + funcno: uid, } impl SymbolTable { @@ -69,7 +65,7 @@ impl SymbolTable { nesting: 0, global_varno: vec![0], local_varno: vec![0], - funcno: vec![0], + funcno: 1, } } @@ -80,7 +76,7 @@ impl SymbolTable { mutable: bool, ) -> Result { let kind = match type_ { - Type::Prim(_) | Type::Struct(_) => { + Type::Prim(_) | Type::Struct(_) | Type::FunctionRef { .. } => { if self.nesting == 0 { VarKind::Global(next(&mut self.global_varno)) } else { @@ -93,9 +89,9 @@ impl SymbolTable { } VarKind::Undefined }, - Type::Function { .. } => { + Type::FunctionDef { id, .. } => { if !mutable { - VarKind::Function(next(&mut self.funcno)) + VarKind::Function(id) } else { return error().reason("Function declaration must be immutable"); } @@ -112,7 +108,7 @@ impl SymbolTable { } fn define_param(&mut self, name: String, type_: Type) -> Result { - let kind = VarKind::Param(next(&mut self.local_varno)); + let kind = VarKind::Local(next(&mut self.local_varno)); self.syms.push(Definition::Symbol(Symbol { name, type_, @@ -122,17 +118,29 @@ impl SymbolTable { Ok(kind) } - fn start_func(&mut self) { + fn start_func(&mut self, returns: Type) -> uid { self.nesting += 1; self.local_varno.push(0); - self.syms.push(Definition::FuncStart); + self.syms.push(Definition::FuncStart(returns)); + let old = self.funcno; + self.funcno += 1; + old + } + + fn get_return_type(&mut self) -> Result { + for def in &self.syms { + if let Definition::FuncStart(t) = def { + return Ok(t.clone()); + } + } + error().reason("Return outside of function") } fn end_func(&mut self) { self.nesting -= 1; self.local_varno.pop(); while !self.syms.is_empty() { - if let Some(Definition::FuncStart) = self.syms.pop() { + if let Some(Definition::FuncStart(_)) = self.syms.pop() { return; } } @@ -154,18 +162,17 @@ impl SymbolTable { fn find_symbol(&self, find_name: &str) -> Result { let mut nesting = self.nesting; - println!("Looking for {find_name}, scope = {nesting}"); for s in self.syms.iter().rev() { match s { Definition::Symbol(sym) // Only search function local and global scope if nesting == self.nesting || nesting == 0 => { - println!("{}, {:?}, {nesting}", sym.name, sym.type_); + // Convert function definition to function reference if find_name == sym.name { return Ok(sym.clone()); } }, - Definition::FuncStart => { + Definition::FuncStart(_) => { nesting -= 1; }, _ => {}, diff --git a/src/semantic/types.rs b/src/semantic/types.rs index 6308b91..b6cd8e8 100644 --- a/src/semantic/types.rs +++ b/src/semantic/types.rs @@ -1,10 +1,10 @@ use crate::{ - semantic::{Symbol, SymbolTable}, BinaryOp, Expression, ExpressionKind, Immediate, Parameter, Statement, StatementKind, UnaryOp, + semantic::{Symbol, SymbolTable}, }; -use super::primitives::*; +use super::{primitives::*, uid}; use crate::err::*; #[derive(Debug, Clone)] @@ -14,10 +14,35 @@ pub enum Type { Prim(Primitive), Struct(Vec), StructDef(Vec), - Function { + FunctionRef { params: Vec, returns: Box, }, + FunctionDef { + params: Vec, + returns: Box, + id: uid, + }, +} + +impl std::fmt::Display for Type { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Type::Ambiguous => write!(f, "ambiguous"), + Type::Nothing => write!(f, "nothing"), + Type::Prim(primitive) => write!(f, "{primitive}"), + Type::Struct(vec) => write!(f, "struct {vec:?}"), + Type::StructDef(vec) => write!(f, "struct definition"), + Type::FunctionRef { params, returns } => { + write!(f, "({params:?}) -> {returns}") + }, + Type::FunctionDef { + params, + returns, + id, + } => write!(f, "({params:?}) -> {returns}"), + } + } } impl PartialEq for Type { @@ -31,15 +56,16 @@ impl PartialEq for Type { .map(|p| p.type_actual.clone()) .eq(p2.iter().map(|p| p.type_actual.clone())), ( - Function { + FunctionRef { params: p1, returns: r1, }, - Function { + FunctionRef { params: p2, returns: r2, }, ) => p1.iter().eq(p2.iter()) && r1 == r2, + (FunctionDef { id: id1, .. }, FunctionDef { id: id2, .. }) => id1 == id2, (Nothing, Nothing) => true, _ => false, } @@ -88,6 +114,7 @@ impl Type { let (p1, p2) = Primitive::coerce_ambiguous(*p1, *p2); if p1 != p2 { e() } else { Ok(Type::Prim(p1)) } }, + (t1, t2) if t1 == t2 => Ok(t1.clone()), _ => e(), } }