finished variable and function hoisting

This commit is contained in:
Logan 2024-10-25 01:14:44 -05:00
parent 4e13bb2ffc
commit 7e110a9560
9 changed files with 377 additions and 83 deletions

View file

@ -1,3 +1,3 @@
bar :: (a: integer) {
local := a;
}
10 + 2 + 3 * 4;

View file

@ -1,17 +1,17 @@
use std::path::Path;
use crate::{
Parser, Tokenizer,
err::*,
ir::{Compiler, IR},
semantic::{self},
Parser, Statement, Tokenizer,
};
#[derive(Debug, Clone)]
pub struct Module {
file_name: String,
source: String,
pub program: Vec<Statement>,
errors: Vec<Diagnostic>,
pub program: Vec<IR>,
}
impl Module {
@ -31,20 +31,12 @@ impl Module {
let tokens = Tokenizer::new(source.chars()).filter(|t| t.0.is_meaningful());
let statements = Parser::new(tokens);
let program = semantic::Analyzer::typecheck(statements.collect());
let mut errors = vec![];
let mut compiler = Compiler::new();
compiler.compile(program);
Self {
file_name,
source: source.into(),
program,
errors,
program: compiler.ir,
}
}
pub fn errors(&self) -> &[Diagnostic] {
&self.errors
}
pub fn ok(&self) -> bool {
self.errors.len() == 0
}
}

236
src/ir.rs
View file

@ -1,5 +1,6 @@
use crate::{
BinaryOp, Expression, ExpressionKind, Immediate, UnaryOp,
BinaryOp, Expression, ExpressionKind, Immediate, Statement, StatementKind,
UnaryOp,
semantic::{Type, VarKind, uid},
};
@ -15,43 +16,246 @@ pub enum IR {
AssignGlobal { uid: uid },
GetGlobal { uid: uid },
StartFunc { uid: uid },
NewParam { uid: uid, type_: Type },
EndFunc,
ReturnType { type_: Type },
NewParam { uid: uid, type_: Type },
Return,
Call { uid: uid },
Drop,
}
impl std::fmt::Display for IR {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use IR::*;
match self {
BinOp { op, type_ } => write!(f, "{op} ({type_})"),
UnOp { op, type_ } => write!(f, "{op}, {type_}"),
Imm(immediate) => write!(f, "push {immediate}"),
NewLocal { uid, type_ } => write!(f, "local ${uid} = {type_}"),
AssignLocal { uid } => write!(f, "pop local ${uid}"),
GetLocal { uid } => write!(f, "push local ${uid}"),
NewGlobal { uid, type_ } => write!(f, "global ${uid} = {type_}"),
AssignGlobal { uid } => write!(f, "pop global ${uid}"),
GetGlobal { uid } => write!(f, "push global ${uid}"),
StartFunc { uid } => write!(f, "<function id=${uid}>"),
EndFunc => write!(f, "</function>"),
NewParam { uid, type_ } => write!(f, "param ${uid} = {type_}"),
Return => write!(f, "return"),
Call { uid } => write!(f, "call ${uid}"),
Drop => write!(f, "pop"),
ReturnType { type_ } => write!(f, "result {type_}"),
}
}
}
pub struct Compiler {
ir: Vec<IR>,
pub ir: Vec<IR>,
}
impl Compiler {
/// Creates a compiler with an empty instruction list.
pub fn new() -> Self {
    Self { ir: Vec::new() }
}
/// Lowers every statement of `block` into `self.ir`, then replaces the
/// instruction list with the hoisted version from `hoist_functions`.
pub fn compile(&mut self, block: Vec<Statement>) {
    block.into_iter().for_each(|stmt| self.statement(stmt));
    self.ir = self.hoist_functions();
}
fn statement(&mut self, statement: Statement) {
use StatementKind::*;
match statement.kind {
Declaration {
type_actual,
value,
varkind,
..
} => {
match varkind {
VarKind::Global(uid) => self.ir.push(IR::NewGlobal {
uid,
type_: type_actual,
}),
VarKind::Local(uid) => self.ir.push(IR::NewLocal {
uid,
type_: type_actual,
}),
_ => {},
}
self.expression(value);
match varkind {
VarKind::Global(uid) => self.ir.push(IR::AssignGlobal { uid }),
VarKind::Local(uid) => self.ir.push(IR::AssignLocal { uid }),
_ => {},
};
},
Assignment {
name,
value,
varkind,
} => {
self.expression(value);
match varkind {
VarKind::Global(uid) => self.ir.push(IR::AssignGlobal { uid }),
VarKind::Local(uid) => self.ir.push(IR::AssignLocal { uid }),
_ => {},
}
},
If {
predicate,
block,
else_,
} => todo!(),
While { predicate, block } => todo!(),
Print(expression) => todo!(),
Expression(expression) => {
if expression.type_ == Type::Nothing {
self.expression(expression);
} else {
self.expression(expression);
self.ir.push(IR::Drop);
}
},
Block(statements) => {
for s in statements {
self.statement(s);
}
},
Error(diagnostic) => {
panic!("{}", diagnostic);
},
Return(expression) => {
if let Some(e) = expression {
self.expression(e);
}
self.ir.push(IR::Return);
},
}
}
fn expression(&mut self, expression: Expression) {
use ExpressionKind::*;
match expression.kind {
Immediate(immediate) => {
self.ir.push(IR::Imm(immediate));
},
Identifier(name, var_kind) => match var_kind {
Identifier(_, var_kind) => match var_kind {
VarKind::Global(uid) => self.ir.push(IR::GetGlobal { uid }),
VarKind::Local(uid) | VarKind::Param(uid) => {
self.ir.push(IR::GetLocal { uid })
},
VarKind::Function(_) => todo!(),
VarKind::Undefined => todo!(),
VarKind::Local(uid) => self.ir.push(IR::GetLocal { uid }),
VarKind::Function(_) => {},
VarKind::Undefined => panic!("Undefined var not caught by typecheck"),
},
Binary {
op,
mut left,
mut right,
} => {
left.type_ = Type::coerce(&expression.type_, &left.type_).unwrap();
right.type_ = Type::coerce(&expression.type_, &right.type_).unwrap();
assert!(&left.type_ == &right.type_);
self.expression(*left.clone());
self.expression(*right);
self.ir.push(IR::BinOp {
op,
type_: expression.type_,
});
},
Unary { op, mut child } => {
child.type_ = Type::coerce(&expression.type_, &child.type_).unwrap();
self.expression(*child);
self.ir.push(IR::UnOp {
op,
type_: expression.type_,
})
},
Parenthesis(mut e) => {
e.type_ = Type::coerce(&expression.type_, &e.type_).unwrap();
self.expression(*e);
},
Binary { op, left, right } => todo!(),
Unary { op, child } => todo!(),
Parenthesis(expression) => todo!(),
FunctionDef {
params,
returns_str,
returns_actual,
body,
id,
} => todo!(),
FunctionCall { callee, args } => todo!(),
StructDef(vec) => todo!(),
..
} => {
self.ir.push(IR::StartFunc { uid: id });
for (i, p) in params.iter().enumerate() {
self.ir.push(IR::NewParam {
uid: i as uid,
type_: p.type_actual.clone(),
})
}
self.ir.push(IR::ReturnType {
type_: returns_actual,
});
for s in body {
self.statement(s);
}
self.ir.push(IR::EndFunc);
},
FunctionCall { callee, args } => {
let Type::FunctionDef { id, .. } = callee.type_ else {
panic!()
};
for arg in args {
self.expression(arg);
}
self.ir.push(IR::Call { uid: id });
},
StructDef(_) => {},
StructLiteral { name, args } => todo!(),
Field { namespace, field } => todo!(),
}
}
/// Builds a reordered copy of `self.ir` in which each function is one
/// contiguous `StartFunc` .. `EndFunc` group.
///
/// Within a group, the `StartFunc` marker and all `NewLocal` / `NewGlobal` /
/// `NewParam` declarations come first, followed by the remaining
/// instructions in their original order. A function is emitted when its
/// `EndFunc` is reached. After the input is exhausted, the top-level
/// declarations are emitted, and the top-level instructions are wrapped in a
/// synthetic function with uid 0.
///
/// Panics if an `EndFunc` or ordinary instruction is seen with no open frame.
fn hoist_functions(&self) -> Vec<IR> {
    // One (declarations, instructions) frame per open function; frame 0 is
    // the top level.
    let mut frames: Vec<(Vec<IR>, Vec<IR>)> = vec![(Vec::new(), Vec::new())];
    let mut output: Vec<IR> = Vec::new();

    for op in &self.ir {
        match op {
            IR::EndFunc => {
                // Flush the finished function: declarations, then body.
                let (decls, body) = frames.pop().unwrap();
                output.extend(decls);
                output.extend(body);
                output.push(IR::EndFunc);
            },
            IR::StartFunc { .. } => {
                // Open a new frame; the marker leads its declarations.
                frames.push((vec![op.clone()], Vec::new()));
            },
            IR::NewLocal { .. } | IR::NewGlobal { .. } | IR::NewParam { .. } => {
                frames.last_mut().unwrap().0.push(op.clone());
            },
            _ => frames.last_mut().unwrap().1.push(op.clone()),
        }
    }

    // Initialize globals
    let (globals, main_body) = frames.pop().unwrap();
    output.extend(globals);

    // The main function (index 0)
    output.push(IR::StartFunc { uid: 0 });
    output.extend(main_body);
    output.push(IR::EndFunc);
    output
}
}

View file

@ -86,12 +86,10 @@ fn prints(st: &Statement) {
}
fn main() -> Result<()> {
test_expression("asdf.asdf()");
/*
//test_expression("a.b.c()");
let module = frontend::Module::from_file("./demo.hal")?;
for s in &module.program {
prints(s);
println!("{s}");
}
*/
Ok(())
}

View file

@ -23,6 +23,17 @@ pub enum Immediate {
Boolean(bool),
}
impl std::fmt::Display for Immediate {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Immediate::Integer(i) => write!(f, "{i}"),
Immediate::Real(r) => write!(f, "{r}"),
Immediate::String(s) => write!(f, "{s}"),
Immediate::Boolean(b) => write!(f, "{b}"),
}
}
}
#[derive(Clone)]
pub enum ExpressionKind {
Immediate(Immediate),
@ -99,7 +110,7 @@ impl std::fmt::Debug for ExpressionKind {
write!(f, "({token:?} {child:?})")
},
e::Identifier(i, _) => write!(f, "{i}"),
e::FunctionCall { callee, args } => {
e::FunctionCall { callee, args, .. } => {
write!(f, "({callee:?} call {args:?})")
},
e::Field { namespace, field } => {

View file

@ -29,6 +29,7 @@ pub enum StatementKind {
Print(Expression),
Expression(Expression),
Block(Vec<Statement>),
Return(Option<Expression>),
Error(Diagnostic),
}
@ -161,6 +162,19 @@ impl<I: Iterator<Item = Token>> Parser<I> {
span,
});
},
// Return
(Token(t::Return, span2), _) => {
span = span + span2;
self.skip(1);
let expr = self.expression(0).ok();
if let Some(expr) = &expr {
span = span + expr.span;
}
Statement {
span,
kind: s::Return(expr),
}
},
// Expression
(Token(_, span2), _) => {
span = span + span2;

View file

@ -1,3 +1,5 @@
use std::any::Any;
use crate::{
BinaryOp, Expression, ExpressionKind, Immediate, Parameter, Statement,
StatementKind, UnaryOp,
@ -52,7 +54,12 @@ impl Analyzer {
} else {
Type::Ambiguous
};
let mut value = self.expression(value.into())?;
let type_hint = if let Type::Ambiguous = type_lhs {
None
} else {
Some(type_lhs.clone())
};
let mut value = self.expression(value.into(), type_hint)?;
let type_actual = Type::coerce(&type_lhs, &value.type_)
.reason(format!(
"Expected type '{:?}', found type '{:?}'",
@ -82,7 +89,8 @@ impl Analyzer {
.reason(format!("Cannot assign to immutable '{}'", name))
.span(&stmt.span);
}
let mut value = *self.expression(value.into())?;
let mut value =
*self.expression(value.into(), Some(symbol.type_.clone()))?;
let type_actual =
Type::coerce(&symbol.type_, &value.type_).span(&stmt.span)?;
value.type_ = type_actual;
@ -98,7 +106,8 @@ impl Analyzer {
else_,
} => {
self.table.start_block();
let predicate = *self.expression(predicate.into())?;
let predicate =
*self.expression(predicate.into(), Some(Type::Prim(p::boolean)))?;
Type::coerce(&Type::Prim(p::boolean), &predicate.type_)
.span(&predicate.span)?;
let block = self.block(block);
@ -116,7 +125,8 @@ impl Analyzer {
},
s::While { predicate, block } => {
self.table.start_block();
let predicate = *self.expression(predicate.into())?;
let predicate =
*self.expression(predicate.into(), Some(Type::Prim(p::boolean)))?;
Type::coerce(&Type::Prim(p::boolean), &predicate.type_)
.span(&predicate.span)?;
let block = self.block(block);
@ -124,10 +134,13 @@ impl Analyzer {
self.table.end_block();
},
s::Print(e) => {
stmt.kind = s::Print(*self.expression(e.into())?);
stmt.kind = s::Print(*self.expression(e.into(), None)?);
},
s::Expression(e) => {
stmt.kind = s::Expression(*self.expression(e.into())?);
let mut expr = *self.expression(e.into(), None)?;
expr.type_ =
Type::coerce(&Type::Ambiguous, &expr.type_).span(&expr.span)?;
stmt.kind = s::Expression(expr);
},
s::Block(block) => {
self.table.start_block();
@ -136,6 +149,20 @@ impl Analyzer {
self.table.end_block();
},
s::Error(e) => return Err(e),
s::Return(mut expression) => {
let return_type = self.table.get_return_type().span(&stmt.span)?;
let type_ = match expression {
Some(e) => {
let e = self.expression(e.into(), Some(return_type.clone()))?;
let type_ = e.type_.clone();
expression = Some(*e);
type_
},
None => Type::Nothing,
};
Type::coerce(&return_type, &type_).span(&stmt.span)?;
stmt.kind = s::Return(expression);
},
}
Ok(stmt)
}
@ -143,7 +170,9 @@ impl Analyzer {
fn expression(
&mut self,
mut expr: Box<Expression>,
type_hint: Option<Type>,
) -> Result<Box<Expression>> {
// TODO implement type hinting
use ExpressionKind as e;
use Immediate as i;
use Primitive as p;
@ -160,20 +189,20 @@ impl Analyzer {
self.table.find_symbol(i)?.type_
},
e::Binary { op, left, right } => {
let left = self.expression(left)?;
let right = self.expression(right)?;
let left = self.expression(left, type_hint.clone())?;
let right = self.expression(right, type_hint.clone())?;
let type_ = Type::binary_op(&left.type_, op, &right.type_)?;
expr.kind = e::Binary { left, right, op };
type_
},
e::Unary { op, child } => {
let child = self.expression(child)?;
let child = self.expression(child, type_hint.clone())?;
let type_ = Type::unary_op(op, &child.type_)?;
expr.kind = e::Unary { child, op };
type_
},
e::Parenthesis(inner) => {
let inner = self.expression(inner)?;
let inner = self.expression(inner, type_hint.clone())?;
let type_ = inner.type_.clone();
expr.kind = e::Parenthesis(inner);
type_
@ -183,19 +212,19 @@ impl Analyzer {
returns_str,
mut returns_actual,
body,
id,
id: _,
} => {
self.table.start_func();
returns_actual = match &returns_str {
Some(s) => self.table.get_type(s).span(&expr.span)?,
None => Type::Nothing,
};
let id = self.table.start_func(returns_actual.clone());
for p in &mut params {
p.type_actual = self.table.get_type(&p.type_str).span(&expr.span)?;
self
.table
.define_param(p.name.clone(), p.type_actual.clone())?;
}
returns_actual = match &returns_str {
Some(s) => self.table.get_type(s).span(&expr.span)?,
None => Type::Nothing,
};
let body = self.block(body);
self.table.end_func();
expr.kind = e::FunctionDef {
@ -205,17 +234,20 @@ impl Analyzer {
body,
id,
};
Type::Function {
Type::FunctionDef {
params: params.into_iter().map(|p| p.type_actual).collect(),
returns: returns_actual.into(),
id,
}
},
e::FunctionCall { callee, mut args } => {
let callee = self.expression(callee)?;
let callee = self.expression(callee, None)?;
// Check that this is actually a function
let Type::Function {
// TODO allow function references to be called
let Type::FunctionDef {
ref params,
ref returns,
..
} = callee.type_
else {
return error()
@ -234,7 +266,8 @@ impl Analyzer {
}
// Check for correct arg types
for (expect, actual) in params.iter().zip(args.iter_mut()) {
*actual = *self.expression(actual.clone().into())?;
*actual =
*self.expression(actual.clone().into(), Some(expect.clone()))?;
let coerced_type = Type::coerce(expect, &actual.type_);
if let Ok(t) = coerced_type {
actual.type_ = t;
@ -296,7 +329,7 @@ impl Analyzer {
}
let argspan = argexpr.span;
let mut arg = *self
.expression(argexpr.clone().into())
.expression(argexpr.clone().into(), Some(ptype.clone()))
.trace_span(expr.span, "while parsing struct literal")?;
let coerced_type = Type::coerce(ptype, &arg.type_);
if let Ok(t) = coerced_type {
@ -318,7 +351,7 @@ impl Analyzer {
Type::Struct(params)
},
e::Field { namespace, field } => {
let namespace = self.expression(namespace)?;
let namespace = self.expression(namespace, None)?;
// Check that namespace is struct
// TODO: fields in other types
let Type::Struct(ref params) = namespace.type_ else {
@ -350,6 +383,14 @@ impl Analyzer {
type_
},
};
/*
if let Some(expect) = &type_hint {
if let Type::Ambiguous = expect {
} else {
expr.type_ = Type::coerce(expect, &expr.type_)?;
}
}
*/
expr.type_ = type_;
Ok(expr)
}

View file

@ -14,7 +14,6 @@ pub type uid = u32;
pub enum VarKind {
Global(uid),
Local(uid),
Param(uid),
Function(uid),
Undefined,
}
@ -22,10 +21,7 @@ pub enum VarKind {
impl VarKind {
pub fn unwrap(self) -> uid {
match self {
VarKind::Global(i)
| VarKind::Local(i)
| VarKind::Param(i)
| VarKind::Function(i) => i,
VarKind::Global(i) | VarKind::Local(i) | VarKind::Function(i) => i,
VarKind::Undefined => unreachable!("Failed unwrapping uid"),
}
}
@ -43,7 +39,7 @@ pub struct Symbol {
pub enum Definition {
Symbol(Symbol),
BlockStart,
FuncStart,
FuncStart(Type),
}
fn next(array: &mut [uid]) -> uid {
@ -59,7 +55,7 @@ pub struct SymbolTable {
nesting: usize,
local_varno: Vec<uid>,
global_varno: Vec<uid>,
funcno: Vec<uid>,
funcno: uid,
}
impl SymbolTable {
@ -69,7 +65,7 @@ impl SymbolTable {
nesting: 0,
global_varno: vec![0],
local_varno: vec![0],
funcno: vec![0],
funcno: 1,
}
}
@ -80,7 +76,7 @@ impl SymbolTable {
mutable: bool,
) -> Result<VarKind> {
let kind = match type_ {
Type::Prim(_) | Type::Struct(_) => {
Type::Prim(_) | Type::Struct(_) | Type::FunctionRef { .. } => {
if self.nesting == 0 {
VarKind::Global(next(&mut self.global_varno))
} else {
@ -93,9 +89,9 @@ impl SymbolTable {
}
VarKind::Undefined
},
Type::Function { .. } => {
Type::FunctionDef { id, .. } => {
if !mutable {
VarKind::Function(next(&mut self.funcno))
VarKind::Function(id)
} else {
return error().reason("Function declaration must be immutable");
}
@ -112,7 +108,7 @@ impl SymbolTable {
}
fn define_param(&mut self, name: String, type_: Type) -> Result<VarKind> {
let kind = VarKind::Param(next(&mut self.local_varno));
let kind = VarKind::Local(next(&mut self.local_varno));
self.syms.push(Definition::Symbol(Symbol {
name,
type_,
@ -122,17 +118,29 @@ impl SymbolTable {
Ok(kind)
}
fn start_func(&mut self) {
fn start_func(&mut self, returns: Type) -> uid {
self.nesting += 1;
self.local_varno.push(0);
self.syms.push(Definition::FuncStart);
self.syms.push(Definition::FuncStart(returns));
let old = self.funcno;
self.funcno += 1;
old
}
/// Returns the declared return type of the innermost function currently
/// being analyzed.
///
/// `syms` is used as a stack: `start_func` pushes a `FuncStart` marker and
/// `end_func` pops back through it. The search therefore runs from the top
/// of the stack downwards, so a `return` inside a nested function is checked
/// against that function's return type rather than an enclosing one.
///
/// # Errors
/// Fails with "Return outside of function" when no `FuncStart` marker is on
/// the stack.
fn get_return_type(&mut self) -> Result<Type> {
    for def in self.syms.iter().rev() {
        if let Definition::FuncStart(t) = def {
            return Ok(t.clone());
        }
    }
    error().reason("Return outside of function")
}
fn end_func(&mut self) {
self.nesting -= 1;
self.local_varno.pop();
while !self.syms.is_empty() {
if let Some(Definition::FuncStart) = self.syms.pop() {
if let Some(Definition::FuncStart(_)) = self.syms.pop() {
return;
}
}
@ -154,18 +162,17 @@ impl SymbolTable {
fn find_symbol(&self, find_name: &str) -> Result<Symbol> {
let mut nesting = self.nesting;
println!("Looking for {find_name}, scope = {nesting}");
for s in self.syms.iter().rev() {
match s {
Definition::Symbol(sym)
// Only search function local and global scope
if nesting == self.nesting || nesting == 0 => {
println!("{}, {:?}, {nesting}", sym.name, sym.type_);
// Convert function definition to function reference
if find_name == sym.name {
return Ok(sym.clone());
}
},
Definition::FuncStart => {
Definition::FuncStart(_) => {
nesting -= 1;
},
_ => {},

View file

@ -1,10 +1,10 @@
use crate::{
semantic::{Symbol, SymbolTable},
BinaryOp, Expression, ExpressionKind, Immediate, Parameter, Statement,
StatementKind, UnaryOp,
semantic::{Symbol, SymbolTable},
};
use super::primitives::*;
use super::{primitives::*, uid};
use crate::err::*;
#[derive(Debug, Clone)]
@ -14,10 +14,35 @@ pub enum Type {
Prim(Primitive),
Struct(Vec<Parameter>),
StructDef(Vec<Parameter>),
Function {
FunctionRef {
params: Vec<Type>,
returns: Box<Type>,
},
FunctionDef {
params: Vec<Type>,
returns: Box<Type>,
id: uid,
},
}
impl std::fmt::Display for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Type::Ambiguous => write!(f, "ambiguous"),
Type::Nothing => write!(f, "nothing"),
Type::Prim(primitive) => write!(f, "{primitive}"),
Type::Struct(vec) => write!(f, "struct {vec:?}"),
Type::StructDef(vec) => write!(f, "struct definition"),
Type::FunctionRef { params, returns } => {
write!(f, "({params:?}) -> {returns}")
},
Type::FunctionDef {
params,
returns,
id,
} => write!(f, "({params:?}) -> {returns}"),
}
}
}
impl PartialEq for Type {
@ -31,15 +56,16 @@ impl PartialEq for Type {
.map(|p| p.type_actual.clone())
.eq(p2.iter().map(|p| p.type_actual.clone())),
(
Function {
FunctionRef {
params: p1,
returns: r1,
},
Function {
FunctionRef {
params: p2,
returns: r2,
},
) => p1.iter().eq(p2.iter()) && r1 == r2,
(FunctionDef { id: id1, .. }, FunctionDef { id: id2, .. }) => id1 == id2,
(Nothing, Nothing) => true,
_ => false,
}
@ -88,6 +114,7 @@ impl Type {
let (p1, p2) = Primitive::coerce_ambiguous(*p1, *p2);
if p1 != p2 { e() } else { Ok(Type::Prim(p1)) }
},
(t1, t2) if t1 == t2 => Ok(t1.clone()),
_ => e(),
}
}