From fd1c13dc0403c7e531d906ff58a4b9aca39eee21 Mon Sep 17 00:00:00 2001 From: Logan Date: Mon, 4 Nov 2024 00:51:24 -0600 Subject: [PATCH] IR working, fixed function call ABI --- demo.hal | 15 +- src/frontend.rs | 48 ----- src/ir/generate.rs | 200 +++++++++++++++++--- src/ir/mod.rs | 374 +++++++++---------------------------- src/ir/wasm.rs | 211 +++++++++++++++++++++ src/main.rs | 14 +- src/parse/expression.rs | 26 +-- src/semantic/analyzer.rs | 24 ++- src/semantic/bottom_up.rs | 32 +++- src/semantic/mod.rs | 75 +++++--- src/semantic/naming.rs | 2 +- src/semantic/primitives.rs | 5 +- src/semantic/sizing.rs | 6 + src/semantic/top_down.rs | 8 +- src/semantic/types.rs | 4 +- src/token.rs | 14 +- test.wasm | Bin 67 -> 189 bytes test.wat | 40 ++-- 18 files changed, 644 insertions(+), 454 deletions(-) delete mode 100644 src/frontend.rs create mode 100644 src/ir/wasm.rs create mode 100644 src/semantic/sizing.rs diff --git a/demo.hal b/demo.hal index 20c3b7b..863e932 100644 --- a/demo.hal +++ b/demo.hal @@ -1,2 +1,13 @@ -a : integer : 10; -b := a + 10; +S :: struct { + a: integer, + b: glyph, + c: real, +} + +s := foo(S{ a: 1, b: 'a', c: 1.0}, 2); + +foo :: (s: S, a: integer) -> S { + return s; +} + + diff --git a/src/frontend.rs b/src/frontend.rs deleted file mode 100644 index 98ec929..0000000 --- a/src/frontend.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::path::Path; - -use crate::{ - Parser, Tokenizer, - err::*, - semantic::{self}, -}; - -#[derive(Debug, Clone)] -pub struct Module { - file_name: String, - source: String, -} - -impl Module { - pub fn from_file(path: impl AsRef) -> Result { - let file = std::fs::read(&path).trace(format!( - "while attempting to open file '{}'", - &path.as_ref().display() - ))?; - let source = String::from_utf8_lossy(&file); - Ok(Self::from_string( - format!("{}", path.as_ref().display()), - source.into(), - )) - } - - pub fn from_string(file_name: String, source: String) -> Self { - let tokens = Tokenizer::new(source.chars()).filter(|t| t.0.is_meaningful()); - let statements = Parser::new(tokens); - let program = semantic::Analyzer::typecheck(statements.collect()); - Self { - file_name, - source: source.into(), - } - } - /* - pub fn write_to(&self, path: impl AsRef) { - let watpath = path.as_ref().to_owned().with_extension("wat"); - let mut file = std::fs::File::create(watpath).unwrap(); - file.write_all(&self.wat.as_bytes()).unwrap(); - - let wasmpath = path.as_ref().to_owned().with_extension("wasm"); - let mut file = std::fs::File::create(wasmpath).unwrap(); - file.write_all(&self.wasm).unwrap(); - } - */ -} diff --git a/src/ir/generate.rs b/src/ir/generate.rs index 4d4f172..a1e79e3 100644 --- a/src/ir/generate.rs +++ b/src/ir/generate.rs @@ -3,36 +3,100 @@ use crate::{Expression, ExpressionKind, Statement, StatementKind}; use super::*; impl Compiler { - fn statement(&mut self, stmt: Statement) { + pub fn generate(&mut self, statements: Vec) { + for s in statements { + self.statement(s); + } + } + + pub fn new_temporary(&mut self, type_: Type) -> UID { + let uid = format!("$$tmp{}", self.tmp_num); + self.push(IR::New { + uid: uid.clone(), + type_, + mutable: true, + global: false, + }); + self.tmp_num += 1; + uid + } + + pub fn statement(&mut self, stmt: Statement) { use StatementKind as s; match stmt.kind { s::Declaration { - name, - type_str, type_actual, value, mutable, uid, - } => self.push(IR::New { - uid, - type_: type_actual, - }), - s::Assignment { name, value, uid } => todo!(), + .. + } => { + let global = self.table.table.get(&uid).unwrap().global; + match type_actual { + Type::Prim(_) | Type::Struct(_) => { + self.push(IR::New { + uid: uid.clone(), + type_: type_actual.clone(), + mutable, + global, + }); + self.expression(value); + self.push(IR::Set { + uid, + type_: type_actual, + global, + }); + }, + Type::Nothing | Type::Function(_) | Type::Alias(_) => { + self.expression(value); + }, + Type::Ambiguous => unreachable!(), + }; + }, + s::Assignment { name, value, uid } => { + let global = self.table.table.get(&uid).unwrap().global; + let type_ = value.type_.clone(); + self.expression(value); + self.push(IR::Set { uid, type_, global }); + }, s::If { predicate, block, else_, } => todo!(), s::While { predicate, block } => todo!(), - s::Print(expression) => todo!(), - s::Expression(expression) => todo!(), - s::Block(vec) => todo!(), - s::Return(expression) => todo!(), - s::Error(diagnostic) => todo!(), + s::Print(expression) => { + let type_ = expression.type_.clone(); + self.expression(expression); + self.push(IR::Print { type_ }); + }, + s::Expression(expression) => { + let type_ = expression.type_.clone(); + self.expression(expression); + self.push(IR::Drop { type_ }); + }, + s::Block(block) => { + for s in block { + self.statement(s); + } + }, + s::Return(expression) => { + let type_ = if let Some(expression) = expression { + let type_ = expression.type_.clone(); + self.expression(expression); + type_ + } else { + Type::Nothing + }; + self.push(IR::Return { type_ }); + }, + s::Error(diagnostic) => { + panic!("{diagnostic}") + }, } } - fn expression(&mut self, expr: Expression) { + pub fn expression(&mut self, expr: Expression) { use ExpressionKind as e; match expr.kind { e::Immediate(immediate) => { @@ -44,8 +108,14 @@ impl Compiler { prim: p, }) }, - e::Identifier(_, uid) => { - self.push(IR::Get { uid }); + e::Identifier(_, uid) => match expr.type_ { + Type::Prim(_) | Type::Struct(_) => self.push(IR::Get { + global: self.table.table.get(&uid).unwrap().global, + uid, + type_: expr.type_, + }), + Type::Ambiguous => unreachable!(), + _ => {}, }, e::Binary { op, left, right } => { self.expression(*left); @@ -65,14 +135,17 @@ impl Compiler { e::Parenthesis(inner) => self.expression(*inner), e::FunctionDef { params, - returns_str, returns_actual, body, id, + .. } => { self.push(IR::StartFunc { - fid: id, - params: params.into_iter().map(|p| p.type_actual).collect(), + uid: id, + params: params + .into_iter() + .map(|p| (p.name, p.type_actual)) + .collect(), returns: returns_actual, }); for s in body { @@ -83,24 +156,93 @@ impl Compiler { e::FunctionCall { callee, args, id, .. } => { - for arg in args { + self.expression(*callee); + for arg in args.into_iter().rev() { self.expression(arg); } - let fid = if let Type::Function(fid) = expr.type_ { - fid + self.push(IR::Call { uid: id }) + }, + e::StructDef(..) => {}, + e::StructLiteral { args, .. } => { + let struct_id = if let Type::Struct(s) = expr.type_ { + s } else { unreachable!() }; - self.push(IR::Call { fid }) + let length = args.len(); + let mut temp_buffer = vec![None; length]; + let mut iter = args.into_iter(); + let mut index = length - 1; + loop { + // If struct param has already been saved + if let Some((uid, type_)) = temp_buffer[index].take() { + self.ir.push(IR::Get { + uid, + type_, + global: false, + }); + if index == 0 { + break; + } + index -= 1; + } + // If struct parameter has not been saved + else { + let (name, arg) = iter.next().unwrap(); + let type_ = arg.type_.clone(); + self.expression(arg); + let argno = self.table.get_field_no(struct_id, &name); + if argno != index { + let temp = self.new_temporary(type_.clone()); + temp_buffer[argno] = Some((temp.clone(), type_.clone())); + self.push(IR::Set { + uid: temp, + type_: type_.clone(), + global: false, + }); + } else { + if index == 0 { + break; + } + index -= 1; + } + } + } }, - e::StructDef(..) => {}, - e::StructLiteral { name, args, id } => {}, e::Field { - namespace, - field, - uid, + namespace, field, .. } => { - self.expression(*namespace); + if let Type::Struct(sid) = namespace.type_ { + self.expression(*namespace); + let name = if let e::Identifier(name, _) = field.kind { + name + } else { + unreachable!() + }; + // TODO extract field + let field_type = self.table.get_field(sid, &name).unwrap(); + let temp = self.new_temporary(field_type.clone()); + for (field_name, uid) in self.table.structs[sid].0.clone() { + let type_ = + self.table.resolve_type(&uid).unwrap().is_alias().unwrap(); + if field_name != name { + self.push(IR::Drop { type_ }); + } else { + self.push(IR::Set { + uid: temp.clone(), + type_: field_type.clone(), + global: false, + }) + } + } + self.push(IR::Get { + uid: temp, + type_: field_type.clone(), + global: false, + }); + } else { + self.expression(*namespace); + } }, } } diff --git a/src/ir/mod.rs b/src/ir/mod.rs index a75ec01..d9a11ed 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -1,9 +1,11 @@ mod generate; -pub use generate::*; +mod wasm; + +use std::io::Write; use crate::{ - BinaryOp, Immediate, UnaryOp, - semantic::{FID, Primitive, Type, UID}, + BinaryOp, Immediate, Statement, UnaryOp, + semantic::{Primitive, SymbolTable, Type, UID}, }; #[derive(Debug, Clone)] @@ -23,27 +25,34 @@ pub enum IR { New { uid: UID, type_: Type, + mutable: bool, + global: bool, }, Set { uid: UID, + type_: Type, + global: bool, }, Get { uid: UID, + type_: Type, + global: bool, }, StartFunc { - fid: FID, - params: Vec, + uid: UID, + params: Vec<(UID, Type)>, returns: Type, }, EndFunc, - ReturnType { + Return { type_: Type, }, - Return, Call { - fid: FID, + uid: UID, + }, + Drop { + type_: Type, }, - Drop, Print { type_: Type, }, @@ -56,23 +65,28 @@ impl std::fmt::Display for IR { BinOp { op, type_ } => write!(f, "{op} ({type_})"), UnOp { op, type_ } => write!(f, "{op}, {type_}"), Push { prim, value } => write!(f, "push {value} ({prim})"), - New { uid, type_ } => write!(f, "local ${uid} = {type_}"), - Set { uid } => write!(f, "pop local ${uid}"), - Get { uid } => write!(f, "push local ${uid}"), + New { + uid, + type_, + mutable, + .. + } => write!( + f, + "let {}{uid} : {type_}", + if *mutable { "mut " } else { "" } + ), + Set { uid, .. } => write!(f, "set local {uid}"), + Get { uid, .. } => write!(f, "get local {uid}"), StartFunc { uid, params, returns, - } => write!( - f, - "" - ), + } => write!(f, ""), EndFunc => write!(f, ""), - Return => write!(f, "return"), - Call { uid } => write!(f, "call ${uid}"), - Drop => write!(f, "pop"), - ReturnType { type_ } => write!(f, "result {type_}"), + Call { uid } => write!(f, "call {uid}"), + Return { type_ } => write!(f, "result {type_}"), Print { type_ } => write!(f, "print {type_} [DEBUG]"), + Drop { .. } => write!(f, "pop"), } } } @@ -80,276 +94,53 @@ impl std::fmt::Display for IR { #[derive(Debug, Clone)] pub struct Compiler { ir: Vec, + table: SymbolTable, + tmp_num: usize, } impl Compiler { - pub fn new() -> Self { - Self { ir: vec![] } + pub fn new(table: SymbolTable) -> Self { + Self { + ir: vec![], + table, + tmp_num: 0, + } + } + + pub fn compile(&mut self, statements: Vec) { + self.generate(statements); + self.hoist(); + for ir in &self.ir { + println!("{ir}"); + } + let mut s = String::new(); + for ir in &self.ir { + s.push_str(&self.ir_to_wat(ir.clone()).unwrap()); + } + let assembly = format!("(module\n{s})"); + println!("--------"); + println!("{assembly}"); + std::fs::File::create("test.wat") + .unwrap() + .write_all(assembly.as_bytes()) + .unwrap(); + let binary = wat::parse_str(assembly).unwrap(); + std::fs::File::create("test.wasm") + .unwrap() + .write_all(&binary) + .unwrap(); } pub fn push(&mut self, ir: IR) { self.ir.push(ir); } -} -/* -impl IR { - pub fn to_wat(&self) -> String { - use BinaryOp as b; - use IR::*; - use Primitive as p; - use Type as t; - match self { - // Primitive - BinOp { - op, - type_: t::Prim(p), - } => match (op, p) { - // Addition - (b::Plus, p::i32 | p::w32 | p::integer | p::whole) => "i32.add", - (b::Plus, p::i64 | p::w64) => "i64.add", - (b::Plus, p::r32) => "f32.add", - (b::Plus, p::r64) => "f64.add", - // Subtraction - (b::Minus, p::i32 | p::w32 | p::integer | p::whole) => "i32.sub", - (b::Minus, p::i64 | p::w64) => "i64.sub", - (b::Minus, p::r32) => "f32.sub", - (b::Minus, p::r64) => "f64.sub", - // Multiplication - (b::Star, p::i32 | p::w32 | p::integer | p::whole) => "i32.mul", - (b::Star, p::i64 | p::w64) => "i64.mul", - (b::Star, p::r32) => "f32.mul", - (b::Star, p::r64) => "f64.mul", - // Division - (b::Slash, p::i32 | p::integer) => "i32.div_s", - (b::Slash, p::w32 | p::whole) => "i32.div_u", - (b::Slash, p::i64) => "i64.div_s", - (b::Slash, p::w64) => "i64.div_u", - (b::Slash, p::r32) => "f32.div", - (b::Slash, p::r64) => "f64.div", - _ => todo!(), - } - .into(), - UnOp { - op, - type_: t::Prim(p), - } => todo!(), - Push { prim, value } => todo!(), - NewLocal { - uid, - type_: t::Prim(p), - } => format!("(local ${uid} {})", p.as_wat()), - AssignLocal { uid } => format!("local.set ${uid}"), - GetLocal { uid } => format!("local.get ${uid}"), - NewGlobal { - uid, - type_: t::Prim(p), - } => format!( - "(global ${uid} (mut {}) ({}.const 0))", - p.as_wat(), - p.as_wat() - ), - AssignGlobal { uid } => format!("global.set ${uid}"), - GetGlobal { uid } => format!("global.get ${uid}"), - StartFunc { uid } => format!("(func ${uid}"), - EndFunc => ")".into(), - ReturnType { type_: t::Prim(p) } => format!("(result {})", p.as_wat()), - NewParam { - uid, - type_: t::Prim(p), - } => format!("(param ${uid} {})", p.as_wat()), - Return => "return".into(), - Call { uid } => format!("call ${uid}"), - Drop => "drop".into(), - Print => "call $log".into(), - _ => todo!(), - } - .into() - } -} - -pub struct Compiler { - pub ir: Vec, -} - -impl Compiler { - const IMPORTS: &'static str = - r#"(import "console" "log" (func $log (param i32)))"#; - - pub fn compile(block: Vec) -> (Vec, String) { - let mut this = Self { ir: vec![] }; - for s in block { - this.statement(s); - } - this.ir = this.hoist(); - let mut output = String::new(); - for ir in &this.ir { - //println!("{ir}"); - let s = ir.to_wat(); - output.push_str(&format!("{s}\n")); - } - output = format!("(module\n{}\n{output}\n(start $0)\n)", Self::IMPORTS); - println!("{output}"); - (wat::parse_str(&output).unwrap(), output) - } - - fn statement(&mut self, statement: Statement) { - use StatementKind::*; - match statement.kind { - Declaration { - type_actual, - value, - varkind, - .. - } => { - match varkind { - VarKind::Global(uid) => self.ir.push(IR::NewGlobal { - uid, - type_: type_actual, - }), - VarKind::Local(uid) => self.ir.push(IR::NewLocal { - uid, - type_: type_actual, - }), - _ => {}, - } - self.expression(value); - match varkind { - VarKind::Global(uid) => self.ir.push(IR::AssignGlobal { uid }), - VarKind::Local(uid) => self.ir.push(IR::AssignLocal { uid }), - _ => {}, - }; - }, - Assignment { - name, - value, - varkind, - } => { - self.expression(value); - match varkind { - VarKind::Global(uid) => self.ir.push(IR::AssignGlobal { uid }), - VarKind::Local(uid) => self.ir.push(IR::AssignLocal { uid }), - _ => {}, - } - }, - If { - predicate, - block, - else_, - } => todo!(), - While { predicate, block } => todo!(), - Print(expression) => { - self.expression(expression); - self.ir.push(IR::Print); - }, - Expression(expression) => { - if expression.type_ == Type::Nothing { - self.expression(expression); - } else { - self.expression(expression); - self.ir.push(IR::Drop); - } - }, - Block(statements) => { - for s in statements { - self.statement(s); - } - }, - Error(diagnostic) => { - panic!("{}", diagnostic); - }, - Return(expression) => { - if let Some(e) = expression { - self.expression(e); - } - self.ir.push(IR::Return); - }, - } - } - - fn expression(&mut self, expression: Expression) { - use ExpressionKind::*; - match expression.kind { - Immediate(immediate) => { - let Type::Prim(p) = expression.type_ else { - panic!(); - }; - self.ir.push(IR::Push { - value: immediate, - prim: p, - }) - }, - Identifier(_, var_kind) => match var_kind { - VarKind::Global(uid) => self.ir.push(IR::GetGlobal { uid }), - VarKind::Local(uid) => self.ir.push(IR::GetLocal { uid }), - VarKind::Function(_) => {}, - VarKind::Undefined => panic!("Undefined var not caught by typecheck"), - }, - Binary { - op, - mut left, - mut right, - } => { - assert!(&left.type_ == &right.type_); - self.expression(*left.clone()); - self.expression(*right); - self.ir.push(IR::BinOp { - op, - type_: expression.type_, - }); - }, - Unary { op, mut child } => { - self.expression(*child); - self.ir.push(IR::UnOp { - op, - type_: expression.type_, - }) - }, - Parenthesis(mut e) => { - self.expression(*e); - }, - FunctionDef { - params, - returns_actual, - body, - id, - .. - } => { - self.ir.push(IR::StartFunc { uid: id }); - for (i, p) in params.iter().enumerate() { - self.ir.push(IR::NewParam { - uid: i as uid, - type_: p.type_actual.clone(), - }) - } - self.ir.push(IR::ReturnType { - type_: returns_actual, - }); - for s in body { - self.statement(s); - } - self.ir.push(IR::EndFunc); - }, - FunctionCall { callee, args } => { - let Type::FunctionDef { id, .. } = callee.type_ else { - panic!() - }; - for arg in args { - self.expression(arg); - } - self.ir.push(IR::Call { uid: id }); - }, - StructDef(_) => {}, - StructLiteral { name, args } => todo!(), - Field { namespace, field } => todo!(), - } - } - - fn hoist(&self) -> Vec { + fn hoist(&mut self) { + // Declarations, instructions let mut functions = vec![(vec![], vec![])]; + // Final IR output let mut result = vec![]; - for index in 0..self.ir.len() { - let ir = self.ir.get(index).unwrap(); + for ir in &self.ir { match ir { IR::StartFunc { .. } => { functions.push((vec![], vec![])); @@ -370,10 +161,7 @@ impl Compiler { // Push instruction to correct stack let (inits, instr) = functions.last_mut().unwrap(); match ir { - IR::NewLocal { .. } - | IR::NewGlobal { .. } - | IR::NewParam { .. } - | IR::StartFunc { .. } => { + IR::New { .. } | IR::StartFunc { .. } => { inits.push(ir.clone()); }, _ => instr.push(ir.clone()), @@ -381,16 +169,28 @@ impl Compiler { } // Initialize globals let (inits, instr) = functions.pop().unwrap(); + let mut main_locals = vec![]; for ir in inits { - result.push(ir); + match ir { + IR::New { global: true, .. } => { + result.push(ir); + }, + _ => main_locals.push(ir), + } } // The main function (index 0) - result.push(IR::StartFunc { uid: 0 }); + result.push(IR::StartFunc { + uid: "$$main".into(), + params: vec![], + returns: Type::Nothing, + }); + for ir in main_locals { + result.push(ir); + } for ir in instr { result.push(ir); } result.push(IR::EndFunc); - result + self.ir = result; } } -*/ diff --git a/src/ir/wasm.rs b/src/ir/wasm.rs new file mode 100644 index 0000000..fb1d2c5 --- /dev/null +++ b/src/ir/wasm.rs @@ -0,0 +1,211 @@ +use crate::{ + Base, BinaryOp, Immediate, + err::*, + semantic::{Primitive, Type}, +}; + +use super::{Compiler, IR}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(non_camel_case_types)] +pub enum RegisterType { + f32, + f64, + i32, + i64, +} + +impl std::fmt::Display for RegisterType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + RegisterType::f32 => "f32", + RegisterType::f64 => "f64", + RegisterType::i32 => "i32", + RegisterType::i64 => "i64", + }) + } +} + +const SYS_INT: RegisterType = RegisterType::i32; +const SYS_REAL: RegisterType = RegisterType::f32; + +fn convert_sys_whole(input: &str, base: Base) -> Option { + u32::from_str_radix(input, base as u32) + .ok() + .map(|i| format!("{i}")) +} + +fn convert_sys_int(input: &str, base: Base) -> Option { + i32::from_str_radix(input, base as u32) + .ok() + .map(|i| format!("{i}")) +} + +fn convert_sys_real(input: &str) -> Option { + input.parse::().ok().map(|f| format!("{f}")) +} + +impl Compiler { + pub fn type_prim(&self, prim: Primitive) -> RegisterType { + use Primitive as p; + use RegisterType as r; + match prim { + p::i8 | p::i16 | p::i32 | p::w8 | p::w16 | p::w32 => r::i32, + p::i64 | p::w64 => r::i64, + p::integer | p::whole => SYS_INT, + p::r32 => r::f32, + p::r64 => r::f64, + p::real => SYS_REAL, + p::boolean => r::i32, + p::string => r::i64, + p::glyph => r::i32, + p::integer_ambiguous | p::real_ambiguous => unreachable!(), + } + } + + pub fn splat(&self, type_: &Type) -> Vec { + match type_ { + Type::Ambiguous => unreachable!(), + Type::Function(_) | Type::Nothing | Type::Alias(_) => vec![], + Type::Prim(prim) => vec![self.type_prim(*prim)], + Type::Struct(sid) => { + let struct_def = &self.table.structs[*sid].0; + let mut buf = vec![]; + for (_, type_) in struct_def { + let type_ = + self.table.resolve_type(type_).unwrap().is_alias().unwrap(); + buf.append(&mut self.splat(&type_)); + } + buf + }, + } + } + + pub fn ir_to_wat(&self, ir: IR) -> Result { + use Immediate as i; + use Primitive as p; + Ok(match ir { + IR::Push { value, prim } => match value { + i::Integer(ref i, base) => { + let b = base as u32; + // Unfortunately this can't be simplified + let s = match prim { + p::w8 => u8::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::w16 => u16::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::w32 => u32::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::w64 => u64::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::i8 => i8::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::i16 => i16::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::i32 => i32::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::i64 => i64::from_str_radix(i, b).ok().map(|s| format!("{s}")), + p::whole => convert_sys_whole(i, base), + p::integer => convert_sys_int(i, base), + _ => unreachable!(), + } + .reason(format!("Cannot parse immediate value as '{}'", prim))?; + format!("{}.const {}\n", self.type_prim(prim), s) + }, + i::Real(ref i) => { + let s = match prim { + p::r32 => i.parse::().ok().map(|f| format!("{f}")), + p::r64 => i.parse::().ok().map(|f| format!("{f}")), + p::real => convert_sys_real(i), + _ => unreachable!(), + } + .reason(format!("Cannot parse immediate value as '{}'", prim))?; + format!("{}.const {}\n", self.type_prim(prim), s) + }, + i::String(_) => todo!(), + i::Glyph(c) => format!("i32.const {}\n", c as u32), + i::Boolean(b) => format!("i32.const {}\n", b as i8), + }, + IR::Drop { type_ } => { + let mut buffer = String::new(); + for _ in 0..self.splat(&type_).len() { + buffer.push_str("drop\n"); + } + buffer + }, + IR::New { + uid, + type_, + mutable, + global, + } => { + let mut buffer = String::new(); + for (index, rt) in self.splat(&type_).iter().enumerate() { + buffer.push_str(&format!( + "({} {uid}${index} {})\n", + if global { "global" } else { "local" }, + if global && mutable { + format!("(mut {rt})") + } else { + format!("{rt}") + } + )) + } + buffer + }, + IR::Set { uid, type_, global } => { + let mut buffer = String::new(); + for index in 0..self.splat(&type_).len() { + buffer.push_str(&format!( + "{}.set {uid}${index}\n", + if global { "global" } else { "local" } + )) + } + buffer + }, + IR::Get { uid, type_, global } => { + let mut buffer = String::new(); + for index in (0..self.splat(&type_).len()).rev() { + buffer.push_str(&format!( + "{}.get {uid}${index}\n", + if global { "global" } else { "local" } + )) + } + buffer + }, + IR::StartFunc { + uid, + params, + returns, + } => { + let mut buffer = format!("(func {uid}\n"); + for (puid, type_) in params { + for (id, rt) in self.splat(&type_).iter().enumerate() { + buffer.push_str(&format!("(param {puid}${id} {rt})\n")); + } + } + let returns = self.splat(&returns); + if returns.len() > 0 { + buffer.push_str("(result "); + for rt in returns { + buffer.push_str(&format!("{rt} ")) + } + buffer.push_str(")\n"); + } + buffer + }, + IR::EndFunc => ")\n".into(), + IR::Return { type_ } => "return\n".into(), + IR::Call { uid } => format!("call {uid}\n"), + IR::BinOp { op, type_ } => { + use BinaryOp::*; + let p = if let Type::Prim(p) = type_ { + p + } else { + unreachable!() + }; + match (op, p) { + (Plus, _) => format!("{}.add\n", self.type_prim(p)), + (Minus, _) => format!("{}.sub\n", self.type_prim(p)), + (Star, _) => format!("{}.mul\n", self.type_prim(p)), + _ => todo!(), + } + }, + IR::UnOp { op, type_ } => todo!(), + IR::Print { type_ } => todo!(), + }) + } +} diff --git a/src/main.rs b/src/main.rs index ea624f5..c955d67 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,4 @@ mod err; -mod frontend; mod ir; mod lookahead; mod parse; @@ -9,6 +8,7 @@ mod treewalk; use std::ops::Add; use err::*; +use ir::Compiler; use lookahead::*; use parse::*; use semantic::Analyzer; @@ -95,14 +95,24 @@ fn parse(input: &'static str) -> impl Iterator { } fn typecheck(input: &'static str) -> Vec { - Analyzer::typecheck(parse(input).collect()) + Analyzer::new().typecheck(parse(input).collect()) +} + +fn compile(input: &'static str) { + let mut a = Analyzer::new(); + let s = a.typecheck(parse(input).collect()); + let mut c = Compiler::new(a.table); + c.compile(s); } fn main() -> Result<()> { + /* for s in typecheck(include_str!("../demo.hal")) { println!("------------------"); println!("{s:#?}"); } + */ + compile(include_str!("../demo.hal")); //let module = frontend::Module::from_file("./demo.hal")?; //module.write_to("test"); Ok(()) diff --git a/src/parse/expression.rs b/src/parse/expression.rs index 0ebea87..210795a 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -36,7 +36,7 @@ impl std::fmt::Display for Immediate { } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum ExpressionKind { Immediate(Immediate), Identifier(String, UID), @@ -55,7 +55,7 @@ pub enum ExpressionKind { returns_str: Option, returns_actual: Type, body: Vec, - id: usize, + id: UID, }, FunctionCall { callee: Box, @@ -76,7 +76,7 @@ pub enum ExpressionKind { }, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Expression { pub kind: ExpressionKind, pub span: Span, @@ -89,7 +89,7 @@ impl Expression { } } -impl std::fmt::Debug for ExpressionKind { +impl std::fmt::Display for ExpressionKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use ExpressionKind as e; match self { @@ -99,27 +99,27 @@ impl std::fmt::Debug for ExpressionKind { left, right, } => { - write!(f, "({left:?} {token:?} {right:?})") + write!(f, "({left} {token} {right})") }, - e::Parenthesis(inner) => write!(f, "{inner:?}"), + e::Parenthesis(inner) => write!(f, "{inner}"), e::Unary { op: token, child } => { - write!(f, "({token:?} {child:?})") + write!(f, "({token} {child})") }, e::Identifier(i, _) => write!(f, "{i}"), e::FunctionCall { callee, args, .. } => { - write!(f, "({callee:?} call {args:?})") + write!(f, "({callee} call {args:?})") }, e::Field { namespace, field, .. } => { - write!(f, "({namespace:?} . {field:?})") + write!(f, "({namespace} . {field})") }, e::FunctionDef { params, returns_actual, .. } => { - write!(f, "(fn({params:?}) -> {returns_actual:?})") + write!(f, "(fn({params:?}) -> {returns_actual})") }, e::StructDef(params, _) => write!(f, "struct {{ {params:?} }}"), e::StructLiteral { name, args, .. } => write!(f, "{name} {{ {args:?} }}"), @@ -127,9 +127,9 @@ impl std::fmt::Debug for ExpressionKind { } } -impl std::fmt::Debug for Expression { +impl std::fmt::Display for Expression { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "({:?} {})", self.kind, self.type_) + write!(f, "({} {})", self.kind, self.type_) } } @@ -355,7 +355,7 @@ impl> Parser { returns_str, returns_actual: Type::Ambiguous, body, - id: usize::MAX, + id: "".into(), } }, // Struct definition diff --git a/src/semantic/analyzer.rs b/src/semantic/analyzer.rs index 2b33041..beb7fa4 100644 --- a/src/semantic/analyzer.rs +++ b/src/semantic/analyzer.rs @@ -5,25 +5,31 @@ pub struct Analyzer { } impl Analyzer { - pub fn typecheck(mut statements: Vec) -> Vec { - let mut this = Self { + pub fn new() -> Self { + Self { table: SymbolTable::new(), - }; + } + } + + pub fn typecheck( + &mut self, + mut statements: Vec, + ) -> Vec { for s in &mut statements { - *s = *this.naming_pass_stmt(s.clone().into()).unwrap(); + *s = *self.naming_pass_stmt(s.clone().into()).unwrap(); } for s in &mut statements { - *s = *this.bottom_up_stmt(s.clone().into()).unwrap(); + *s = *self.bottom_up_stmt(s.clone().into()).unwrap(); } for s in &mut statements { - *s = *this.top_down_stmt(s.clone().into()).unwrap(); + *s = *self.top_down_stmt(s.clone().into()).unwrap(); } println!("-----TABLE------"); - println!("{:#?}", this.table.table); + println!("{:#?}", self.table.table); println!("-----FUNCS------"); - println!("{:#?}", this.table.functions); + println!("{:#?}", self.table.functions); println!("-----STRUCTS----"); - println!("{:#?}", this.table.structs); + println!("{:#?}", self.table.structs); statements } } diff --git a/src/semantic/bottom_up.rs b/src/semantic/bottom_up.rs index 1b89a86..10f1d3f 100644 --- a/src/semantic/bottom_up.rs +++ b/src/semantic/bottom_up.rs @@ -40,6 +40,7 @@ impl Analyzer { Some(type_actual.clone()), None, true, + self.table.in_global_scope(), )?; s::Declaration { name, @@ -62,6 +63,7 @@ impl Analyzer { Some(value.type_.clone()), Some(true), true, + false, )?; s::Assignment { name, value, uid } }, @@ -170,18 +172,23 @@ impl Analyzer { mut body, id, } => { - let funcdef = self.table.functions[id].clone(); + let funcdef = self.table.functions.get(&id).unwrap().clone(); self.table.start_function(); for (uid, param) in funcdef.params.iter().zip(params.iter_mut()) { let type_ = self.table.resolve_type(uid).span(&expr.span)?; + param.name = uid.clone(); param.type_actual = type_; } for s in &mut body { *s = *self.bottom_up_stmt(s.clone().into())?; } self.table.end_function(); - returns_actual = - self.table.resolve_type(&funcdef.returns).span(&expr.span)?; + returns_actual = self + .table + .resolve_type(&funcdef.returns) + .span(&expr.span)? + .is_alias() + .span(&expr.span)?; e::FunctionDef { params, returns_str, @@ -194,14 +201,15 @@ impl Analyzer { mut callee, mut args, is_reference, - id, + mut id, } => { callee = self.bottom_up_expr(callee)?; match callee.type_ { - Type::Function(fid) => { + Type::Function(ref uid) => { + id = uid.clone(); expr.type_ = self .table - .resolve_type(&self.table.functions[fid].returns) + .resolve_type(&self.table.functions.get(uid).unwrap().returns) .span(&callee.span)? .is_alias() .span(&expr.span)? @@ -247,10 +255,10 @@ impl Analyzer { e::Field { mut namespace, field, - uid, + mut uid, } => { namespace = self.bottom_up_expr(namespace)?; - if let Type::Struct(s) = namespace.type_ { + let field_name = if let Type::Struct(s) = namespace.type_ { if let e::Identifier(name, _) = &field.kind { expr.type_ = self .table @@ -259,6 +267,7 @@ impl Analyzer { .is_alias() .reason("Expected type, found value") .span(&expr.span)?; + name.clone() } else { return error().reason("Field must be identifier").span(&expr.span); } @@ -269,8 +278,13 @@ impl Analyzer { namespace.type_ )) .span(&expr.span); + }; + // Name mangling + if let e::Identifier(_, mangle) = &namespace.kind { + uid = mangle.clone() + "$" + &field_name; + } else if uid != "" { + uid = uid + "$" + &field_name; } - // TODO handle name mangle e::Field { namespace, field, diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index e797c36..4cfb649 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -2,6 +2,7 @@ mod analyzer; mod bottom_up; mod naming; mod primitives; +mod sizing; mod top_down; mod types; @@ -23,11 +24,12 @@ pub type UID = String; #[derive(Debug, Clone)] pub struct Symbol { - name: String, - type_: Type, - uid: UID, + pub name: String, + pub type_: Type, + pub uid: UID, initialized: bool, - mutable: Option, + pub mutable: Option, + pub global: bool, } #[derive(Debug, Clone)] @@ -40,7 +42,7 @@ enum Definition { #[derive(Debug, Clone)] pub struct SymbolTable { pub structs: Vec, - pub functions: Vec, + pub functions: HashMap, scope: Vec, pub table: HashMap, mangle_num: usize, @@ -63,12 +65,18 @@ impl SymbolTable { uid: nothing_mangle(), initialized: true, mutable: Some(false), + global: true, }; + let mut functions = HashMap::new(); + functions.insert("$$main".into(), FunctionDef { + params: vec![], + returns: "Nothing".into(), + }); scope.push(Definition::Ident(nothing_symbol.clone())); table.insert(nothing_symbol.uid.clone(), nothing_symbol); Self { structs: vec![], - functions: vec![], + functions, scope, table, mangle_num: 0, @@ -96,6 +104,17 @@ impl SymbolTable { unreachable!("Cannot end global scope") } + pub fn in_global_scope(&self) -> bool { + for def in &self.scope { + if let Definition::Ident(..) = def { + continue; + } else { + return false; + } + } + true + } + pub fn resolve_type(&self, uid: &UID) -> Result { let symbol = self.table.get(uid).unwrap(); if symbol.initialized == false { @@ -105,6 +124,16 @@ impl SymbolTable { } } + pub fn get_field_no(&self, sid: SID, argname: &str) -> usize { + let struct_def = &self.structs[sid].0; + for (id, (name, _)) in struct_def.iter().enumerate() { + if name == argname { + return id; + } + } + unreachable!() + } + pub fn start_function(&mut self) { self.nesting += 1; self.scope.push(Definition::FuncStart); @@ -158,8 +187,8 @@ impl SymbolTable { &mut self, params: Vec, returns: Option, - ) -> Result { - let fid = self.functions.len(); + ) -> Result { + let uid = self.generate_uid("func"); let mut symbols = vec![]; for p in ¶ms { let symbol = self.reference_ident(&p.type_str); @@ -174,15 +203,15 @@ impl SymbolTable { let mut new_params = vec![]; for (p, s) in params.iter().zip(symbols.iter()) { let s = self - .define_ident(&p.name, s.type_.clone(), false) + .define_ident(&p.name, s.type_.is_alias()?, false) .trace("While initializing function parameters")?; new_params.push(s.uid); } - self.functions.push(FunctionDef { + self.functions.insert(uid.clone(), FunctionDef { params: new_params, returns, }); - Ok(fid) + Ok(uid) } pub fn modify_ident( @@ -192,6 +221,7 @@ impl SymbolTable { type_: Option, mutable: Option, init: bool, + global: bool, ) -> Result<()> { let symbol = self.table.get_mut(&uid).unwrap(); if let Some(ref type_) = type_ { @@ -204,6 +234,7 @@ impl SymbolTable { _ => mutable, }; symbol.initialized |= init; + symbol.global |= global; let mut nesting = self.nesting; for def in &mut self.scope { @@ -225,6 +256,7 @@ impl SymbolTable { _ => mutable, }; symbol.initialized |= init; + symbol.global |= global; return Ok(()); }, Definition::FuncStart => nesting -= 1, @@ -241,8 +273,6 @@ impl SymbolTable { type_: Type, mutable: bool, ) -> Result { - println!("---{name}---"); - println!("{:#?}", self.lookup_block_scope(name)); if let Type::Alias(_) | Type::Function(_) = &type_ { if mutable { return error() @@ -264,6 +294,7 @@ impl SymbolTable { Some(type_), Some(mutable), true, + self.in_global_scope(), )?; return Ok(s); } @@ -276,6 +307,7 @@ impl SymbolTable { uid: uid.clone(), initialized: true, mutable: Some(mutable), + global: self.in_global_scope(), }; self.scope.push(Definition::Ident(sym.clone())); self.table.insert(uid, sym.clone()); @@ -293,6 +325,7 @@ impl SymbolTable { uid: uid.clone(), initialized: false, mutable: None, + global: false, }; self.scope.push(Definition::Ident(sym.clone())); self.table.insert(uid, sym.clone()); @@ -334,19 +367,3 @@ impl SymbolTable { )) } } - -// I think this works? Box pattern matching weirdness -// going on. If this is ever even used -fn unwrap_aliases(mut t: Type) -> Type { - loop { - if let Type::Alias(ref t1) = t { - if let Type::Alias(t2) = &**t1 { - t = *t2.clone(); - } else { - return t; - } - } else { - return t; - } - } -} diff --git a/src/semantic/naming.rs b/src/semantic/naming.rs index e1f2c10..4abd795 100644 --- a/src/semantic/naming.rs +++ b/src/semantic/naming.rs @@ -29,7 +29,6 @@ impl Analyzer { } else { value.type_.clone() }; - println!("{type_:?}"); let symbol = self.table.define_ident(&name, type_, mutable)?; uid = symbol.uid; s::Declaration { @@ -55,6 +54,7 @@ impl Analyzer { Some(value.type_.clone()), Some(true), false, + false, )?; s::Assignment { name, value, uid } }, diff --git a/src/semantic/primitives.rs b/src/semantic/primitives.rs index 5fa8c45..19a4ba2 100644 --- a/src/semantic/primitives.rs +++ b/src/semantic/primitives.rs @@ -34,8 +34,8 @@ macro_rules! primitives { impl std::fmt::Display for Primitive { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Primitive::integer_ambiguous => write!(f, ""), - Primitive::real_ambiguous => write!(f, ""), + Primitive::integer_ambiguous => write!(f, "ambiguous integer"), + Primitive::real_ambiguous => write!(f, "ambiguous real"), $(Primitive::$i => write!(f, stringify!{$i}),)* } } @@ -48,6 +48,7 @@ macro_rules! primitives { uid: format!("$${}", stringify!{$i}), initialized: true, mutable: Some(false), + global: true, },)* ] } diff --git a/src/semantic/sizing.rs b/src/semantic/sizing.rs new file mode 100644 index 0000000..603a8dc --- /dev/null +++ b/src/semantic/sizing.rs @@ -0,0 +1,6 @@ +pub enum DataType { + i32, + i64, + f32, + f64, +} diff --git a/src/semantic/top_down.rs b/src/semantic/top_down.rs index 001697d..d929792 100644 --- a/src/semantic/top_down.rs +++ b/src/semantic/top_down.rs @@ -89,6 +89,7 @@ impl Analyzer { s::Return(expr) => { if let Some(mut expr) = expr { let expr_t = expr.type_.clone(); + println!("{expr_t}"); expr = *self.top_down_expr(expr.into(), &expr_t)?; s::Return(Some(expr)) } else { @@ -106,6 +107,7 @@ impl Analyzer { expect: &Type, ) -> Result> { use ExpressionKind as e; + println!("{} -> {expect}", expr.type_); expr.type_ = expr.type_.coerce(expect).span(&expr.span)?; expr.kind = match expr.kind { e::Immediate(i) => e::Immediate(i), @@ -151,8 +153,8 @@ impl Analyzer { is_reference, id, } => { - let func_def = if let Type::Function(fid) = callee.type_ { - self.table.functions[fid].clone() + let func_def = if let Type::Function(ref uid) = callee.type_ { + self.table.functions.get(uid).unwrap().clone() } else { unreachable!() }; @@ -208,7 +210,9 @@ impl Analyzer { .span(&arg.span)? .is_alias() .span(&arg.span)?; + println!("{param_t}"); *arg = *self.top_down_expr(arg.clone().into(), ¶m_t)?; + println!("/{param_t}"); } e::StructLiteral { name, args, id } }, diff --git a/src/semantic/types.rs b/src/semantic/types.rs index bc4ee12..98d2a1d 100644 --- a/src/semantic/types.rs +++ b/src/semantic/types.rs @@ -9,8 +9,6 @@ pub type SID = usize; #[derive(Debug, Clone)] pub struct StructureDef(pub Vec<(String, UID)>); -pub type FID = usize; - #[derive(Debug, Clone)] pub struct FunctionDef { pub params: Vec, @@ -28,7 +26,7 @@ pub enum Type { Alias(Box), Prim(Primitive), Struct(SID), - Function(FID), + Function(UID), } impl std::fmt::Display for Type { diff --git a/src/token.rs b/src/token.rs index 9ff9085..5280ea7 100644 --- a/src/token.rs +++ b/src/token.rs @@ -385,14 +385,14 @@ impl> Tokenizer { } buffer = buffer.to_lowercase(); // Determine base - let base = if buffer.starts_with("0b") { - Base::Binary - } else if buffer.starts_with("0o") { - Base::Octal - } else if buffer.starts_with("0x") { - Base::Hex + let (buffer, base) = if let Some(buffer) = buffer.strip_prefix("0b") { + (buffer.to_string(), Base::Binary) + } else if let Some(buffer) = buffer.strip_prefix("0o") { + (buffer.to_string(), Base::Octal) + } else if let Some(buffer) = buffer.strip_prefix("0x") { + (buffer.to_string(), Base::Hex) } else { - Base::Decimal + (buffer, Base::Decimal) }; // Determine integer or float if base == Base::Decimal && (encountered_dot || buffer.contains("e")) { diff --git a/test.wasm b/test.wasm index 202d84d32ad9f431a57a23493af352150f3fcce6..b03226306ba02ba3dfb992bf838bbb285e54cc54 100644 GIT binary patch literal 189 zcmXAg%?iRW5QJwpO^Xd^Po7iQqlfxU==*12-RbWuj-qaRv6|RW*E5169C$B zpM)?66|D!9Vo;i^KqhJ?lOK2k%a$y5#uA5)J1WtAQPKtYoR_75KuFkNNJi&xR4~$3 x?(gwRTD*IV+%gjAqfAoKikeq6N}t5Vl99>FKdTtmP)n8V6?F5zEQpr{%?~RQ9;yHU literal 67 zcmV~$(FuSc07cRJluVSc9cmFbF(UfW{BQtQ?;Nht1jw7BNdc3z-u)ZBKDPE3tdX44 RR3>^UByHr7mv(Zn6Mt%82~Yq4 diff --git a/test.wat b/test.wat index a4f9aab..842c140 100644 --- a/test.wat +++ b/test.wat @@ -1,14 +1,32 @@ (module -(import "console" "log" (func $log (param i32))) -(global $a$0 (mut i32) (i32.const 0)) -(func $0 -i32.const 10 -i32.const 5 -i32.div_s -global.set $a$0 -global.get $a$0 -call $log +(func $3$func +(param $4$s$0 i32) +(param $4$s$1 i32) +(param $4$s$2 f32) +(param $5$a$0 i32) +(result i32 i32 f32 ) +local.get $4$s$2 +local.get $4$s$1 +local.get $4$s$0 +return ) - -(start $0) +(global $2$s$0 (mut i32)) +(global $2$s$1 (mut i32)) +(global $2$s$2 (mut f32)) +(func $$main +(local $$tmp0$0 i32) +(local $$tmp1$0 i32) +i32.const 2 +i32.const 1 +local.set $$tmp0$0 +i32.const 97 +local.set $$tmp1$0 +f32.const 1 +local.get $$tmp1$0 +local.get $$tmp0$0 +call $3$func +global.set $2$s$0 +global.set $2$s$1 +global.set $2$s$2 ) +) \ No newline at end of file