From 1ab6bb1e2b22c462998d34c2d9efcf0f9a0afe9f Mon Sep 17 00:00:00 2001 From: Logan Date: Tue, 26 Nov 2024 11:04:23 -0600 Subject: [PATCH] Started AST --- src/main.rs | 11 -- src/parse/expression.rs | 35 ++-- src/parse/statement.rs | 11 +- src/semantic/analyzer.rs | 88 --------- src/semantic/builtin.rs | 24 +++ src/semantic/mod.rs | 360 ++++--------------------------------- src/semantic/primitives.rs | 15 +- 7 files changed, 87 insertions(+), 457 deletions(-) create mode 100644 src/semantic/builtin.rs diff --git a/src/main.rs b/src/main.rs index a02a468..b34cca0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,6 @@ use std::ops::Add; use err::*; use lookahead::*; use parse::*; -use semantic::Analyzer; use token::*; #[derive(Clone, Copy, Debug)] @@ -70,17 +69,7 @@ fn parse(input: &'static str) -> impl Iterator { Parser::new(tokenize(input)) } -fn typecheck(input: &'static str) { - let parsed: Vec<_> = parse(input).collect(); - let mut s = semantic::Analyzer::new(); - s.block(parsed); -} - fn main() -> Result<()> { - for p in parse(include_str!("../demo.hal")) { - println!("------------------"); - println!("{p:#?}"); - } /* for s in typecheck(include_str!("../demo.hal")) { println!("------------------"); diff --git a/src/parse/expression.rs b/src/parse/expression.rs index ebfe56a..b7af815 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -7,7 +7,6 @@ pub struct Parameters { pub arity: usize, pub names: Vec, pub type_names: Vec, - pub types: Vec, } impl Default for Parameters { @@ -16,7 +15,6 @@ impl Default for Parameters { arity: 0, names: vec![], type_names: vec![], - types: vec![], } } } @@ -94,12 +92,16 @@ pub enum ExpressionKind { pub struct Expression { pub kind: ExpressionKind, pub span: Span, - pub type_: Type, + pub type_: UID, } impl Expression { - pub fn new(kind: ExpressionKind, span: Span, type_: Type) -> Self { - Self { kind, span, type_ } + pub fn new(kind: ExpressionKind, span: Span) -> Self { + Self { + kind, + span, + type_: "".into(), + } } } @@ -194,7 +196,6 @@ impl> Parser { child: child.into(), }, span, - Type::Ambiguous, ) } // Primary @@ -226,7 +227,6 @@ impl> Parser { right: rhs.into(), }, span, - Type::Ambiguous, ); } // Field @@ -245,7 +245,6 @@ impl> Parser { uid: "".into(), }, span, - Type::Ambiguous, ) } // Function call @@ -275,7 +274,6 @@ impl> Parser { uid: "".into(), }, span + span2, - Type::Ambiguous, ); } // Unary postfix @@ -296,7 +294,6 @@ impl> Parser { child: current.into(), }, span, - Type::Ambiguous, ); } else { break; @@ -310,7 +307,6 @@ impl> Parser { let mut arity = 0; let mut names = vec![]; let mut type_names = vec![]; - let mut types = vec![]; let mut strongly_typed = false; loop { // Param name @@ -333,7 +329,6 @@ impl> Parser { } else { type_names.push("".into()); } - types.push("".into()); arity += 1; // Comma if !self.eat(t::Comma).is_ok() { @@ -368,7 +363,6 @@ impl> Parser { arity, names, type_names, - types, }) } @@ -507,7 +501,7 @@ impl> Parser { .reason(format!("Expected expression, found {}", next.0)); }, }; - Ok(Expression::new(kind, span, Type::Ambiguous)) + Ok(Expression::new(kind, span)) } fn if_else(&mut self) -> Result { @@ -526,22 +520,17 @@ impl> Parser { } else { None }; - Ok(Expression { - kind: ExpressionKind::If { + Ok(Expression::new( + ExpressionKind::If { predicate: predicate.into(), block, else_, }, span, - type_: Type::Ambiguous, - }) + )) } else { let (block, span) = self.block()?; - Ok(Expression { - kind: ExpressionKind::Block(block), - span, - type_: Type::Ambiguous, - }) + Ok(Expression::new(ExpressionKind::Block(block), span)) } } diff --git a/src/parse/statement.rs b/src/parse/statement.rs index ca92829..e0cd615 100644 --- a/src/parse/statement.rs +++ b/src/parse/statement.rs @@ -7,15 +7,12 @@ pub enum StatementKind { Declaration { name: String, type_str: Option, - type_uid: UID, value: Expression, mutable: bool, - uid: UID, }, Assignment { name: String, value: Expression, - uid: UID, }, While { predicate: Expression, @@ -78,10 +75,8 @@ impl> Parser { kind: s::Declaration { name, type_str, - type_uid: "".into(), value, mutable, - uid: "".into(), }, span, }; @@ -99,11 +94,7 @@ impl> Parser { .trace_span(span, "while parsing assignment")?; Statement { span, - kind: s::Assignment { - name, - value, - uid: "".into(), - }, + kind: s::Assignment { name, value }, } }, // While diff --git a/src/semantic/analyzer.rs b/src/semantic/analyzer.rs index 94a4b95..c3c7c14 100644 --- a/src/semantic/analyzer.rs +++ b/src/semantic/analyzer.rs @@ -1,91 +1,3 @@ use crate::{Expression, ExpressionKind, Statement, StatementKind}; use super::*; - -pub struct Analyzer { - table: SymbolTable, -} - -impl Analyzer { - pub fn new() -> Self { - Self { - table: SymbolTable::new(), - } - } - - pub fn typecheck() { - todo!() - } - - /// Analyzing a block: - /// 1. Name structs - /// 2. Type structs - /// 3. Name and type functions - /// 4. Name variables (recurse on blocks) (track moves) - /// 5. Type variables - /// 6. Type assert variables - pub fn block(&mut self, mut block: Vec) -> Result> { - // 1. Name structs - for s in &mut block { - if let StatementKind::Declaration { - name, - value: - Expression { - kind: ExpressionKind::StructDef(params, _), - type_, - .. - }, - .. - } = &mut s.kind - { - *type_ = Type::Nothing; - self.table.declare_struct(name, s.span)?; - } - } - // 2. Type structs - for s in &mut block { - if let StatementKind::Declaration { - name, - value: - Expression { - kind: ExpressionKind::StructDef(params, _), - .. - }, - .. - } = &mut s.kind - { - self.table.declare_struct(name, s.span)?; - } - } - // 3. Name and type functions - for s in &mut block { - if let StatementKind::Declaration { - name, - value: - Expression { - kind: - ExpressionKind::FunctionDef { - params, - returns_str, - returns_actual, - body, - uid, - }, - type_, - span, - }, - .. - } = &mut s.kind - { - let uid = self.table.define_function( - name, - params.clone(), - returns_str.as_ref().map(|s| s.as_str()), - *span, - )?; - *type_ = Type::Function(uid); - } - } - Ok(block) - } -} diff --git a/src/semantic/builtin.rs b/src/semantic/builtin.rs new file mode 100644 index 0000000..3e69347 --- /dev/null +++ b/src/semantic/builtin.rs @@ -0,0 +1,24 @@ +use super::{UID, primitives::Primitive}; + +pub fn mangle(input: &str) -> UID { + format!("$${input}") +} + +// Nothing ever happens +pub fn nothing() -> UID { + mangle("nothing") +} + +pub fn integer() -> UID { + Primitive::integer_ambiguous.mangle() +} + +pub fn real() -> UID { + Primitive::real_ambiguous.mangle() +} + +pub fn all() -> Vec { + let mut uids = Primitive::ALL.map(|p| p.mangle()).to_vec(); + uids.push(nothing()); + uids +} diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index 54c9955..6ffd5ea 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -1,12 +1,10 @@ pub mod analyzer; +mod builtin; pub mod primitives; -pub use analyzer::*; pub use primitives::*; -use std::collections::HashMap; - -use crate::{Parameters, Span, err::*, semantic::Primitive}; +use crate::{BinaryOp, UnaryOp, semantic::Primitive}; pub type UID = String; @@ -15,6 +13,7 @@ pub enum Type { Ambiguous, Prim(Primitive), Nothing, + Never, Struct(UID), Function(UID), } @@ -27,331 +26,50 @@ impl std::fmt::Display for Type { Type::Nothing => write!(f, "nothing"), Type::Struct(s) => write!(f, "struct {s}"), Type::Function(func) => write!(f, "func {func}"), + Type::Never => write!(f, "never"), } } } -#[derive(Debug, Clone)] -pub enum SymbolKind { - Variable { - type_: UID, +#[derive(Clone, Debug)] +pub struct IrBlock { + nodes: Vec, +} + +#[derive(Clone, Debug)] +pub enum IrNode { + Declaration { + uid: UID, mutable: bool, - global: bool, - children: Vec, + size: usize, + value: IrExpr, }, Function { - params: Parameters, - returns: UID, + uid: UID, + parameters: Vec, + block: IrBlock, }, - Struct { - params: Parameters, - size: usize, - align: usize, + Conditional { + branches: Vec<(IrExpr, IrBlock)>, + default: IrBlock, }, - TypeDef { - actual: Type, + Expr(IrExpr), +} + +#[derive(Clone, Debug)] +pub enum IrExpr { + Ident(UID), + UnOp { + op: UnaryOp, + child: Box, + }, + BinOp { + op: BinaryOp, + left: Box, + right: Box, + }, + Call { + function: UID, + args: Vec, }, } - -#[derive(Debug, Clone)] -pub struct Symbol { - pub name: String, - pub uid: UID, - pub span: Option, - pub kind: SymbolKind, -} - -impl Symbol { - pub fn is_variable(&self) -> Result<()> { - let name = &self.name; - match &self.kind { - SymbolKind::Variable { .. } => Ok(()), - SymbolKind::Function { .. } => { - error().reason(format!("'{name}' refers to a function, not a value")) - }, - _ => error().reason(format!("'{name}' refers to a type, not a value")), - } - } - - pub fn is_type(&self) -> Result { - let name = &self.name; - match &self.kind { - SymbolKind::Struct { .. } => Ok(Type::Struct(self.uid.clone())), - SymbolKind::TypeDef { actual } => Ok(actual.clone()), - _ => error().reason(format!("'{name}' refers to a value, not a type")), - } - } -} - -#[derive(Debug, Clone)] -pub enum Event { - Declared { name: String, uid: UID }, - Moved { name: String, uid: UID, span: Span }, - Func { returns: Vec, uid: UID }, - Block { returns: UID }, -} - -pub struct SymbolTable { - syms: HashMap, - scope: Vec, - nesting: usize, - mangle_num: usize, -} - -impl SymbolTable { - pub fn new() -> Self { - Self { - syms: HashMap::new(), - scope: vec![], - nesting: 0, - mangle_num: 0, - } - } - - fn generate_uid(&mut self, name: &str) -> UID { - let uid = format!("${}${name}", self.mangle_num); - self.mangle_num += 1; - uid - } - - pub fn get(&self, uid: &UID) -> &Symbol { - self.syms.get(uid).unwrap() - } - - pub fn get_mut(&mut self, uid: &UID) -> &mut Symbol { - self.syms.get_mut(uid).unwrap() - } - - // Find the definition of a symbol in local and global scope - pub fn find(&self, name: &str) -> Result<&Symbol> { - let mut nesting = self.nesting; - for e in self.scope.iter().rev() { - match e { - Event::Declared { uid, .. } - if nesting == self.nesting || nesting == 0 => - { - return Ok(self.get(uid)); - }, - Event::Moved { name, span, .. } - if nesting == self.nesting || nesting == 0 => - { - return error() - .reason(format!("Symbol '{name}' moved out of scope here")) - .span(&span); - }, - Event::Func { .. } => { - nesting -= 1; - }, - _ => {}, - } - } - error().reason(format!("Cannot find symbol '{name}'")) - } - - // Get all nested members of a struct variable - fn get_all_children(&self, uid: &UID) -> Vec { - use SymbolKind as s; - match &self.get(uid).kind { - s::Variable { children, .. } => { - let mut new_children = children.clone(); - for uid in children { - new_children.append(&mut self.get_all_children(uid)) - } - new_children - }, - _ => { - vec![] - }, - } - } - - // Move a symbol out of scope - pub fn move_symbol(&mut self, move_uid: &UID, span: Span) -> Result<()> { - if let SymbolKind::Variable { global, .. } = self.get(move_uid).kind { - if global { - return error().reason("Cannot move global symbol").span(&span); - } - } else { - panic!("Moved non-variable {move_uid}"); - } - let children = self.get_all_children(move_uid); - for e in self.scope.iter().rev() { - match e { - Event::Declared { uid, .. } => { - if move_uid == uid { - break; - } - }, - Event::Moved { uid, span, .. } => { - if children.contains(uid) { - return error() - .reason("Symbol was partially moved here") - .span(&span); - } else if move_uid == uid { - return error() - .reason("Symbol was previously moved here") - .span(&span); - } - }, - _ => {}, - } - } - self.scope.push(Event::Moved { - name: self.get(move_uid).name.clone(), - uid: move_uid.clone(), - span, - }); - Ok(()) - } - - fn in_global_scope(&self) -> bool { - for e in self.scope.iter().rev() { - if let Event::Func { .. } = e { - return false; - } - } - true - } - - pub fn define_var( - &mut self, - name: &str, - mutable: bool, - type_: &UID, - span: Span, - ) -> UID { - let uid = self.generate_uid(name); - self.syms.insert(uid.clone(), Symbol { - name: name.to_string(), - span: Some(span), - uid: uid.clone(), - kind: SymbolKind::Variable { - type_: type_.clone(), - mutable, - global: self.in_global_scope(), - children: vec![], - }, - }); - self.scope.push(Event::Declared { - name: name.to_string(), - uid: uid.clone(), - }); - uid - } - - pub fn declare_struct( - &mut self, - struct_name: &str, - span: Span, - ) -> Result { - // Check for multiple definition - for e in self.scope.iter().rev() { - match e { - Event::Declared { name, uid } => { - if name == struct_name { - let e = error().reason(format!( - "Structure '{struct_name}' is defined multiple times" - )); - if let Some(s) = &self.get(uid).span { - return e.span(s); - } else { - return e; - } - } - }, - Event::Moved { .. } => {}, - _ => break, - } - } - let uid = self.generate_uid(struct_name); - self.syms.insert(uid.clone(), Symbol { - name: struct_name.to_string(), - uid: uid.clone(), - span: Some(span), - kind: SymbolKind::Struct { - params: Parameters::default(), - size: 0, - align: 0, - }, - }); - self.scope.push(Event::Declared { - name: struct_name.to_string(), - uid: uid.clone(), - }); - Ok(uid) - } - - fn type_params(&mut self, mut params: Parameters) -> Result { - for i in 0..params.arity { - params.types[i] = self - .find(¶ms.type_names[i]) - .trace(format!( - "while resolving type of field '{}'", - params.type_names[i] - ))? - .uid - .clone(); - } - Ok(params) - } - - pub fn define_function( - &mut self, - func_name: &str, - params: Parameters, - returns_str: Option<&str>, - span: Span, - ) -> Result { - // Check for multiple definition - for e in self.scope.iter().rev() { - match e { - Event::Declared { name, uid } => { - if name == func_name { - let e = error().reason(format!( - "Function '{func_name}' is defined multiple times" - )); - if let Some(s) = &self.get(uid).span { - return e.span(s); - } else { - return e; - } - } - }, - Event::Moved { .. } => {}, - _ => break, - } - } - let uid = self.generate_uid(func_name); - // Check types - let params = self.type_params(params)?; - let returns = { - let sym = self.find(returns_str)?; - sym.is_type()?; - sym.uid.clone() - }; - self.syms.insert(uid.clone(), Symbol { - name: func_name.to_string(), - uid: uid.clone(), - span: Some(span), - kind: SymbolKind::Function { params, returns }, - }); - self.scope.push(Event::Declared { - name: func_name.to_string(), - uid: uid.clone(), - }); - Ok(uid) - } - - pub fn define_struct(&mut self, uid: &UID, params: Parameters) -> Result<()> { - let params = self.type_params(params)?; - if let SymbolKind::Struct { - params: old_params, .. - } = &mut self.get_mut(uid).kind - { - *old_params = params; - } else { - unreachable!("Defined non-existent struct") - } - Ok(()) - } -} diff --git a/src/semantic/primitives.rs b/src/semantic/primitives.rs index f7cda6a..ef12c41 100644 --- a/src/semantic/primitives.rs +++ b/src/semantic/primitives.rs @@ -1,7 +1,12 @@ use crate::{BinaryOp, UnaryOp}; use crate::err::*; -use crate::semantic::{Symbol, Type, UID}; +use crate::semantic::{UID, builtin}; + +macro_rules! count { + () => (0usize); + ( $x:tt $($xs:tt)* ) => (1usize + count!($($xs)*)); +} macro_rules! primitives { ( $($i:ident),* ) => { @@ -14,6 +19,8 @@ macro_rules! primitives { } impl Primitive { + pub const ALL: [Primitive; count!($($i)*,) - 1] = [$(Primitive::$i),*]; + pub fn from_string(string: &str) -> Option { match string { $(stringify!{$i} => Some(Self::$i),)* @@ -23,10 +30,10 @@ macro_rules! primitives { pub fn mangle(&self) -> UID { match self { - Primitive::integer_ambiguous => "$$integer_amgibuous".into(), - Primitive::real_ambiguous => "$$real_amgibuous".into(), + Primitive::integer_ambiguous => builtin::mangle("integer_ambiguous"), + Primitive::real_ambiguous => builtin::mangle("real_ambiguous"), $( - Primitive::$i => format!("$${}", stringify!{$i}), + Primitive::$i => builtin::mangle(stringify!{$i}), )* } }