diff --git a/Cargo.lock b/Cargo.lock index 4dfe71f..790502b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,6 +40,7 @@ dependencies = [ name = "lang" version = "0.1.0" dependencies = [ + "punycode", "wat", ] @@ -55,6 +56,12 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "punycode" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e1dcb320d6839f6edb64f7a4a59d39b30480d4d1765b56873f7c858538a5fe" + [[package]] name = "unicode-width" version = "0.1.14" diff --git a/Cargo.toml b/Cargo.toml index 5e31e90..68b0f5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,4 @@ edition = "2021" [dependencies] wat = "1.219" +punycode = "0.4" diff --git a/src/main.rs b/src/main.rs index b34cca0..8add9f8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -78,5 +78,7 @@ fn main() -> Result<()> { */ //let module = frontend::Module::from_file("./demo.hal")?; //module.write_to("test"); + let p = punycode::encode("-_").unwrap(); + println!("{p}"); Ok(()) } diff --git a/src/parse/expression.rs b/src/parse/expression.rs index b7af815..76da257 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -1,4 +1,4 @@ -use crate::{Base, semantic::*}; +use crate::Base; use super::*; @@ -45,7 +45,6 @@ pub enum ExpressionKind { Immediate(Immediate), Identifier { name: String, - uid: UID, }, Binary { op: BinaryOp, @@ -60,25 +59,20 @@ pub enum ExpressionKind { FunctionDef { params: Parameters, returns_str: Option, - returns_actual: Type, body: Vec, - uid: UID, }, FunctionCall { callee: Box, args: Vec, - uid: UID, }, - StructDef(Parameters, UID), + StructDef(Parameters), StructLiteral { name: String, args: Vec<(String, Expression)>, - uid: UID, }, Field { namespace: Box, field: Box, - uid: UID, }, Block(Vec), If { @@ -92,16 +86,11 @@ pub enum ExpressionKind { pub struct Expression { pub kind: ExpressionKind, pub span: Span, - pub type_: UID, } impl Expression { pub fn new(kind: ExpressionKind, span: Span) -> Self { - Self { - kind, - span, - type_: "".into(), - } + Self { kind, span } } } @@ -116,28 +105,28 @@ impl std::fmt::Display for ExpressionKind { right, } => { write!(f, "({left} {token} {right})") - }, + } e::Parenthesis(inner) => write!(f, "{inner}"), e::Unary { op: token, child } => { write!(f, "({token} {child})") - }, + } e::Identifier { name, .. } => write!(f, "{name}"), e::FunctionCall { callee, args, .. } => { write!(f, "({callee} call {args:?})") - }, + } e::Field { namespace, field, .. } => { write!(f, "({namespace} . {field})") - }, + } e::FunctionDef { params, - returns_actual, + returns_str, .. } => { - write!(f, "(fn({params:?}) -> {returns_actual})") - }, - e::StructDef(params, _) => write!(f, "struct {{ {params:?} }}"), + write!(f, "(fn({params:?}) -> {returns_str:?})") + } + e::StructDef(params) => write!(f, "struct {{ {params:?} }}"), e::StructLiteral { name, args, .. } => write!(f, "{name} {{ {args:?} }}"), e::Block(block) => { write!(f, "{{\n")?; @@ -145,12 +134,8 @@ impl std::fmt::Display for ExpressionKind { write!(f, "{:#?}", s)?; } write!(f, "}}") - }, - e::If { - predicate, - block, - else_, - } => { + } + e::If { block, else_, .. } => { write!(f, "{{\n")?; for s in block { write!(f, "{:#?}", s)?; @@ -160,14 +145,14 @@ impl std::fmt::Display for ExpressionKind { write!(f, "{else_}")?; } Ok(()) - }, + } } } } impl std::fmt::Display for Expression { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "({} {})", self.kind, self.type_) + write!(f, "({})", self.kind) } } @@ -242,7 +227,6 @@ impl> Parser { e::Field { namespace: current.into(), field: field.into(), - uid: "".into(), }, span, ) @@ -259,7 +243,7 @@ impl> Parser { Ok(a) => { span = span + a.span; args.push(a) - }, + } Err(_) => break, }; if !self.eat(t::Comma).is_ok() { @@ -271,7 +255,6 @@ impl> Parser { e::FunctionCall { callee: current.into(), args, - uid: "".into(), }, span + span2, ); @@ -319,10 +302,9 @@ impl> Parser { names.push(name.clone()); // Param type (optional) if self.eat(t::Colon).is_ok() { - let (type_name, span2) = self.identifier().trace_span( - span + span2, - format!("While parsing type of '{}'", name), - )?; + let (type_name, span2) = self + .identifier() + .trace_span(span + span2, format!("While parsing type of '{}'", name))?; strongly_typed = true; span = span + span2; type_names.push(type_name); @@ -376,38 +358,37 @@ impl> Parser { t::IntegerLiteral(i, b) => { self.skip(1); e::Immediate(im::Integer(i, b)) - }, + } t::FloatLiteral(f) => { self.skip(1); e::Immediate(im::Real(f)) - }, + } t::StringLiteral(s) => { self.skip(1); e::Immediate(im::String(s)) - }, + } t::GlyphLiteral(c) => { self.skip(1); e::Immediate(im::Glyph(c)) - }, + } t::True => { self.skip(1); e::Immediate(im::Boolean(true)) - }, + } t::False => { self.skip(1); e::Immediate(im::Boolean(false)) - }, + } t::If => return self.if_else(), t::LeftBrace => { let (block, span1) = self.block()?; span = span + span1; e::Block(block) - }, + } // Function definition t::LeftParen if (self.look(1, t::Identifier("".into())).is_ok() - && (self.look(2, t::Colon).is_ok() - || self.look(2, t::Comma).is_ok())) + && (self.look(2, t::Colon).is_ok() || self.look(2, t::Comma).is_ok())) || self.look(1, t::RightParen).is_ok() => { self.skip(1); @@ -434,19 +415,17 @@ impl> Parser { e::FunctionDef { params, returns_str, - returns_actual: Type::Ambiguous, body, - uid: "".into(), } - }, + } // Struct definition t::Struct => { self.skip(1); self.eat(t::LeftBrace)?; let params = self.parameters(span)?; self.eat(t::RightBrace)?; - e::StructDef(params, "".into()) - }, + e::StructDef(params) + } // Struct literal t::Identifier(name) if self.look(1, t::LeftBrace).is_ok() => { self.skip(2); @@ -470,19 +449,12 @@ impl> Parser { } } self.eat(t::RightBrace)?; - e::StructLiteral { - name, - args, - uid: "".into(), - } - }, + e::StructLiteral { name, args } + } t::Identifier(i) => { self.skip(1); - e::Identifier { - name: i, - uid: "".into(), - } - }, + e::Identifier { name: i } + } // Parenthetical t::LeftParen => { self.skip(1); @@ -494,12 +466,12 @@ impl> Parser { .reason("Unclosed '('") .span(&expr.span)?; e::Parenthesis(expr.into()) - }, + } _ => { return error() .span(&span) .reason(format!("Expected expression, found {}", next.0)); - }, + } }; Ok(Expression::new(kind, span)) } @@ -540,7 +512,7 @@ impl> Parser { Ok(Token(t::Identifier(i), span)) => { self.skip(1); Ok((i, span)) - }, + } Ok(t) => error() .reason(format!("Expected identifier, found {}", t.0)) .span(&t.1), diff --git a/src/parse/statement.rs b/src/parse/statement.rs index e0cd615..629105a 100644 --- a/src/parse/statement.rs +++ b/src/parse/statement.rs @@ -1,5 +1,3 @@ -use crate::semantic::UID; - use super::*; #[derive(Debug, Clone)] @@ -47,7 +45,7 @@ impl> Parser { Ok(Token(t::Identifier(s), span2)) => { span = span + span2; Some(s) - }, + } _ => None, }; let mutable = if self.eat(t::Equal).is_ok() { @@ -63,14 +61,13 @@ impl> Parser { .expression(0) .trace_span(span, "while parsing declaration")?; span = span + value.span; - let no_semicolon = - if let ExpressionKind::FunctionDef { .. } = value.kind { - true - } else if let ExpressionKind::StructDef(_, _) = value.kind { - true - } else { - false - }; + let no_semicolon = if let ExpressionKind::FunctionDef { .. } = value.kind { + true + } else if let ExpressionKind::StructDef(_) = value.kind { + true + } else { + false + }; let s = Statement { kind: s::Declaration { name, @@ -84,7 +81,7 @@ impl> Parser { return Ok(s); } s - }, + } // Assignment (Token(t::Identifier(name), span2), Ok(Token(t::Equal, span3))) => { self.skip(2); @@ -96,7 +93,7 @@ impl> Parser { span, kind: s::Assignment { name, value }, } - }, + } // While (Token(t::While, span2), _) => { self.skip(1); @@ -117,7 +114,7 @@ impl> Parser { block: block.0, }, }); - }, + } // (DEBUG) print (Token(t::Print, span2), _) => { self.skip(1); @@ -130,7 +127,7 @@ impl> Parser { span, kind: s::Print(expr), } - }, + } // Return (Token(t::Return, span2), _) => { span = span + span2; @@ -143,7 +140,7 @@ impl> Parser { span, kind: s::Return(expr), } - }, + } // Expression (Token(_, span2), _) => { span = span + span2; @@ -165,14 +162,14 @@ impl> Parser { span, kind: s::Expression(expr), }); - }, + } _ => Statement { span, kind: s::Expression(expr), }, } } - }, + } }; // Check for semicolon if self.eat(t::Semicolon).is_ok() { diff --git a/src/semantic/analyzer.rs b/src/semantic/analyzer.rs index c3c7c14..f530e93 100644 --- a/src/semantic/analyzer.rs +++ b/src/semantic/analyzer.rs @@ -1,3 +1,53 @@ -use crate::{Expression, ExpressionKind, Statement, StatementKind}; +use std::collections::HashMap; + +use crate::{err::*, Expression, ExpressionKind, Statement, StatementKind}; use super::*; +use ir::*; + +#[derive(Debug, Clone)] +enum Undo { + FuncGuard, + BlockGuard, + Symbol { name: String, prev: Vec }, + Push { name: String }, + None, +} + +#[derive(Debug, Clone)] +struct Symbol { + mangle: SID, + type_: TID, + life: Lifetime, +} + +#[derive(Debug, Clone)] +struct SymbolTable { + types: HashMap, + table: HashMap>, + undo_stack: Vec, + path: Vec, + salt: usize, +} + +impl SymbolTable { + fn define(&mut self, name: &str, type_: TID, life: Lifetime) { + if !self.table.contains_key(name) { + self.table.insert(name.to_string(), vec![]); + } + let symbols = self.table.get_mut(name).unwrap(); + let mut path = self.path.clone(); + path.push(name.to_string()); + let mangle = names::mangle(path, &format!("{:#x}", self.salt)); + let symbol = Symbol { + mangle, + type_, + life, + }; + let undo = if + } +} + +pub fn analyze_block(stmts: Vec) -> Result> { + todo!() +} diff --git a/src/semantic/builtin.rs b/src/semantic/builtin.rs index 3e69347..4d5a054 100644 --- a/src/semantic/builtin.rs +++ b/src/semantic/builtin.rs @@ -1,23 +1,23 @@ -use super::{UID, primitives::Primitive}; +use super::{primitives::Primitive, SID}; -pub fn mangle(input: &str) -> UID { - format!("$${input}") +pub fn mangle(input: &str) -> SID { + format!("$B${input}") } // Nothing ever happens -pub fn nothing() -> UID { +pub fn nothing() -> SID { mangle("nothing") } -pub fn integer() -> UID { +pub fn integer() -> SID { Primitive::integer_ambiguous.mangle() } -pub fn real() -> UID { +pub fn real() -> SID { Primitive::real_ambiguous.mangle() } -pub fn all() -> Vec { +pub fn all() -> Vec { let mut uids = Primitive::ALL.map(|p| p.mangle()).to_vec(); uids.push(nothing()); uids diff --git a/src/semantic/ir.rs b/src/semantic/ir.rs new file mode 100644 index 0000000..c1cae87 --- /dev/null +++ b/src/semantic/ir.rs @@ -0,0 +1,53 @@ +use crate::{BinaryOp, UnaryOp}; + +use super::{Type, SID, TID}; + +#[derive(Clone, Debug)] +pub struct IrBlock { + nodes: Vec, +} + +#[derive(Clone, Debug)] +pub enum IrNode { + Declaration { + uid: SID, + mutable: bool, + size: usize, + value: IrExpr, + }, + Function { + uid: SID, + parameters: Vec, + block: IrBlock, + }, + Conditional { + branches: Vec<(IrExpr, IrBlock)>, + default: IrBlock, + }, + Expr(IrExpr), +} + +#[derive(Clone, Debug)] +pub struct IrExpr { + kind: IrExprKind, + type_: TID, +} + +#[derive(Clone, Debug)] +pub enum IrExprKind { + Ident(SID), + UnOp { + op: UnaryOp, + child: Box, + }, + BinOp { + op: BinaryOp, + left: Box, + right: Box, + }, + Block(IrBlock), + Call { + function: SID, + args: Vec, + }, +} diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index 6ffd5ea..0b84b7b 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -1,12 +1,25 @@ pub mod analyzer; -mod builtin; +pub mod builtin; +pub mod ir; +pub mod names; pub mod primitives; pub use primitives::*; -use crate::{BinaryOp, UnaryOp, semantic::Primitive}; +use crate::semantic::Primitive; -pub type UID = String; +/// Type ID +pub type TID = usize; +/// Name mangle +pub type SID = String; + +#[derive(Debug, Clone, Copy)] +pub enum Lifetime { + /// Exists for lifetime of program + Static, + /// Exists for lifetime of contained scope + Dynamic, +} #[derive(Debug, Clone)] pub enum Type { @@ -14,8 +27,18 @@ pub enum Type { Prim(Primitive), Nothing, Never, - Struct(UID), - Function(UID), + Struct { + size: usize, + name: String, + member_names: Vec, + member_types: Vec, + }, + Alias(TID), + Function { + name: String, + arg_names: Vec, + arg_types: Vec, + }, } impl std::fmt::Display for Type { @@ -24,52 +47,10 @@ impl std::fmt::Display for Type { Type::Ambiguous => write!(f, "ambiguous"), Type::Prim(primitive) => write!(f, "{primitive}"), Type::Nothing => write!(f, "nothing"), - Type::Struct(s) => write!(f, "struct {s}"), - Type::Function(func) => write!(f, "func {func}"), Type::Never => write!(f, "never"), + Type::Struct { name, .. } => write!(f, "struct {name}"), + Type::Alias(tid) => write!(f, "alias ({tid})"), + Type::Function { name, .. } => write!(f, "func ({name})"), } } } - -#[derive(Clone, Debug)] -pub struct IrBlock { - nodes: Vec, -} - -#[derive(Clone, Debug)] -pub enum IrNode { - Declaration { - uid: UID, - mutable: bool, - size: usize, - value: IrExpr, - }, - Function { - uid: UID, - parameters: Vec, - block: IrBlock, - }, - Conditional { - branches: Vec<(IrExpr, IrBlock)>, - default: IrBlock, - }, - Expr(IrExpr), -} - -#[derive(Clone, Debug)] -pub enum IrExpr { - Ident(UID), - UnOp { - op: UnaryOp, - child: Box, - }, - BinOp { - op: BinaryOp, - left: Box, - right: Box, - }, - Call { - function: UID, - args: Vec, - }, -} diff --git a/src/semantic/names.rs b/src/semantic/names.rs new file mode 100644 index 0000000..92d8f5d --- /dev/null +++ b/src/semantic/names.rs @@ -0,0 +1,16 @@ +// ::= "$" +// ::= {}* +// ::= +// ::= <_a-zA-Z> {<_a-zA-Z0-9>}* +// ::= {<0-9>}+ +// ::= {}* + +pub fn mangle(path: Vec, salt: &str) -> String { + let mut buf: Vec = vec![]; + for p in path { + let bytes = format!("{}{}", p.len(), punycode::encode(&p).unwrap()); + buf.extend_from_slice(bytes.as_bytes()); + } + buf.extend_from_slice(salt.as_bytes()); + String::from_utf8(buf).unwrap() +} diff --git a/src/semantic/primitives.rs b/src/semantic/primitives.rs index ef12c41..419a85f 100644 --- a/src/semantic/primitives.rs +++ b/src/semantic/primitives.rs @@ -1,7 +1,7 @@ use crate::{BinaryOp, UnaryOp}; use crate::err::*; -use crate::semantic::{UID, builtin}; +use crate::semantic::{builtin, SID}; macro_rules! count { () => (0usize); @@ -28,7 +28,7 @@ macro_rules! primitives { } } - pub fn mangle(&self) -> UID { + pub fn mangle(&self) -> SID { match self { Primitive::integer_ambiguous => builtin::mangle("integer_ambiguous"), Primitive::real_ambiguous => builtin::mangle("real_ambiguous"), @@ -62,9 +62,7 @@ impl Primitive { pub fn as_wat(&self) -> &'static str { use Primitive::*; match self { - boolean | glyph | w8 | w16 | w32 | whole | i8 | i16 | i32 | integer => { - "i32" - }, + boolean | glyph | w8 | w16 | w32 | whole | i8 | i16 | i32 | integer => "i32", w64 | i64 => "i64", r32 | real => "f32", r64 => "f64", @@ -128,16 +126,11 @@ impl Primitive { pub fn coerce(self, expect: Primitive) -> Result { use Primitive::*; match (self, expect) { - ( - integer_ambiguous, - a @ (i8 | i16 | i32 | i64 | integer | w8 | w16 | w32 | w64 | whole), - ) - | ( - a @ (i8 | i16 | i32 | i64 | integer | w8 | w16 | w32 | w64 | whole), - integer_ambiguous, - ) => Ok(a), - (real_ambiguous, a @ (r32 | r64 | real)) - | (a @ (r32 | r64 | real), real_ambiguous) => Ok(a), + (integer_ambiguous, a @ (i8 | i16 | i32 | i64 | integer | w8 | w16 | w32 | w64 | whole)) + | (a @ (i8 | i16 | i32 | i64 | integer | w8 | w16 | w32 | w64 | whole), integer_ambiguous) => { + Ok(a) + } + (real_ambiguous, a @ (r32 | r64 | real)) | (a @ (r32 | r64 | real), real_ambiguous) => Ok(a), (t1, t2) if t1 == t2 => Ok(t1), _ => error().reason(format!("Cannot coerce '{self}' into '{expect}'")), } @@ -158,11 +151,7 @@ impl Primitive { } } - pub fn binary_op( - mut lhs: Primitive, - op: BinaryOp, - mut rhs: Primitive, - ) -> Result { + pub fn binary_op(mut lhs: Primitive, op: BinaryOp, mut rhs: Primitive) -> Result { use Primitive::*; if lhs.is_ambiguous() && !rhs.is_ambiguous() { lhs = lhs.coerce(rhs)?; @@ -188,8 +177,7 @@ impl Primitive { pub fn unary_op(op: UnaryOp, child: Primitive) -> Result { use Primitive::*; use UnaryOp::*; - let e = - error().reason(format!("Unary {} is not defined for {}", op, child)); + let e = error().reason(format!("Unary {} is not defined for {}", op, child)); match op { Minus => match child { boolean | string | glyph | whole | w8 | w16 | w32 | w64 => e, diff --git a/test.wasm b/test.wasm index b032263..02507b6 100644 Binary files a/test.wasm and b/test.wasm differ diff --git a/test.wat b/test.wat index 842c140..dccf62e 100644 --- a/test.wat +++ b/test.wat @@ -1,32 +1,9 @@ (module -(func $3$func -(param $4$s$0 i32) -(param $4$s$1 i32) -(param $4$s$2 f32) -(param $5$a$0 i32) -(result i32 i32 f32 ) -local.get $4$s$2 -local.get $4$s$1 -local.get $4$s$0 -return -) -(global $2$s$0 (mut i32)) -(global $2$s$1 (mut i32)) -(global $2$s$2 (mut f32)) +(global $2$s$0 (mut i32) (i32.const 0)) +(global $2$s$1 (mut i32) (i32.const 0)) +(global $2$s$2 (mut f32) (f32.const 0)) (func $$main (local $$tmp0$0 i32) (local $$tmp1$0 i32) -i32.const 2 -i32.const 1 -local.set $$tmp0$0 -i32.const 97 -local.set $$tmp1$0 -f32.const 1 -local.get $$tmp1$0 -local.get $$tmp0$0 -call $3$func -global.set $2$s$0 -global.set $2$s$1 -global.set $2$s$2 ) -) \ No newline at end of file +)