diff --git a/demo.lang b/demo.lang index 74c6b0e..45a1565 100644 --- a/demo.lang +++ b/demo.lang @@ -1,6 +1,9 @@ -i := 0; +i := 0x0; -while i < 10 { - i = i + 1; - print i; +if i > 10 { + print "a"; +} else if i > 5 { + print "b"; +} else { + print "c"; } diff --git a/src/err.rs b/src/err.rs index d447bb7..cbf2bd9 100644 --- a/src/err.rs +++ b/src/err.rs @@ -8,9 +8,9 @@ pub fn error() -> Result { #[derive(Clone)] pub struct Diagnostic { - reason: String, - span: Option, - backtrace: Vec, + pub reason: String, + pub span: Option, + pub backtrace: Vec, } impl Diagnostic { @@ -30,8 +30,8 @@ impl std::fmt::Display for Diagnostic { } else { write!(f, "(E) {}\n", self.reason)?; } - for b in self.backtrace.iter().rev() { - write!(f, "--> {}\n", b)?; + for (_i, b) in self.backtrace.iter().enumerate() { + write!(f, "> {}\n", b)?; } Ok(()) } @@ -75,11 +75,11 @@ impl From for Diagnostic { pub trait IntoDiagnostic> { fn reason(self, s: S) -> Result; fn trace(self, s: S) -> Result; + fn trace_span(self, span: Span, s: S) -> Result; } pub trait WithSpan { fn span(self, span: &Span) -> Result; - fn no_span(self) -> Result; } impl WithSpan for Result { @@ -89,17 +89,6 @@ impl WithSpan for Result { e }) } - - fn no_span(self) -> Result { - self.map_err(|mut e| { - e.span = None; - e - }) - } -} - -pub trait CoerceDiagnostic { - fn coerce(self) -> Result; } impl> IntoDiagnostic for Option { @@ -124,6 +113,10 @@ impl> IntoDiagnostic for Option { }), } } + + fn trace_span(self, span: Span, s: S) -> Result { + self.trace(format!("{} {}", span, s.into())) + } } impl, S: Into> IntoDiagnostic @@ -131,7 +124,9 @@ impl, S: Into> IntoDiagnostic { fn reason(self, s: S) -> Result { self.map_err(|e| e.into()).map_err(|mut e| { - e.reason = s.into(); + if e.reason == "" { + e.reason = s.into(); + } e }) } @@ -142,10 +137,8 @@ impl, S: Into> IntoDiagnostic e }) } -} -impl> CoerceDiagnostic for std::result::Result { - fn coerce(self) -> Result { - self.map_err(|e| e.into()) + fn trace_span(self, span: Span, s: S) -> Result { + self.trace(format!("{} {}", span, s.into())).span(&span) } } diff --git a/src/frontend.rs b/src/frontend.rs new file mode 100644 index 0000000..09962a4 --- /dev/null +++ b/src/frontend.rs @@ -0,0 +1,67 @@ +use std::path::Path; + +use crate::{ + err::*, treewalk::Interpreter, Parser, Statement, StatementKind, Token, + Tokenizer, +}; + +#[derive(Debug, Clone)] +pub struct Module { + file_name: String, + source: String, + ast: Vec, + errors: Vec, +} + +impl Module { + pub fn from_file(path: impl AsRef) -> Result { + let file = std::fs::read(&path).trace(format!( + "while attempting to open file '{}'", + &path.as_ref().display() + ))?; + let source = String::from_utf8_lossy(&file); + Ok(Self::from_string( + format!("{}", path.as_ref().display()), + source.into(), + )) + } + + pub fn from_string(file_name: String, source: String) -> Self { + let tokens = Tokenizer::new(source.chars()).filter(|t| t.0.is_meaningful()); + let mut ast = vec![]; + let mut errors = vec![]; + for statement in Parser::new(tokens) { + use StatementKind as s; + match statement.kind { + s::Error(e) => errors.push(e), + _ => ast.push(statement), + } + } + Self { + file_name, + source: source.into(), + ast, + errors, + } + } + + pub fn errors(&self) -> &[Diagnostic] { + &self.errors + } + + pub fn ok(&self) -> bool { + self.errors.len() == 0 + } + + pub fn execute(&self) { + if !self.ok() { + for e in self.errors() { + eprintln!("{e}"); + } + return; + } + Interpreter::new(self.ast.clone().into_iter()) + .run() + .unwrap(); + } +} diff --git a/src/lookahead.rs b/src/lookahead.rs index 82dc6f9..848030b 100644 --- a/src/lookahead.rs +++ b/src/lookahead.rs @@ -34,10 +34,6 @@ where s } - pub fn inner(&self) -> &I { - &self.iterator - } - fn normalize(&mut self) { for item in &mut self.buffer { if self.exhausted { diff --git a/src/main.rs b/src/main.rs index c9b3857..cb66d12 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,16 @@ mod err; +mod frontend; mod lookahead; mod parse; mod token; mod treewalk; +mod types; use std::ops::Add; +use err::*; use lookahead::*; use parse::*; use token::*; -use treewalk::Interpreter; #[derive(Clone, Copy, Debug)] pub struct Span { @@ -16,13 +18,20 @@ pub struct Span { pub column: usize, } +impl std::fmt::Display for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "({}:{})", self.row, self.column) + } +} + impl Add for Span { type Output = Span; fn add(self, rhs: Span) -> Self::Output { + let max = (self.row, self.column).max((rhs.row, rhs.column)); Span { - row: usize::min(self.row, rhs.row), - column: usize::max(self.column, rhs.column), + row: max.0, + column: max.1, } } } @@ -45,13 +54,18 @@ fn test_tokenization() { } } -fn main() { - let src = include_str!("../demo.lang"); - println!("{src}"); - let tokens: Vec<_> = Tokenizer::new(src.chars()) - .filter(|t| t.0.is_meaningful()) - .collect(); - let parsed = Parser::new(tokens.into_iter()).file().unwrap(); - let mut interp = Interpreter::new(parsed.into_iter()); - interp.run().unwrap(); +fn test_expression(expr: &str) { + let source = expr.to_string(); + let tokens = Tokenizer::new(source.chars()).filter(|t| t.0.is_meaningful()); + let mut parser = Parser::new(tokens); + println!("{:?}", parser.expression(0).unwrap()); +} + +fn main() -> Result<()> { + /* + let module = frontend::Module::from_file("./demo.lang")?; + module.execute(); + */ + test_expression("(a: b) -> c {}"); + Ok(()) } diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index 1fa257f..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,453 +0,0 @@ -use crate::err::*; -use crate::{Span, Token, TokenKind}; - -#[derive(Clone)] -pub enum ExpressionKind { - Integer(i64), - Real(f64), - String(String), - Boolean(bool), - Identifier(String), - Binary { - token: TokenKind, - left: Box, - right: Box, - }, - Unary { - token: TokenKind, - child: Box, - }, - Parenthesis(Box), -} - -#[derive(Clone)] -pub struct Expression { - pub kind: ExpressionKind, - pub span: Span, -} - -impl Expression { - pub fn new(kind: ExpressionKind, span: Span) -> Self { - Self { kind, span } - } -} - -impl std::fmt::Debug for ExpressionKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use ExpressionKind as e; - match self { - e::Integer(i) => write!(f, "{i}"), - e::Binary { token, left, right } => { - write!(f, "({left:?} {token:?} {right:?})") - }, - e::Parenthesis(inner) => write!(f, "{inner:?}"), - e::Unary { token, child } => { - write!(f, "({token:?} {child:?})") - }, - e::Real(fp) => write!(f, "{fp}"), - e::String(s) => write!(f, r#""{s}""#), - e::Identifier(i) => write!(f, "{i}"), - e::Boolean(b) => write!(f, "{b}"), - } - } -} - -impl std::fmt::Debug for Expression { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.kind) - } -} - -#[derive(Debug, Clone)] -pub enum StatementKind { - Mutable { - name: String, - type_: Option, - value: Option, - }, - Immutable { - name: String, - type_: Option, - value: Expression, - }, - Assignment { - name: String, - value: Expression, - }, - If { - predicate: Expression, - block: Vec, - else_: Option>, - }, - While { - predicate: Expression, - block: Vec, - }, - Print(Expression), - Expression(Expression), - Block(Vec), -} - -#[derive(Debug, Clone)] -pub struct Statement { - pub kind: StatementKind, - pub span: Span, -} - -pub type Precedence = usize; - -fn binary_prec(tok: &TokenKind) -> Result<(Precedence, bool)> { - use TokenKind::*; - Ok(match tok { - Star | Slash | Percent => (10, false), - Plus | Minus => (9, false), - And | Nand => (8, false), - Xor | Xnor => (7, false), - Or | Nor => (6, false), - DoubleEqual | BangEqual | Less | LessEqual | Greater | GreaterEqual => { - (5, false) - }, - //Colon => Some((5, false)), - _ => { - return error() - .reason(format!("{:?} is not a valid binary operator", tok)); - }, - }) -} - -fn unary_prefix_prec(tok: &TokenKind) -> Result { - use TokenKind::*; - Ok(match tok { - Minus | Not => 11, - Break => 3, - _ => { - return error() - .reason(format!("{tok:?} is not a valid prefix unary operator")); - }, - }) -} - -fn unary_postfix_prec(tok: &TokenKind) -> Result { - use TokenKind::*; - Ok(match tok { - Question => 12, - Bang => 12, - _ => { - return error() - .reason(format!("{tok:?} is not a valid postfix unary operator")); - }, - }) -} - -const PARSER_LOOKAHEAD: usize = 3; - -type TokenIter = crate::Window; - -pub struct Parser> { - iter: TokenIter, -} - -impl> Parser { - pub fn new(iter: I) -> Self { - Self { - iter: TokenIter::new(iter), - } - } - - fn skip(&mut self, n: usize) { - for _ in 0..n { - let _ = self.next(); - } - } - - fn next(&mut self) -> Result { - self.iter.next().reason("Unexpected end of file") - } - - fn peek(&self, n: usize) -> Result { - self.iter.peek(n).clone().reason("Unexpected end of file") - } - - fn eat(&mut self, expect: TokenKind) -> Result { - match self.look(expect) { - Ok(t) => { - self.skip(1); - Ok(t) - }, - Err(e) => Err(e), - } - } - - fn look(&mut self, expect: TokenKind) -> Result { - let next = self.peek(0)?; - if next.0 == expect { - Ok(next) - } else { - error() - .reason(format!("Expected {expect:?}, found {:?}", next.0)) - .span(&next.1) - } - } - - pub fn file(&mut self) -> Result> { - use TokenKind as t; - let mut statements = vec![]; - loop { - // Trim extra ; - while self.eat(t::Semicolon).is_ok() {} - if self.eat(t::EOF).is_ok() { - return Ok(statements); - } - match self.statement() { - Ok(s) => statements.push(s), - Err(e) => { - return Err(e); - }, - } - } - } - - pub fn statement(&mut self) -> Result { - use StatementKind as s; - use TokenKind as t; - let next = self.peek(0); - let next2 = self.peek(1); - let statement = match (next, next2) { - // (im)mutable declaration - (Ok(Token(t::Identifier(name), span)), Ok(Token(t::Colon, _))) => { - self.skip(2); - let type_ = match self.eat(t::Identifier("".into())) { - Ok(Token(t::Identifier(s), _)) => Some(s), - _ => None, - }; - match self.eat(t::Equal).or_else(|_| self.eat(t::Colon)) { - Ok(Token(t::Colon, _)) => Statement { - kind: s::Immutable { - name, - type_, - value: self - .expression(0) - .trace("while parsing immutable declaration")?, - }, - span, - }, - Ok(Token(t::Equal, _)) => Statement { - kind: s::Mutable { - name, - type_, - value: Some( - self - .expression(0) - .trace("while parsing mutable declaration")?, - ), - }, - span, - }, - _ => return error().reason("Expected expression here"), - } - }, - (Ok(Token(t::Identifier(name), span)), Ok(Token(t::Equal, _))) => { - self.skip(2); - let value = self - .expression(0) - .trace("while parsing assignment expression")?; - Statement { - kind: s::Assignment { name, value }, - span, - } - }, - // If - (Ok(Token(t::If, span)), _) => { - self.skip(1); - let predicate = self - .expression(0) - .reason("Expected predicate after 'if' keyword") - .span(&span)?; - let block = self.block().trace("while parsing if statement")?; - return Ok(Statement { - span, - kind: s::If { - predicate, - block: block.0, - else_: None, - }, - }); - }, - // While - (Ok(Token(t::While, span)), _) => { - self.skip(1); - let predicate = self - .expression(0) - .reason("Expected predicate after 'while' keyword") - .span(&span)?; - let block = self.block().trace("while parsing while statement")?; - return Ok(Statement { - span, - kind: s::While { - predicate, - block: block.0, - }, - }); - }, - // (DEBUG) print - (Ok(Token(t::Print, span)), _) => { - self.skip(1); - let expr = self.expression(0).trace("while parsing print statement")?; - Statement { - span: span + expr.span, - kind: s::Print(expr), - } - }, - // Block - (Ok(Token(t::LeftBrace, _)), _) => { - // Skip check for semicolon - let (block, span) = - self.block().trace("while parsing block statement")?; - return Ok(Statement { - kind: s::Block(block), - span, - }); - }, - // Expression - _ => { - let expr = self - .expression(0) - .trace("while parsing expression statement")?; - Statement { - span: expr.span, - kind: s::Expression(expr), - } - }, - }; - // Check for semicolon - if self.eat(t::Semicolon).is_ok() { - Ok(statement) - } else { - error().reason("Expected ;") - } - } - - pub fn expression( - &mut self, - mut precedence: Precedence, - ) -> Result { - use ExpressionKind as e; - use TokenKind as t; - let next = self.peek(0)?; - // Unary prefix expression - let mut current = if let Ok(p) = unary_prefix_prec(&next.0) { - let operator = self.next().expect("unreachable"); - let child = self - .expression(p) - .trace(format!("while parsing unary {:?}", operator.0)) - .span(&operator.1)?; - let span = child.span + operator.1; - Expression::new( - e::Unary { - token: operator.0, - child: child.into(), - }, - span, - ) - } - // Terminal or paren - else { - self.primary()? - }; - // Precedence climbing loop - while let Ok(next) = self.peek(0) { - // Binary infix - if let Ok((new_precedence, left_assoc)) = binary_prec(&next.0) { - if (!left_assoc && new_precedence <= precedence) - || (new_precedence < precedence) - { - return Ok(current); - } - let operator = self.next().expect("unreachable"); - let rhs = self - .expression(new_precedence) - .trace(format!("while parsing binary {:?}", operator.0)) - .span(&operator.1)?; - let span = next.1 + rhs.span; - current = Expression::new( - e::Binary { - token: operator.0, - left: current.into(), - right: rhs.into(), - }, - span, - ); - } - // Unary postfix - else if let Ok(new_precedence) = unary_postfix_prec(&next.0) { - let operator = self.next().expect("unreachable"); - let span = next.1 + operator.1; - precedence = new_precedence; - current = Expression::new( - e::Unary { - token: operator.0, - child: current.into(), - }, - span, - ); - } else { - break; - } - } - Ok(current) - } - - fn primary(&mut self) -> Result { - use ExpressionKind as e; - use TokenKind as t; - let next = self.peek(0)?; - let span = next.1; - let kind = match next.0 { - t::IntegerLiteral(i) => e::Integer(i), - t::FloatLiteral(f) => e::Real(f), - t::StringLiteral(s) => e::String(s), - t::True => e::Boolean(true), - t::Identifier(i) => e::Identifier(i), - t::LeftParen => { - self.eat(t::LeftParen).expect("unreachable"); - let expr = self - .expression(0) - .trace("while parsing parenthesized expression")?; - self - .look(t::RightParen) - .reason("Unclosed '('") - .span(&expr.span)?; - e::Parenthesis(expr.into()) - }, - _ => { - return error() - .span(&span) - .reason(format!("Expected primary, found {:?}", next.0)); - }, - }; - self.skip(1); - Ok(Expression { kind, span }) - } - - fn block(&mut self) -> Result<(Vec, Span)> { - use TokenKind as t; - let mut span = self.eat(t::LeftBrace).reason("Expected block")?.1; - let mut statements = vec![]; - loop { - let next = self.peek(0)?; - span = span + next.1; - match self.eat(t::RightBrace) { - Ok(t) => { - span = span + t.1; - break; - }, - _ => { - let statement = self.statement()?; - span = span + statement.span; - statements.push(statement); - }, - }; - } - Ok((statements, span)) - } -} diff --git a/src/parse/expression.rs b/src/parse/expression.rs new file mode 100644 index 0000000..c9f6e24 --- /dev/null +++ b/src/parse/expression.rs @@ -0,0 +1,304 @@ +use super::*; + +#[derive(Clone)] +pub struct Parameter { + name: String, + type_: String, +} + +impl std::fmt::Debug for Parameter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}: {}", self.name, self.type_) + } +} + +#[derive(Clone)] +pub enum ExpressionKind { + Integer(i64), + Real(f64), + String(String), + Boolean(bool), + Identifier(String), + Binary { + token: TokenKind, + left: Box, + right: Box, + }, + Unary { + token: TokenKind, + child: Box, + }, + Parenthesis(Box), + Function { + params: Vec, + returns: Option, + body: Vec, + }, + Call { + callee: Box, + args: Vec, + }, + Field { + namespace: Box, + field: Box, + }, +} + +#[derive(Clone)] +pub struct Expression { + pub kind: ExpressionKind, + pub span: Span, +} + +impl Expression { + pub fn new(kind: ExpressionKind, span: Span) -> Self { + Self { kind, span } + } +} + +impl std::fmt::Debug for ExpressionKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use ExpressionKind as e; + match self { + e::Integer(i) => write!(f, "{i}"), + e::Binary { token, left, right } => { + write!(f, "({left:?} {token:?} {right:?})") + }, + e::Parenthesis(inner) => write!(f, "{inner:?}"), + e::Unary { token, child } => { + write!(f, "({token:?} {child:?})") + }, + e::Real(fp) => write!(f, "{fp}"), + e::String(s) => write!(f, r#""{s}""#), + e::Identifier(i) => write!(f, "{i}"), + e::Boolean(b) => write!(f, "{b}"), + e::Call { callee, args } => write!(f, "({callee:?} call {args:?})"), + e::Field { namespace, field } => { + write!(f, "({namespace:?} . {field:?})") + }, + e::Function { + params, returns, .. + } => { + write!(f, "(fn({params:?}) -> {returns:?})") + }, + } + } +} + +impl std::fmt::Debug for Expression { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.kind) + } +} + +impl> Parser { + pub fn expression(&mut self, precedence: Precedence) -> Result { + use ExpressionKind as e; + use TokenKind as t; + let next = self.peek(0)?; + // Unary prefix expression + let mut current = if let Ok(p) = unary_prefix_prec(&next.0) { + let operator = self.next_tok().expect("unreachable"); + let child = self + .expression(p) + .trace(format!("while parsing unary {}", operator.0)) + .span(&operator.1)?; + let span = child.span + operator.1; + Expression::new( + e::Unary { + token: operator.0, + child: child.into(), + }, + span, + ) + } + // Terminal or paren + else { + self.primary().reason("Expected expression")? + }; + + // Precedence climbing loop + while let Ok(next) = self.peek(0) { + // Binary infix + if let Ok((new_precedence, left_assoc)) = binary_prec(&next.0) { + if (!left_assoc && new_precedence <= precedence) + || (new_precedence < precedence) + { + return Ok(current); + } + let operator = self.next_tok().expect("unreachable"); + let rhs = self + .expression(new_precedence) + .trace(format!("while parsing binary {}", operator.0)) + .span(&operator.1)?; + let span = next.1 + rhs.span; + current = Expression::new( + e::Binary { + token: operator.0, + left: current.into(), + right: rhs.into(), + }, + span, + ); + } + // Field + else if let Token(t::Dot, span) = next { + if FIELD_PREC <= precedence { + return Ok(current); + } + self.skip(1); + let field = self + .expression(FIELD_PREC) + .trace_span(span, "in field expression")?; + current = Expression::new( + e::Field { + namespace: current.into(), + field: field.into(), + }, + span, + ) + } + // Function call + else if let Token(t::LeftParen, mut span) = next { + if CALL_PREC <= precedence { + return Ok(current); + } + self.skip(1); + let mut args = vec![]; + loop { + match self.expression(0) { + Ok(a) => { + span = span + a.span; + args.push(a) + }, + Err(_) => break, + }; + if !self.eat(t::Comma).is_ok() { + break; + } + } + let Token(_, span2) = self.eat(t::RightParen).span(&span)?; + current = Expression::new( + e::Call { + callee: current.into(), + args, + }, + span + span2, + ); + } + // Unary postfix + else if let Ok(_) = unary_postfix_prec(&next.0) { + let operator = self.next_tok().expect("unreachable"); + let span = next.1 + operator.1; + current = Expression::new( + e::Unary { + token: operator.0, + child: current.into(), + }, + span, + ); + } else { + break; + } + } + Ok(current) + } + + fn primary(&mut self) -> Result { + use ExpressionKind as e; + use TokenKind as t; + let next = self.peek(0)?; + let mut span = next.1; + let kind = match next.0 { + t::IntegerLiteral(i) => e::Integer(i), + t::FloatLiteral(f) => e::Real(f), + t::StringLiteral(s) => e::String(s), + t::True => e::Boolean(true), + t::Identifier(i) => e::Identifier(i), + // function + t::LeftParen + if (self.look(1, t::Identifier("".into())).is_ok() + && self.look(2, t::Colon).is_ok()) + || self.look(1, t::RightParen).is_ok() => + { + self.skip(1); + let mut params = vec![]; + loop { + let (name, span2) = match self.identifier() { + Ok((name, span)) => (name, span), + Err(_) => break, + }; + span = span + span2; + self + .eat(t::Colon) + .trace_span(span, "while parsing function parameter type")?; + let (type_, span2) = self + .identifier() + .trace_span(span, "while parsing function parameter type")?; + span = span + span2; + params.push(Parameter { name, type_ }); + if !self.eat(t::Comma).is_ok() { + break; + } + } + let Token(_, span2) = self + .eat(t::RightParen) + .span(&span) + .trace_span(span, "while parsing function definition")?; + let returns = if let Ok(Token(_, span2)) = self.eat(t::Arrow) { + span = span + span2; + let (identifier, span2) = self + .identifier() + .trace_span(span, "while parsing function return type")?; + span = span + span2; + Some(identifier) + } else { + None + }; + span = span + span2; + let (body, span2) = self + .block() + .trace_span(span, "while parsing function body")?; + span = span + span2; + e::Function { + params, + returns, + body, + } + }, + // parenthetical + t::LeftParen => { + self.skip(1); + let expr = self + .expression(0) + .trace("while parsing parenthesized expression")?; + self + .look(0, t::RightParen) + .reason("Unclosed '('") + .span(&expr.span)?; + e::Parenthesis(expr.into()) + }, + _ => { + return error() + .span(&span) + .reason(format!("Expected expression, found {}", next.0)); + }, + }; + self.skip(1); + Ok(Expression { kind, span }) + } + + fn identifier(&mut self) -> Result<(String, Span)> { + use TokenKind as t; + match self.peek(0) { + Ok(Token(t::Identifier(i), span)) => { + self.skip(1); + Ok((i, span)) + }, + Ok(t) => error() + .reason(format!("Expected identifier, found {}", t.0)) + .span(&t.1), + Err(e) => Err(e), + } + } +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..e425b5b --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,165 @@ +mod expression; +mod statement; +pub use expression::*; +pub use statement::*; + +use crate::err::*; +use crate::{Span, Token, TokenKind}; + +pub type Precedence = usize; + +const FIELD_PREC: Precedence = 13; +const CALL_PREC: Precedence = 12; + +fn binary_prec(tok: &TokenKind) -> Result<(Precedence, bool)> { + use TokenKind::*; + Ok(match tok { + Star | Slash | Percent => (10, false), + Plus | Minus => (9, false), + And | Nand => (8, false), + Xor | Xnor => (7, false), + Or | Nor => (6, false), + DoubleEqual | BangEqual | Less | LessEqual | Greater | GreaterEqual => { + (5, false) + }, + //Colon => Some((5, false)), + _ => { + return error().reason(format!("{} is not a valid binary operator", tok)); + }, + }) +} + +fn unary_prefix_prec(tok: &TokenKind) -> Result { + use TokenKind::*; + Ok(match tok { + Minus | Not => 11, + Break => 3, + _ => { + return error() + .reason(format!("{tok} is not a valid prefix unary operator")); + }, + }) +} + +fn unary_postfix_prec(tok: &TokenKind) -> Result { + use TokenKind::*; + Ok(match tok { + Question => 12, + Bang => 12, + _ => { + return error() + .reason(format!("{tok} is not a valid postfix unary operator")); + }, + }) +} + +const PARSER_LOOKAHEAD: usize = 3; + +type TokenIter = crate::Window; + +pub struct Parser> { + iter: TokenIter, +} + +impl> Iterator for Parser { + type Item = Statement; + + fn next(&mut self) -> Option { + use StatementKind as s; + use TokenKind as t; + // Trim extra ; + while self.eat(t::Semicolon).is_ok() {} + loop { + if self.eat(t::EOF).is_ok() || self.iter.finished { + return None; + } + match self.statement() { + Ok(s) => return Some(s), + Err(e) => { + loop { + let next = self.next_tok(); + match next { + Ok(Token(t::Semicolon | t::RightBrace | t::EOF, _)) => break, + _ => {}, + } + } + return Some(Statement { + span: e.span.unwrap_or(Span { row: 0, column: 0 }), + kind: s::Error(e), + }); + }, + } + } + } +} + +impl> Parser { + pub fn new(iter: I) -> Self { + Self { + iter: TokenIter::new(iter), + } + } + + fn skip(&mut self, n: usize) { + for _ in 0..n { + let _ = self.next_tok(); + } + } + + fn next_tok(&mut self) -> Result { + match self.iter.next() { + Some(Token(TokenKind::Error(e), span)) => Err(e).span(&span), + r => r.reason("Unexpected end of file"), + } + } + + fn peek(&self, n: usize) -> Result { + match self.iter.peek(n).clone() { + Some(Token(TokenKind::Error(e), span)) => Err(e).span(&span), + r => r.reason("Unexpected end of file"), + } + } + + fn eat(&mut self, expect: TokenKind) -> Result { + match self.look(0, expect) { + Ok(t) => { + self.skip(1); + Ok(t) + }, + Err(e) => Err(e), + } + } + + fn look(&mut self, n: usize, expect: TokenKind) -> Result { + let next = self.peek(n)?; + if next.0 == expect { + Ok(next) + } else { + error() + .reason(format!("Expected {expect}, found {}", next.0)) + .span(&next.1) + } + } + + fn block(&mut self) -> Result<(Vec, Span)> { + use TokenKind as t; + let mut span = self.eat(t::LeftBrace).reason("Expected block")?.1; + let mut statements = vec![]; + loop { + let next = self.peek(0)?; + span = span + next.1; + match self.eat(t::RightBrace) { + Ok(t) => { + span = span + t.1; + break; + }, + _ => { + let statement = self.statement()?; + span = span + statement.span; + statements.push(statement); + }, + }; + } + Ok((statements, span)) + } +} diff --git a/src/parse/statement.rs b/src/parse/statement.rs new file mode 100644 index 0000000..c990185 --- /dev/null +++ b/src/parse/statement.rs @@ -0,0 +1,207 @@ +use super::*; + +#[derive(Debug, Clone)] +pub enum StatementKind { + Mutable { + name: String, + type_: Option, + value: Option, + }, + Immutable { + name: String, + type_: Option, + value: Expression, + }, + Assignment { + name: String, + value: Expression, + }, + If { + predicate: Expression, + block: Vec, + else_: Option>, + }, + While { + predicate: Expression, + block: Vec, + }, + Print(Expression), + Expression(Expression), + Block(Vec), + Error(Diagnostic), +} + +#[derive(Debug, Clone)] +pub struct Statement { + pub kind: StatementKind, + pub span: Span, +} + +impl> Parser { + pub fn statement(&mut self) -> Result { + use StatementKind as s; + use TokenKind as t; + let next = self.peek(0)?; + let next2 = self.peek(1); + let mut span = next.1; + let statement = match (next, next2) { + // (im)mutable declaration + (Token(t::Identifier(name), span2), Ok(Token(t::Colon, span3))) => { + self.skip(2); + span = span + span2 + span3; + // type + let type_ = match self.eat(t::Identifier("".into())) { + Ok(Token(t::Identifier(s), span2)) => { + span = span + span2; + Some(s) + }, + _ => None, + }; + // value + match self.eat(t::Equal).or_else(|_| self.eat(t::Colon)) { + Ok(Token(t::Colon, span2)) => { + span = span + span2; + let value = self + .expression(0) + .trace_span(span, "while parsing mutable declaration")?; + span = span + value.span; + Statement { + kind: s::Immutable { name, type_, value }, + span, + } + }, + Ok(Token(t::Equal, span2)) => { + span = span + span2; + let value = self + .expression(0) + .trace_span(span, "while parsing mutable declaration")?; + span = span + value.span; + Statement { + kind: s::Mutable { + name, + type_, + value: Some(value), + }, + span, + } + }, + _ => return error().reason("Expected expression").span(&span), + } + }, + // Assignment + (Token(t::Identifier(name), span2), Ok(Token(t::Equal, span3))) => { + self.skip(2); + span = span + span2 + span3; + let value = self + .expression(0) + .trace_span(span, "while parsing assignment")?; + Statement { + span, + kind: s::Assignment { name, value }, + } + }, + // If + (Token(t::If, _), _) => { + return self.if_else(); + }, + // While + (Token(t::While, span2), _) => { + self.skip(1); + span = span + span2; + let predicate = self + .expression(0) + .reason("Expected predicate after 'while' keyword") + .span(&span)?; + span = span + predicate.span; + let block = self + .block() + .trace_span(span, "while parsing while statement")?; + span = span + block.1; + return Ok(Statement { + span, + kind: s::While { + predicate, + block: block.0, + }, + }); + }, + // (DEBUG) print + (Token(t::Print, span2), _) => { + self.skip(1); + span = span + span2; + let expr = self + .expression(0) + .trace_span(span, "while parsing print statement")?; + span = span + expr.span; + Statement { + span, + kind: s::Print(expr), + } + }, + // Block + (Token(t::LeftBrace, span2), _) => { + // Skip check for semicolon + span = span + span2; + let (block, span2) = self + .block() + .trace_span(span, "while parsing block statement")?; + span = span + span2; + return Ok(Statement { + kind: s::Block(block), + span, + }); + }, + // Expression + (Token(_, span2), _) => { + span = span + span2; + let expr = self + .expression(0) + .trace_span(span, "while parsing expression statement")?; + span = span + expr.span; + Statement { + span, + kind: s::Expression(expr), + } + }, + }; + // Check for semicolon + if self.eat(t::Semicolon).is_ok() { + Ok(statement) + } else { + error().reason("Expected ;").span(&span) + } + } + + fn if_else(&mut self) -> Result { + use TokenKind as t; + if let Ok(Token(_, span)) = self.eat(t::If) { + let predicate = self + .expression(0) + .trace_span(span, "in predicate of 'if' statement")?; + let span = span + predicate.span; + let block = self + .block() + .trace_span(span, "in block of 'if' statement")?; + let (block, span) = (block.0, span + block.1); + let else_ = if self.eat(t::Else).is_ok() { + Some(Box::new(self.if_else()?)) + } else { + None + }; + Ok(Statement { + kind: StatementKind::If { + predicate, + block, + else_, + }, + span, + }) + } else { + let (block, span) = self.block()?; + Ok(Statement { + kind: StatementKind::Block(block), + span, + }) + } + } +} diff --git a/src/token.rs b/src/token.rs index 2ebc6b9..3bbb0a7 100644 --- a/src/token.rs +++ b/src/token.rs @@ -49,7 +49,7 @@ pub enum TokenKind { Identifier(String), StringLiteral(String), - CharLiteral(char), + GlyphLiteral(char), IntegerLiteral(i64), FloatLiteral(f64), @@ -78,6 +78,7 @@ pub enum TokenKind { SmallComment(String), BigComment(String), + Error(Diagnostic), Idk, EOF, } @@ -103,6 +104,87 @@ impl PartialEq for TokenKind { impl Eq for TokenKind { } +impl std::fmt::Display for TokenKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use TokenKind::*; + write!( + f, + "'{}'", + match self { + LeftParen => "(", + RightParen => ")", + LeftBrace => "{", + RightBrace => "}", + LeftSquare => "[", + RightSquare => "]", + Comma => ",", + Colon => ":", + Semicolon => ";", + Dot => ".", + DotDot => "..", + Plus => "+", + Minus => "-", + Slash => "/", + Star => "*", + Percent => "%", + Arrow => "->", + FatArrow => "=>", + PlusEqual => "+=", + MinusEqual => "-=", + SlashEqual => "/=", + StarEqual => "*=", + PercentEqual => "%=", + Bang => "!", + BangEqual => "!=", + Question => "?", + QuestionEqual => "?=", + Equal => "=", + DoubleEqual => "==", + Greater => ">", + GreaterEqual => ">=", + Less => "<", + LessEqual => "<=", + Pipe => "|", + Ampersand => "&", + Carrot => "^", + Hash => "#", + DotDotEqual => "..=", + Identifier(i) => i, + StringLiteral(s) => s.as_str(), + GlyphLiteral(_) => "", + IntegerLiteral(_) => "", + FloatLiteral(_) => "", + If => "if", + Else => "else", + And => "and", + Or => "or", + Xor => "xor", + Not => "not", + Nand => "nand", + Nor => "nor", + Xnor => "xnor", + Print => "print", + Break => "break", + Return => "return", + Continue => "continue", + For => "for", + While => "while", + True => "true", + False => "false", + Struct => "struct", + Enum => "enum", + Union => "union", + Whitespace(_) => "", + SmallComment(_) => "", + BigComment(_) => "", + Error(_) => "", + Idk => unreachable!(), + EOF => "", + } + ) + } +} + #[derive(Clone, Debug)] pub struct Token(pub TokenKind, pub Span); @@ -118,7 +200,6 @@ pub struct Tokenizer> { iter: CharIter, column: usize, row: usize, - finished: bool, } impl> Tokenizer { @@ -127,14 +208,6 @@ impl> Tokenizer { iter: CharIter::new(iter), column: 1, row: 1, - finished: false, - } - } - - pub fn span(&self) -> Span { - Span { - column: self.column, - row: self.row, } } @@ -187,6 +260,16 @@ impl> Tokenizer { column: self.column, }; let current = match self.next_char() { + Some(std::char::REPLACEMENT_CHARACTER) => { + return t( + Error(Diagnostic { + reason: "Non-UTF8 encoded glyph".into(), + span: Some(position), + backtrace: vec![], + }), + position, + ); + }, Some(c) => c, None => return t(EOF, position), }; @@ -327,7 +410,7 @@ impl> Tokenizer { .reason("Single quote (') contains more than one character") .span(&position); } - let kind = CharLiteral( + let kind = GlyphLiteral( baked .chars() .next() @@ -351,8 +434,12 @@ impl> Tokenizer { // Only one dot per number let mut encountered_dot = false; while let Some(c) = self.peek(0) { - if c == '.' && !encountered_dot { - if let Some('.') = self.peek(1) { + if c == '.' { + if encountered_dot { + break; + } + let Some(next) = self.peek(1) else { break }; + if !next.is_ascii_digit() { break; } encountered_dot = true; @@ -407,19 +494,12 @@ impl> Iterator for Tokenizer { type Item = Token; fn next(&mut self) -> Option { - loop { - match self._next() { - Ok(Token(TokenKind::EOF, span)) => { - if self.finished { - return None; - } else { - self.finished = true; - return Some(Token(TokenKind::EOF, span)); - } - }, - Ok(r) => return Some(r), - _ => {}, - }; + match self._next() { + Ok(s) => Some(s), + Err(e) => Some(Token( + TokenKind::Error(e.clone()), + e.span.expect("error without span in tokenizer"), + )), } } } @@ -429,29 +509,35 @@ fn parse_number(num: &str) -> Result { let num = num.replace('_', ""); // Floating point (only decimal) if num.contains('.') { - num.parse::().map(|f| FloatLiteral(f)).coerce() + num + .parse::() + .map(|f| FloatLiteral(f)) + .reason("Could not parse real number") } // Hex integer else if let Some(hex) = num.strip_prefix("0x") { i64::from_str_radix(hex, 16) .map(|i| IntegerLiteral(i)) - .coerce() + .reason("Could not parse hex integer number") } // Octal integer else if let Some(oct) = num.strip_prefix("0o") { i64::from_str_radix(oct, 8) .map(|i| IntegerLiteral(i)) - .coerce() + .reason("Could not parse octal integer number") } // Binary integer else if let Some(bin) = num.strip_prefix("0b") { i64::from_str_radix(bin, 2) .map(|i| IntegerLiteral(i)) - .coerce() + .reason("Could not parse binary integer number") } // Decimal integer else { - num.parse::().map(|i| IntegerLiteral(i)).coerce() + num + .parse::() + .map(|i| IntegerLiteral(i)) + .reason("Could not parse integer number") } } diff --git a/src/treewalk.rs b/src/treewalk.rs index a074f5b..0b07252 100644 --- a/src/treewalk.rs +++ b/src/treewalk.rs @@ -14,6 +14,19 @@ enum Value { Undefined, } +impl std::fmt::Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Value::*; + match self { + Integer(i) => write!(f, "{i}"), + Real(r) => write!(f, "{r}"), + String(s) => write!(f, "{s}"), + Boolean(b) => write!(f, "{b}"), + Undefined => write!(f, "undefined"), + } + } +} + #[derive(Debug, Clone)] struct Scope { outer: Option>, @@ -189,6 +202,13 @@ impl> Interpreter { }, e::Unary { token, child } => self.evaluate_unary(token, *child), e::Parenthesis(e) => self.evaluate(*e), + e::Call { callee, args } => todo!(), + e::Field { namespace, field } => todo!(), + e::Function { + params, + returns, + body, + } => todo!(), } .span(&expr.span) } @@ -215,7 +235,7 @@ impl> Interpreter { }, s::Print(e) => { let e = self.evaluate(e)?; - println!("{e:?}"); + println!("{e}"); }, s::Expression(e) => { self.evaluate(e)?; @@ -231,6 +251,8 @@ impl> Interpreter { if let Value::Boolean(b) = value { if b { self.block(block)?; + } else if let Some(else_) = else_ { + self.execute(*else_)?; } } else { return error() @@ -252,6 +274,10 @@ impl> Interpreter { } } }, + s::Error(e) => { + eprintln!("{e}"); + panic!(); + }, } Ok(()) } diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..4dc3a00 --- /dev/null +++ b/src/types.rs @@ -0,0 +1,91 @@ +use super::err::*; + +macro_rules! primitives { + ( $($i:ident),* ) => { + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + #[allow(non_camel_case_types, dead_code)] + pub enum Primitive { + whole_ambiguous, + integer_ambiguous, + real_ambiguous, + $($i,)* + } + + impl Primitive { + pub fn from_string(string: &'static str) -> Option { + match string { + $(stringify!{$i} => Some(Self::$i),)* + _ => None, + } + } + } + impl std::fmt::Display for Primitive { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Primitive::whole_ambiguous => write!(f, ""), + Primitive::integer_ambiguous => write!(f, ""), + Primitive::real_ambiguous => write!(f, ""), + $(Primitive::$i => write!(f, stringify!{$i}),)* + } + } + } + }; +} + +primitives! { + w8, w16, w32, w64, whole, + i8, i16, i32, i64, integer, + r32, r64, real, + boolean, + string, glyph +} + +impl Primitive { + pub fn coerce(a: Self, b: Self) -> Result { + use Primitive::*; + match (a, b) { + // Whole? coerces to any whole or integer + (whole_ambiguous, w @ (w8 | w16 | w32 | w64 | i8 | i16 | i32 | i64)) + | (w @ (w8 | w16 | w32 | w64 | i8 | i16 | i32 | i64), whole_ambiguous) => { + w + }, + // Integer? coerces to any integer + (integer_ambiguous, i @ (i8 | i16 | i32 | i64)) + | (i @ (i8 | i16 | i32 | i64), integer_ambiguous) => i, + _ => whole_ambiguous, + }; + todo!() + } +} + +use Primitive as p; + +// Implement math operations for regular types +macro_rules! selfsame_op { + ($trait:ident, $fn:ident, $($i:ident),* ) => { + impl std::ops::$trait for Primitive { + type Output = Result; + fn $fn(self, rhs: Self) -> Self::Output { + match (self, rhs) { + $((p::$i, p::$i) => Ok(p::$i),)* + _ => error() + .reason(format!("Operation not defined for primitives {} and {}", self, rhs)) + } + } + } + }; +} + +// Implement all regular math +macro_rules! all_selfsame { + ($($i:ident),*) => { + selfsame_op!(Add, add, $($i),*); + selfsame_op!(Sub, sub, $($i),*); + selfsame_op!(Mul, mul, $($i),*); + selfsame_op!(Div, div, $($i),*); + }; +} + +all_selfsame!( + w8, w16, w32, w64, whole, i8, i16, i32, i64, integer, r32, r64, real, string +);