From 7a33804ffe0b471723de940e128a1e587b6ec409 Mon Sep 17 00:00:00 2001 From: Logan Date: Tue, 15 Oct 2024 11:20:35 -0500 Subject: [PATCH] Initial --- demo.lang | 6 + src/err.rs | 151 +++++++++++ src/lookahead.rs | 84 +++++++ src/main.rs | 61 ++++- src/parse.rs | 453 +++++++++++++++++++++++++++++++++ src/token.rs | 632 ++++++++++++++++++++++++++++++++++------------- src/treewalk.rs | 280 +++++++++++++++++++++ 7 files changed, 1486 insertions(+), 181 deletions(-) create mode 100644 demo.lang create mode 100644 src/err.rs create mode 100644 src/lookahead.rs create mode 100644 src/parse.rs create mode 100644 src/treewalk.rs diff --git a/demo.lang b/demo.lang new file mode 100644 index 0000000..74c6b0e --- /dev/null +++ b/demo.lang @@ -0,0 +1,6 @@ +i := 0; + +while i < 10 { + i = i + 1; + print i; +} diff --git a/src/err.rs b/src/err.rs new file mode 100644 index 0000000..d447bb7 --- /dev/null +++ b/src/err.rs @@ -0,0 +1,151 @@ +use crate::Span; + +pub type Result = std::result::Result; + +pub fn error() -> Result { + Err(Diagnostic::new("")) +} + +#[derive(Clone)] +pub struct Diagnostic { + reason: String, + span: Option, + backtrace: Vec, +} + +impl Diagnostic { + pub fn new(reason: impl Into) -> Self { + Self { + reason: reason.into(), + span: None, + backtrace: vec![], + } + } +} + +impl std::fmt::Display for Diagnostic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(span) = self.span.as_ref() { + write!(f, "({}:{}) {}\n", span.row, span.column, self.reason)?; + } else { + write!(f, "(E) {}\n", self.reason)?; + } + for b in self.backtrace.iter().rev() { + write!(f, "--> {}\n", b)?; + } + Ok(()) + } +} + +impl std::fmt::Debug for Diagnostic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{self}") + } +} + +impl From for Diagnostic { + fn from(value: std::io::Error) -> Self { + Self { + reason: format!("{}", value), + span: None, + backtrace: vec![], + } + } +} + +impl From for Diagnostic { + 
fn from(value: std::num::ParseIntError) -> Self { + use std::num::IntErrorKind::*; + match value.kind() { + PosOverflow | NegOverflow => { + Diagnostic::new("Integer value is too large to represent") + }, + InvalidDigit => Diagnostic::new("Integer value containts invalid digits"), + _ => Diagnostic::new("Integer value could not be parsed"), + } + } +} + +impl From for Diagnostic { + fn from(_value: std::num::ParseFloatError) -> Self { + Diagnostic::new("Float value could not be parsed") + } +} + +pub trait IntoDiagnostic> { + fn reason(self, s: S) -> Result; + fn trace(self, s: S) -> Result; +} + +pub trait WithSpan { + fn span(self, span: &Span) -> Result; + fn no_span(self) -> Result; +} + +impl WithSpan for Result { + fn span(self, span: &Span) -> Result { + self.map_err(|mut e| { + e.span = e.span.or(Some(span.clone())); + e + }) + } + + fn no_span(self) -> Result { + self.map_err(|mut e| { + e.span = None; + e + }) + } +} + +pub trait CoerceDiagnostic { + fn coerce(self) -> Result; +} + +impl> IntoDiagnostic for Option { + fn reason(self, s: S) -> Result { + match self { + Some(t) => Ok(t), + None => Err(Diagnostic { + reason: s.into(), + span: None, + backtrace: vec![], + }), + } + } + + fn trace(self, s: S) -> Result { + match self { + Some(t) => Ok(t), + None => Err(Diagnostic { + reason: "".into(), + span: None, + backtrace: vec![s.into()], + }), + } + } +} + +impl, S: Into> IntoDiagnostic + for std::result::Result +{ + fn reason(self, s: S) -> Result { + self.map_err(|e| e.into()).map_err(|mut e| { + e.reason = s.into(); + e + }) + } + + fn trace(self, s: S) -> Result { + self.map_err(|e| e.into()).map_err(|mut e| { + e.backtrace.push(s.into()); + e + }) + } +} + +impl> CoerceDiagnostic for std::result::Result { + fn coerce(self) -> Result { + self.map_err(|e| e.into()) + } +} diff --git a/src/lookahead.rs b/src/lookahead.rs new file mode 100644 index 0000000..82dc6f9 --- /dev/null +++ b/src/lookahead.rs @@ -0,0 +1,84 @@ +pub struct Window +where + I: 
Iterator, +{ + iterator: I, + buffer: [Option; N], + exhausted: bool, + pub finished: bool, +} + +impl std::fmt::Debug for Window +where + I: Iterator, + T: std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.buffer) + } +} + +impl Window +where + I: Iterator, +{ + pub fn new(mut it: I) -> Self { + assert!(N > 0, "Lookahead buffer cannot be 0 sized"); + let mut s = Self { + buffer: std::array::from_fn(|_| it.next()), + iterator: it, + exhausted: false, + finished: false, + }; + s.normalize(); + s + } + + pub fn inner(&self) -> &I { + &self.iterator + } + + fn normalize(&mut self) { + for item in &mut self.buffer { + if self.exhausted { + *item = None; + } else if let None = item { + self.exhausted = true; + } + } + } + + pub fn peek(&self, n: usize) -> &Option { + debug_assert!(n < N, "Peeked further than buffer allows"); + &self.buffer[n] + } + + fn _advance(&mut self) { + for i in 1..N { + self.buffer[i - 1] = self.buffer[i].take(); + } + self.buffer[N - 1] = match self.iterator.next() { + Some(i) if !self.exhausted => Some(i), + _ => { + self.exhausted = true; + None + }, + }; + } +} + +impl Iterator for Window +where + I: Iterator, +{ + type Item = T; + + fn next(&mut self) -> Option { + let r = self.buffer[0].take(); + if let None = r { + self.finished = true; + } + self._advance(); + r + } +} diff --git a/src/main.rs b/src/main.rs index 762f6c6..c9b3857 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,18 +1,57 @@ +mod err; +mod lookahead; +mod parse; mod token; +mod treewalk; +use std::ops::Add; -fn main() { - repl(); +use lookahead::*; +use parse::*; +use token::*; +use treewalk::Interpreter; + +#[derive(Clone, Copy, Debug)] +pub struct Span { + pub row: usize, + pub column: usize, } -pub fn repl() { - let mut buffer = String::new(); - let stdin = std::io::stdin(); - loop { - stdin.read_line(&mut buffer).unwrap(); - let tokens = token::tokenize(&buffer); - for tok in tokens { - println!("{} : 
{:?}", &buffer[tok.start..tok.end], tok.ttype); +impl Add for Span { + type Output = Span; + + fn add(self, rhs: Span) -> Self::Output { + Span { + row: usize::min(self.row, rhs.row), + column: usize::max(self.column, rhs.column), } - buffer = String::new(); } } + +fn test_tokenization() { + let test_str = r#" + ( ) [ ] { } . .. , : + ; + - * / -> => += -= + *= /= ! != = == <= >= + ? ?= < > literal 10 + 0x10 0b10 10.0 1.0..2.0 + 2.0..=3.0 if else and + or xor not nand nor xnor + print break for while true + false "\u263b" '\x30' + "#; + let mut parser = Tokenizer::new(test_str.chars()); + while let Some(tok) = parser.next() { + println!("{tok:?}"); + } +} + +fn main() { + let src = include_str!("../demo.lang"); + println!("{src}"); + let tokens: Vec<_> = Tokenizer::new(src.chars()) + .filter(|t| t.0.is_meaningful()) + .collect(); + let parsed = Parser::new(tokens.into_iter()).file().unwrap(); + let mut interp = Interpreter::new(parsed.into_iter()); + interp.run().unwrap(); +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..1fa257f --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,453 @@ +use crate::err::*; +use crate::{Span, Token, TokenKind}; + +#[derive(Clone)] +pub enum ExpressionKind { + Integer(i64), + Real(f64), + String(String), + Boolean(bool), + Identifier(String), + Binary { + token: TokenKind, + left: Box, + right: Box, + }, + Unary { + token: TokenKind, + child: Box, + }, + Parenthesis(Box), +} + +#[derive(Clone)] +pub struct Expression { + pub kind: ExpressionKind, + pub span: Span, +} + +impl Expression { + pub fn new(kind: ExpressionKind, span: Span) -> Self { + Self { kind, span } + } +} + +impl std::fmt::Debug for ExpressionKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use ExpressionKind as e; + match self { + e::Integer(i) => write!(f, "{i}"), + e::Binary { token, left, right } => { + write!(f, "({left:?} {token:?} {right:?})") + }, + e::Parenthesis(inner) => write!(f, "{inner:?}"), + 
e::Unary { token, child } => { + write!(f, "({token:?} {child:?})") + }, + e::Real(fp) => write!(f, "{fp}"), + e::String(s) => write!(f, r#""{s}""#), + e::Identifier(i) => write!(f, "{i}"), + e::Boolean(b) => write!(f, "{b}"), + } + } +} + +impl std::fmt::Debug for Expression { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.kind) + } +} + +#[derive(Debug, Clone)] +pub enum StatementKind { + Mutable { + name: String, + type_: Option, + value: Option, + }, + Immutable { + name: String, + type_: Option, + value: Expression, + }, + Assignment { + name: String, + value: Expression, + }, + If { + predicate: Expression, + block: Vec, + else_: Option>, + }, + While { + predicate: Expression, + block: Vec, + }, + Print(Expression), + Expression(Expression), + Block(Vec), +} + +#[derive(Debug, Clone)] +pub struct Statement { + pub kind: StatementKind, + pub span: Span, +} + +pub type Precedence = usize; + +fn binary_prec(tok: &TokenKind) -> Result<(Precedence, bool)> { + use TokenKind::*; + Ok(match tok { + Star | Slash | Percent => (10, false), + Plus | Minus => (9, false), + And | Nand => (8, false), + Xor | Xnor => (7, false), + Or | Nor => (6, false), + DoubleEqual | BangEqual | Less | LessEqual | Greater | GreaterEqual => { + (5, false) + }, + //Colon => Some((5, false)), + _ => { + return error() + .reason(format!("{:?} is not a valid binary operator", tok)); + }, + }) +} + +fn unary_prefix_prec(tok: &TokenKind) -> Result { + use TokenKind::*; + Ok(match tok { + Minus | Not => 11, + Break => 3, + _ => { + return error() + .reason(format!("{tok:?} is not a valid prefix unary operator")); + }, + }) +} + +fn unary_postfix_prec(tok: &TokenKind) -> Result { + use TokenKind::*; + Ok(match tok { + Question => 12, + Bang => 12, + _ => { + return error() + .reason(format!("{tok:?} is not a valid postfix unary operator")); + }, + }) +} + +const PARSER_LOOKAHEAD: usize = 3; + +type TokenIter = crate::Window; + +pub struct Parser> { 
+ iter: TokenIter, +} + +impl> Parser { + pub fn new(iter: I) -> Self { + Self { + iter: TokenIter::new(iter), + } + } + + fn skip(&mut self, n: usize) { + for _ in 0..n { + let _ = self.next(); + } + } + + fn next(&mut self) -> Result { + self.iter.next().reason("Unexpected end of file") + } + + fn peek(&self, n: usize) -> Result { + self.iter.peek(n).clone().reason("Unexpected end of file") + } + + fn eat(&mut self, expect: TokenKind) -> Result { + match self.look(expect) { + Ok(t) => { + self.skip(1); + Ok(t) + }, + Err(e) => Err(e), + } + } + + fn look(&mut self, expect: TokenKind) -> Result { + let next = self.peek(0)?; + if next.0 == expect { + Ok(next) + } else { + error() + .reason(format!("Expected {expect:?}, found {:?}", next.0)) + .span(&next.1) + } + } + + pub fn file(&mut self) -> Result> { + use TokenKind as t; + let mut statements = vec![]; + loop { + // Trim extra ; + while self.eat(t::Semicolon).is_ok() {} + if self.eat(t::EOF).is_ok() { + return Ok(statements); + } + match self.statement() { + Ok(s) => statements.push(s), + Err(e) => { + return Err(e); + }, + } + } + } + + pub fn statement(&mut self) -> Result { + use StatementKind as s; + use TokenKind as t; + let next = self.peek(0); + let next2 = self.peek(1); + let statement = match (next, next2) { + // (im)mutable declaration + (Ok(Token(t::Identifier(name), span)), Ok(Token(t::Colon, _))) => { + self.skip(2); + let type_ = match self.eat(t::Identifier("".into())) { + Ok(Token(t::Identifier(s), _)) => Some(s), + _ => None, + }; + match self.eat(t::Equal).or_else(|_| self.eat(t::Colon)) { + Ok(Token(t::Colon, _)) => Statement { + kind: s::Immutable { + name, + type_, + value: self + .expression(0) + .trace("while parsing immutable declaration")?, + }, + span, + }, + Ok(Token(t::Equal, _)) => Statement { + kind: s::Mutable { + name, + type_, + value: Some( + self + .expression(0) + .trace("while parsing mutable declaration")?, + ), + }, + span, + }, + _ => return error().reason("Expected 
expression here"), + } + }, + (Ok(Token(t::Identifier(name), span)), Ok(Token(t::Equal, _))) => { + self.skip(2); + let value = self + .expression(0) + .trace("while parsing assignment expression")?; + Statement { + kind: s::Assignment { name, value }, + span, + } + }, + // If + (Ok(Token(t::If, span)), _) => { + self.skip(1); + let predicate = self + .expression(0) + .reason("Expected predicate after 'if' keyword") + .span(&span)?; + let block = self.block().trace("while parsing if statement")?; + return Ok(Statement { + span, + kind: s::If { + predicate, + block: block.0, + else_: None, + }, + }); + }, + // While + (Ok(Token(t::While, span)), _) => { + self.skip(1); + let predicate = self + .expression(0) + .reason("Expected predicate after 'while' keyword") + .span(&span)?; + let block = self.block().trace("while parsing while statement")?; + return Ok(Statement { + span, + kind: s::While { + predicate, + block: block.0, + }, + }); + }, + // (DEBUG) print + (Ok(Token(t::Print, span)), _) => { + self.skip(1); + let expr = self.expression(0).trace("while parsing print statement")?; + Statement { + span: span + expr.span, + kind: s::Print(expr), + } + }, + // Block + (Ok(Token(t::LeftBrace, _)), _) => { + // Skip check for semicolon + let (block, span) = + self.block().trace("while parsing block statement")?; + return Ok(Statement { + kind: s::Block(block), + span, + }); + }, + // Expression + _ => { + let expr = self + .expression(0) + .trace("while parsing expression statement")?; + Statement { + span: expr.span, + kind: s::Expression(expr), + } + }, + }; + // Check for semicolon + if self.eat(t::Semicolon).is_ok() { + Ok(statement) + } else { + error().reason("Expected ;") + } + } + + pub fn expression( + &mut self, + mut precedence: Precedence, + ) -> Result { + use ExpressionKind as e; + use TokenKind as t; + let next = self.peek(0)?; + // Unary prefix expression + let mut current = if let Ok(p) = unary_prefix_prec(&next.0) { + let operator = 
self.next().expect("unreachable"); + let child = self + .expression(p) + .trace(format!("while parsing unary {:?}", operator.0)) + .span(&operator.1)?; + let span = child.span + operator.1; + Expression::new( + e::Unary { + token: operator.0, + child: child.into(), + }, + span, + ) + } + // Terminal or paren + else { + self.primary()? + }; + // Precedence climbing loop + while let Ok(next) = self.peek(0) { + // Binary infix + if let Ok((new_precedence, left_assoc)) = binary_prec(&next.0) { + if (!left_assoc && new_precedence <= precedence) + || (new_precedence < precedence) + { + return Ok(current); + } + let operator = self.next().expect("unreachable"); + let rhs = self + .expression(new_precedence) + .trace(format!("while parsing binary {:?}", operator.0)) + .span(&operator.1)?; + let span = next.1 + rhs.span; + current = Expression::new( + e::Binary { + token: operator.0, + left: current.into(), + right: rhs.into(), + }, + span, + ); + } + // Unary postfix + else if let Ok(new_precedence) = unary_postfix_prec(&next.0) { + let operator = self.next().expect("unreachable"); + let span = next.1 + operator.1; + precedence = new_precedence; + current = Expression::new( + e::Unary { + token: operator.0, + child: current.into(), + }, + span, + ); + } else { + break; + } + } + Ok(current) + } + + fn primary(&mut self) -> Result { + use ExpressionKind as e; + use TokenKind as t; + let next = self.peek(0)?; + let span = next.1; + let kind = match next.0 { + t::IntegerLiteral(i) => e::Integer(i), + t::FloatLiteral(f) => e::Real(f), + t::StringLiteral(s) => e::String(s), + t::True => e::Boolean(true), + t::Identifier(i) => e::Identifier(i), + t::LeftParen => { + self.eat(t::LeftParen).expect("unreachable"); + let expr = self + .expression(0) + .trace("while parsing parenthesized expression")?; + self + .look(t::RightParen) + .reason("Unclosed '('") + .span(&expr.span)?; + e::Parenthesis(expr.into()) + }, + _ => { + return error() + .span(&span) + .reason(format!("Expected 
primary, found {:?}", next.0)); + }, + }; + self.skip(1); + Ok(Expression { kind, span }) + } + + fn block(&mut self) -> Result<(Vec, Span)> { + use TokenKind as t; + let mut span = self.eat(t::LeftBrace).reason("Expected block")?.1; + let mut statements = vec![]; + loop { + let next = self.peek(0)?; + span = span + next.1; + match self.eat(t::RightBrace) { + Ok(t) => { + span = span + t.1; + break; + }, + _ => { + let statement = self.statement()?; + span = span + statement.span; + statements.push(statement); + }, + }; + } + Ok((statements, span)) + } +} diff --git a/src/token.rs b/src/token.rs index 189641a..2ebc6b9 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1,22 +1,38 @@ +use crate::err::*; +use crate::Span; + #[derive(Debug, Clone)] -pub enum TokenType { - // Symbols +pub enum TokenKind { LeftParen, RightParen, - LeftSquare, - RightSquare, LeftBrace, RightBrace, + LeftSquare, + RightSquare, + Comma, + Colon, + Semicolon, + Dot, + DotDot, Plus, Minus, - Star, Slash, - Semicolon, + Star, + Percent, + Arrow, + FatArrow, + PlusEqual, + MinusEqual, + SlashEqual, + StarEqual, + PercentEqual, Bang, BangEqual, + Question, + QuestionEqual, Equal, DoubleEqual, Greater, @@ -24,187 +40,463 @@ pub enum TokenType { Less, LessEqual, - // Literals - String, - Character, - Number(f64), + Pipe, + Ampersand, + Carrot, + Hash, + + DotDotEqual, + + Identifier(String), + StringLiteral(String), + CharLiteral(char), + IntegerLiteral(i64), + FloatLiteral(f64), - // Words - Ident, - And, - Or, - Self_, - Struct, - True, - False, - Fn, If, Else, - Nil, + And, + Or, + Xor, + Not, + Nand, + Nor, + Xnor, Print, + Break, Return, - Super, - Let, - While, + Continue, For, + While, + True, + False, + Struct, + Enum, + Union, - // Special - Unrecognized, - TooLong, + Whitespace(String), + SmallComment(String), + BigComment(String), + + Idk, + EOF, } -/// Type, index -#[derive(Debug, Clone)] -pub struct Token { - pub ttype: TokenType, - pub start: usize, - pub end: usize, +impl TokenKind { 
+ pub fn is_meaningful(&self) -> bool { + match self { + Self::Whitespace(_) + | Self::SmallComment(_) + | Self::BigComment(_) + | Self::Idk => false, + _ => true, + } + } } -pub fn tokenize(input: &str) -> Vec { - let input_str = input; - let mut input = input.char_indices().peekable(); - let mut tokens = vec![]; - 'outer: loop { - // Find next non-whitespace line - let (start, c) = 'ws: loop { - match input.next() { - // Stop at end of input - None => break 'outer, - Some((index, character)) if !character.is_whitespace() => { - break 'ws (index, character) - }, - _ => {}, - } - }; - let mut end = start + 1; - let mut advance = || {}; - let ttype = match c { - // Match single character tokens - '(' => TokenType::LeftParen, - ')' => TokenType::RightParen, - '[' => TokenType::LeftSquare, - ']' => TokenType::RightSquare, - '{' => TokenType::LeftBrace, - '}' => TokenType::RightBrace, - ',' => TokenType::Comma, - '.' => TokenType::Dot, - '+' => TokenType::Plus, - '-' => TokenType::Minus, - '*' => TokenType::Star, - '/' => TokenType::Slash, - ';' => TokenType::Semicolon, - // Match multicharacter tokens - '!' 
=> match input.peek() { - Some((_, '=')) => { - input.next(); - end += 1; - TokenType::BangEqual - }, - _ => TokenType::Bang, +impl PartialEq for TokenKind { + fn eq(&self, other: &Self) -> bool { + std::mem::discriminant(self) == std::mem::discriminant(other) + } +} + +impl Eq for TokenKind { +} + +#[derive(Clone, Debug)] +pub struct Token(pub TokenKind, pub Span); + +fn t(tk: TokenKind, sp: Span) -> Result { + Ok(Token(tk, sp)) +} + +const TOKENIZER_LOOKAHEAD: usize = 2; + +type CharIter = crate::Window; + +pub struct Tokenizer> { + iter: CharIter, + column: usize, + row: usize, + finished: bool, +} + +impl> Tokenizer { + pub fn new(iter: I) -> Self { + Self { + iter: CharIter::new(iter), + column: 1, + row: 1, + finished: false, + } + } + + pub fn span(&self) -> Span { + Span { + column: self.column, + row: self.row, + } + } + + fn next_char(&mut self) -> Option { + match self.iter.next() { + Some(c) if c == '\n' => { + self.row += 1; + self.column = 1; + Some(c) }, - '=' => match input.peek() { - Some((_, '=')) => { - input.next(); - end += 1; - TokenType::DoubleEqual - }, - _ => TokenType::Equal, + Some(c) => { + self.column += 1; + Some(c) }, - '<' => match input.peek() { - Some((_, '=')) => { - input.next(); - end += 1; - TokenType::GreaterEqual + _ => None, + } + } + + fn delimited(&mut self, terminator: char) -> Option { + let mut buffer = String::new(); + let mut escape = false; + loop { + let c = match self.next_char() { + Some(c) if c == terminator && !escape => { + break; }, - _ => TokenType::Greater, - }, - '>' => match input.peek() { - Some((_, '=')) => { - input.next(); - end += 1; - TokenType::LessEqual - }, - _ => TokenType::Less, - }, - // Match keywords, identifiers, and literals - c if c.is_alphanumeric() => 'case: { - // Scan full word - while let Some((new_end, next)) = input.peek() { - if next.is_alphanumeric() || *next == '_' { - let _ = input.next(); + Some(c) => { + if c == '\\' { + escape = !escape; } else { - end = *new_end; + escape = 
false; + } + c + }, + None => return None, + }; + buffer.push(c) + } + Some(buffer) + } + + fn peek(&mut self, n: usize) -> Option { + self.iter.peek(n).clone() + } + + fn _next(&mut self) -> Result { + use TokenKind::*; + let position = Span { + row: self.row, + column: self.column, + }; + let current = match self.next_char() { + Some(c) => c, + None => return t(EOF, position), + }; + // Parse whitespace + if current.is_whitespace() { + let mut buffer = String::from(current); + while let Some(c) = self.peek(0) { + if !c.is_whitespace() { + break; + } + _ = self.next_char(); + buffer.push(c.clone()); + } + return t(Whitespace(buffer), position); + } + // Parse multiline comments + if let ('/', Some('*')) = (current, self.peek(0)) { + let _ = self.next_char(); + let mut comment_level = 1; + let mut buffer = String::new(); + while let Some(current) = self.next_char() { + // Ignore /* */ inside strings + if '\"' == current { + if let Some(inner_string) = self.delimited('\"') { + buffer.push('\"'); + buffer.push_str(&inner_string); + buffer.push('\"'); + continue; + } + } + if let ('/', Some('*')) = (current, self.peek(0)) { + comment_level += 1; + } else if let ('*', Some('/')) = (current, self.peek(0)) { + comment_level -= 1; + } + if comment_level == 0 { + let _ = self.next_char(); + break; + } + buffer.push(current); + } + return t(BigComment(buffer), position); + } + // Parse single line comments + if let ('/', Some('/')) = (current, self.peek(0)) { + let _ = self.next_char(); + let mut buffer = String::new(); + while let Some(c) = self.next_char() { + if c == '\n' { + break; + } + buffer.push(c); + } + return t(SmallComment(buffer), position); + } + let next = self.peek(0); + let next_next = self.peek(1); + // Match single character tokens + { + let not_next = move |c| Some(c) != next; + let kind = match current { + '(' => LeftParen, + ')' => RightParen, + '{' => LeftBrace, + '}' => RightBrace, + '[' => LeftSquare, + ']' => RightSquare, + ',' => Comma, + ':' => 
Colon, + ';' => Semicolon, + '|' => Pipe, + '&' => Ampersand, + '^' => Carrot, + '#' => Hash, + '.' if not_next('.') => Dot, + '+' if not_next('=') => Plus, + '-' if not_next('=') && not_next('>') => Minus, + '*' if not_next('=') => Star, + '/' if not_next('=') => Slash, + '%' if not_next('=') => Percent, + '!' if not_next('=') => Bang, + '?' if not_next('=') => Question, + '=' if not_next('=') && not_next('>') => Equal, + '<' if not_next('=') => Less, + '>' if not_next('=') => Greater, + _ => Idk, + }; + if kind != Idk { + return t(kind, position); + }; + } + // Match two character tokens + if let Some(next) = next { + let not_next_next = move |c| Some(c) != next_next; + let kind = match (current, next) { + ('.', '.') if not_next_next('=') => DotDot, + ('+', '=') => PlusEqual, + ('-', '=') => MinusEqual, + ('*', '=') => StarEqual, + ('/', '=') => SlashEqual, + ('%', '=') => PercentEqual, + ('=', '=') => DoubleEqual, + ('?', '=') => QuestionEqual, + ('!', '=') => BangEqual, + ('<', '=') => LessEqual, + ('>', '=') => GreaterEqual, + ('-', '>') => Arrow, + ('=', '>') => FatArrow, + _ => Idk, + }; + if kind != Idk { + let _ = self.next(); + return t(kind, position); + } + } + // Match three character tokens + if let (Some(next), Some(next_next)) = (next, next_next) { + let kind = match (current, next, next_next) { + ('.', '.', '=') => DotDotEqual, + _ => Idk, + }; + if kind != Idk { + let _ = self.next(); + let _ = self.next(); + return t(kind, position); + } + } + let mut buffer = String::new(); + // Match character + if current == '\'' { + let buffer = self + .delimited('\'') + .reason("Single quote (') was opened, but never closed") + .span(&position)?; + let baked = bake_string(&buffer)?; + if baked.len() != 1 { + return error() + .reason("Single quote (') contains more than one character") + .span(&position); + } + let kind = CharLiteral( + baked + .chars() + .next() + .reason("Single quote (') contains no characters") + .span(&position)?, + ); + return t(kind, 
position); + } + // Match string + if current == '"' { + let buffer = self + .delimited('\"') + .reason("Double quote (\") was opened, but never closed") + .span(&position)?; + let kind = StringLiteral(bake_string(&buffer)?); + return t(kind, position); + } + buffer.push(current); + // Match number + if current.is_ascii_digit() { + // Only one dot per number + let mut encountered_dot = false; + while let Some(c) = self.peek(0) { + if c == '.' && !encountered_dot { + if let Some('.') = self.peek(1) { break; } + encountered_dot = true; + } else if !(c == '_' || c == 'x' || c.is_ascii_hexdigit()) { + break; } - let word = &input_str[start..end]; - // Attempt to parse hex literal - if let Some(s) = - word.strip_prefix("0x").or_else(|| word.strip_prefix("0X")) - { - if let Ok(n) = u64::from_str_radix(s, 16) { - break 'case TokenType::Number(n as f64); - } else { - break 'case TokenType::Unrecognized; - } - } - // Attempt to parse binary literal - if let Some(s) = - word.strip_prefix("0b").or_else(|| word.strip_prefix("0B")) - { - if let Ok(n) = u64::from_str_radix(s, 2) { - break 'case TokenType::Number(n as f64); - } else { - break 'case TokenType::Unrecognized; - } - } - // Attempt to parse decimal literal - if let Ok(f) = word.parse::() { - break 'case TokenType::Number(f); - } - // Parse keyword or ident - match word { - "and" => TokenType::And, - "or" => TokenType::Or, - "self" => TokenType::Self_, - "struct" => TokenType::Struct, - "true" => TokenType::True, - "false" => TokenType::False, - "fn" => TokenType::Fn, - "if" => TokenType::If, - "else" => TokenType::Else, - "nil" => TokenType::Nil, - "print" => TokenType::Print, - "return" => TokenType::Return, - "super" => TokenType::Super, - "let" => TokenType::Let, - "while" => TokenType::While, - "for" => TokenType::For, - _ => TokenType::Ident, - } - }, - // Parse string - '"' => { - while let Some((new_end, next)) = input.next() { - match next { - '"' => { - end = new_end + 1; - break; - }, - // Skip escapes and 
deal with them later - '\\' => { - let _ = input.next(); - }, - _ => {}, - } - } - TokenType::String - }, - // Parse character - _ => TokenType::Unrecognized, - }; - tokens.push(Token { ttype, start, end }); + buffer.push(c); + let _ = self.next_char(); + } + return t(parse_number(&buffer).span(&position)?, position); + } + // Match keyword or identifier + while let Some(c) = self.peek(0) { + if c.is_alphanumeric() || c == '_' { + let _ = self.next_char(); + } else { + break; + } + buffer.push(c); + } + // Match keywords + { + let kind = match buffer.as_str() { + "if" => If, + "else" => Else, + "and" => And, + "or" => Or, + "xor" => Xor, + "nand" => Nand, + "nor" => Nor, + "xnor" => Xnor, + "for" => For, + "while" => While, + "print" => Print, + "break" => Break, + "return" => Return, + "continue" => Continue, + "not" => Not, + "true" => True, + "false" => False, + "struct" => Struct, + "enum" => Enum, + "union" => Union, + _ => Identifier(buffer), + }; + return t(kind, position); + } } - tokens +} + +impl> Iterator for Tokenizer { + type Item = Token; + + fn next(&mut self) -> Option { + loop { + match self._next() { + Ok(Token(TokenKind::EOF, span)) => { + if self.finished { + return None; + } else { + self.finished = true; + return Some(Token(TokenKind::EOF, span)); + } + }, + Ok(r) => return Some(r), + _ => {}, + }; + } + } +} + +fn parse_number(num: &str) -> Result { + use TokenKind::*; + let num = num.replace('_', ""); + // Floating point (only decimal) + if num.contains('.') { + num.parse::().map(|f| FloatLiteral(f)).coerce() + } + // Hex integer + else if let Some(hex) = num.strip_prefix("0x") { + i64::from_str_radix(hex, 16) + .map(|i| IntegerLiteral(i)) + .coerce() + } + // Octal integer + else if let Some(oct) = num.strip_prefix("0o") { + i64::from_str_radix(oct, 8) + .map(|i| IntegerLiteral(i)) + .coerce() + } + // Binary integer + else if let Some(bin) = num.strip_prefix("0b") { + i64::from_str_radix(bin, 2) + .map(|i| IntegerLiteral(i)) + .coerce() + 
} + // Decimal integer + else { + num.parse::().map(|i| IntegerLiteral(i)).coerce() + } +} + +fn bake_string(s: &str) -> Result { + let mut baked = String::with_capacity(s.len()); + let mut it = s.chars(); + loop { + match it.next() { + Some('\\') => baked.push(match it.next() { + Some('n') => '\n', // New line + Some('r') => '\r', // Carriage return + Some('t') => '\t', // Tab + Some('b') => '\x08', // Backspace + Some('\\') => '\\', // Backslash + Some('\0') => '\0', // Null + Some('"') => '\"', // Double quote + Some('\'') => '\'', // Single quote + Some('x') => { + // Ascii escapes + let mut a = || { + let a = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?; + let b = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?; + let num = (a << 4) | b; + char::from_u32(num) + }; + a().reason(format!("Found invalid ASCII (\\aXX) escape sequence"))? + }, + Some('u') => { + // Unicode escapes + let mut a = || { + let a = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?; + let b = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?; + let c = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?; + let d = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?; + let num = (a << 12) | (b << 8) | (c << 4) | d; + char::from_u32(num) + }; + a().reason("Found invalid Unicode (\\uXXXX) escape sequence")? 
+ }, + _ => return Err(Diagnostic::new("Found invalid escape sequence")), + }), + // Unremarkable character + Some(c) => baked.push(c), + None => break, + } + } + Ok(baked) } diff --git a/src/treewalk.rs b/src/treewalk.rs new file mode 100644 index 0000000..a074f5b --- /dev/null +++ b/src/treewalk.rs @@ -0,0 +1,280 @@ +use crate::err::*; +use std::collections::HashMap; + +use crate::{ + Expression, ExpressionKind, Statement, StatementKind, Token, TokenKind, +}; + +#[derive(Debug, Clone)] +enum Value { + Integer(i64), + Real(f64), + String(String), + Boolean(bool), + Undefined, +} + +#[derive(Debug, Clone)] +struct Scope { + outer: Option>, + declarations: HashMap, +} + +impl Scope { + fn new() -> Self { + Self { + outer: None, + declarations: Default::default(), + } + } + + fn enscope(&mut self) -> &mut Self { + *self = Self { + outer: Some(Box::new(self.clone())), + declarations: HashMap::new(), + }; + self + } + + fn descope(&mut self) -> &mut Self { + if let Some(outer) = &self.outer { + *self = *outer.clone(); + } + self + } + + fn declare(&mut self, key: String) -> Result<()> { + if self.declarations.contains_key(&key) { + return error() + .reason(format!("Re-declaration of '{key}' in same scope")); + } + self.declarations.insert(key, Value::Undefined); + Ok(()) + } + + fn assign(&mut self, key: String, value: Value) -> Result<()> { + if !self.declarations.contains_key(&key) { + if let Some(outer) = &mut self.outer { + return outer.assign(key, value); + } + return error() + .reason(format!("Assignemnt to '{key}' before declaration")); + } + self.declarations.insert(key, value); + Ok(()) + } + + fn access(&self, key: String) -> Result { + match self.declarations.get(&key) { + Some(v) => Ok(v.clone()), + None => { + if let Some(outer) = &self.outer { + outer.access(key) + } else { + error().reason(format!("'{key}' was never declared")) + } + }, + } + } +} + +pub struct Interpreter> { + scope: Scope, + iter: I, +} + +impl> Interpreter { + pub fn new(iter: I) -> 
Self {
        Self {
            scope: Scope::new(),
            iter,
        }
    }

    /// Evaluates `child` and applies the prefix operator `token` to the result.
    ///
    /// Integers and reals accept `Plus` and `Minus`; booleans accept `Not`.
    ///
    /// # Errors
    ///
    /// Propagates any error from evaluating `child`, and returns a diagnostic
    /// when `token` is not defined for the operand's type (including every
    /// operand that is not an integer, real or boolean).
    fn evaluate_unary(
        &mut self,
        token: TokenKind,
        child: Expression,
    ) -> Result<Value> {
        use TokenKind as t;
        use Value as v;
        let val = self.evaluate(child)?;
        Ok(match val {
            v::Integer(i) => v::Integer(match token {
                t::Plus => i,
                t::Minus => -i,
                _ => {
                    return error()
                        .reason(format!("Unary {token:?} is undefined for integers"));
                },
            }),
            v::Real(r) => v::Real(match token {
                t::Plus => r,
                t::Minus => -r,
                _ => {
                    return error()
                        .reason(format!("Unary {token:?} is undefined for reals"));
                },
            }),
            v::Boolean(b) => v::Boolean(match token {
                t::Not => !b,
                _ => {
                    return error()
                        .reason(format!("Unary {token:?} is undefined for booleans"));
                },
            }),
            _ => {
                // This is the unary path, so the diagnostic must say "Unary".
                return error()
                    .reason(format!("Unary {token:?} is undefined for {val:?}"));
            },
        })
    }

    /// Evaluates both operands (left first) and applies the infix operator
    /// `token` to them.
    ///
    /// Integer pairs support `+ - * / %` (yielding an integer) and
    /// `== < > <= >=` (yielding a boolean). Real pairs support `+ - * /`.
    /// No other operand combination is defined; in particular there is no
    /// implicit integer/real conversion.
    ///
    /// # Errors
    ///
    /// Propagates any error from evaluating an operand, and returns a
    /// diagnostic when the operator is undefined for the operand types or when
    /// an integer division/remainder has no result (zero divisor or overflow).
    fn evaluate_binary(
        &mut self,
        token: TokenKind,
        left: Expression,
        right: Expression,
    ) -> Result<Value> {
        use TokenKind as t;
        use Value::*;
        let left = self.evaluate(left)?;
        let right = self.evaluate(right)?;
        // Match by reference so the operands stay available for the fallback
        // diagnostic without cloning them first.
        Ok(match (&left, &right) {
            (Integer(l), Integer(r)) => {
                let (l, r) = (*l, *r);
                match token {
                    t::Plus => Integer(l + r),
                    t::Minus => Integer(l - r),
                    t::Star => Integer(l * r),
                    // `/` and `%` panic on a zero divisor; report it as a
                    // diagnostic instead of aborting the interpreter.
                    t::Slash => match l.checked_div(r) {
                        Some(quotient) => Integer(quotient),
                        None => {
                            return error().reason(format!(
                                "Integer division {l} / {r} is undefined"
                            ));
                        },
                    },
                    t::Percent => match l.checked_rem(r) {
                        Some(remainder) => Integer(remainder),
                        None => {
                            return error().reason(format!(
                                "Integer remainder {l} % {r} is undefined"
                            ));
                        },
                    },
                    t::DoubleEqual => Boolean(l == r),
                    t::Less => Boolean(l < r),
                    t::Greater => Boolean(l > r),
                    t::LessEqual => Boolean(l <= r),
                    t::GreaterEqual => Boolean(l >= r),
                    other => {
                        return error().reason(format!(
                            "Binary {other:?} is undefined for integers"
                        ));
                    },
                }
            },
            (Real(l), Real(r)) => {
                let (l, r) = (*l, *r);
                Real(match token {
                    t::Plus => l + r,
                    t::Minus => l - r,
                    t::Star => l * r,
                    t::Slash => l / r,
                    other => {
                        return error()
                            .reason(format!("Binary {other:?} is undefined for reals"));
                    },
                })
            },
            _ => {
                return error().reason(format!(
                    "Binary {:?} is undefined for {:?} and {:?}",
                    token, left, right
                ));
            },
        })
    }

    /// Reduces an expression tree to a [`Value`].
    ///
    /// Literals map directly onto the matching `Value` variant, identifiers
    /// are looked up through `self.scope.access`, operators are delegated to
    /// [`Self::evaluate_unary`] / [`Self::evaluate_binary`], and a
    /// parenthesised expression evaluates to its inner expression.
    ///
    /// The result is passed through `.span(&expr.span)` before being returned
    /// (presumably tagging an error with the expression's source location —
    /// confirm against the helper in `err.rs`).
    fn evaluate(&mut self, expr: Expression) -> Result<Value> {
        use ExpressionKind as e;
        match expr.kind {
            e::Integer(i) => Ok(Value::Integer(i)),
            e::Real(r) => Ok(Value::Real(r)),
            e::String(s) => Ok(Value::String(s)),
            e::Boolean(b) => Ok(Value::Boolean(b)),
            e::Identifier(i) => self.scope.access(i),
            e::Binary { token, left, right } => {
                self.evaluate_binary(token, *left, *right)
            },
            e::Unary { token, child } => self.evaluate_unary(token, *child),
            e::Parenthesis(e) => self.evaluate(*e),
        }
        .span(&expr.span)
    }

    /// Executes a single statement against the current scope.
    ///
    /// * `Mutable` / `Immutable` declare the name first, then evaluate and
    ///   assign the initialiser (optional for `Mutable`).
    /// * `Assignment` evaluates the value and assigns it to an existing name.
    /// * `Print` writes the evaluated value to stdout using its `Debug` form.
    /// * `Expression` evaluates and discards the result.
    /// * `Block`, `If` and `While` run their bodies through [`Self::block`].
    ///
    /// # Errors
    ///
    /// Propagates evaluation and scope errors, and returns a diagnostic when
    /// an `if` / `while` predicate does not evaluate to a boolean.
    pub fn execute(&mut self, statement: Statement) -> Result<()> {
        use StatementKind as s;
        match statement.kind {
            s::Mutable { name, value, .. } => {
                self.scope.declare(name.clone())?;
                if let Some(value) = value {
                    let value = self.evaluate(value)?;
                    self.scope.assign(name, value)?;
                }
            },
            s::Immutable { name, value, .. } => {
                self.scope.declare(name.clone())?;
                let value = self.evaluate(value)?;
                self.scope.assign(name, value)?;
            },
            s::Assignment { name, value } => {
                let span = value.span;
                let value = self.evaluate(value).span(&span)?;
                self.scope.assign(name, value).span(&span)?;
            },
            s::Print(e) => {
                let e = self.evaluate(e)?;
                println!("{e:?}");
            },
            s::Expression(e) => {
                self.evaluate(e)?;
            },
            s::Block(block) => self.block(block)?,
            s::If {
                predicate,
                block,
                else_,
            } => {
                // NOTE(review): `else_` is bound but never executed, so an
                // `else` branch is silently ignored. Its type is declared
                // outside this section — TODO wire it up once confirmed.
                let span = predicate.span;
                match self.evaluate(predicate)? {
                    Value::Boolean(true) => self.block(block)?,
                    Value::Boolean(false) => {},
                    _ => {
                        return error()
                            .reason("Predicate for 'if' statement must be a boolean")
                            .span(&span);
                    },
                }
            },
            s::While { predicate, block } => {
                let span = predicate.span;
                loop {
                    // `evaluate` and `block` consume their arguments, so each
                    // iteration works on a fresh clone of predicate and body.
                    match self.evaluate(predicate.clone())? {
                        Value::Boolean(true) => self.block(block.clone())?,
                        Value::Boolean(false) => break,
                        _ => {
                            return error()
                                .reason("Predicate for 'while' statement must be a boolean")
                                .span(&span);
                        },
                    }
                }
            },
        }
        Ok(())
    }

    /// Runs `block` between a matching `enscope` / `descope` pair.
    ///
    /// Execution stops at the first failing statement. `descope` is called
    /// before the error is returned, so a failure inside the block does not
    /// leave the interpreter with an unbalanced `enscope`.
    fn block(&mut self, block: Vec<Statement>) -> Result<()> {
        self.scope.enscope();
        let outcome = block.into_iter().try_for_each(|statement| {
            let span = statement.span;
            self.execute(statement).span(&span)
        });
        self.scope.descope();
        outcome
    }

    /// Executes every statement produced by `self.iter`, in order.
    ///
    /// # Errors
    ///
    /// Returns the first error raised by a statement (passed through
    /// `.span` with that statement's span); later statements are not pulled
    /// from the iterator.
    pub fn run(&mut self) -> Result<()> {
        while let Some(next) = self.iter.next() {
            let span = next.span;
            self.execute(next).span(&span)?;
        }
        Ok(())
    }
}