This commit is contained in:
Logan 2024-10-15 11:20:35 -05:00
parent b55135245d
commit 7a33804ffe
7 changed files with 1486 additions and 181 deletions

6
demo.lang Normal file
View file

@ -0,0 +1,6 @@
i := 0;
while i < 10 {
i = i + 1;
print i;
}

151
src/err.rs Normal file
View file

@ -0,0 +1,151 @@
use crate::Span;
pub type Result<T> = std::result::Result<T, Diagnostic>;
/// Returns an `Err` holding a [`Diagnostic`] whose reason is empty.
///
/// Meant as the head of a builder chain such as `error().reason("...")`,
/// which supplies the actual message afterwards.
pub fn error<T>() -> Result<T> {
    let blank = Diagnostic::new("");
    Err(blank)
}
/// An error report: a message, an optional source position, and a list of
/// context notes.
#[derive(Clone)]
pub struct Diagnostic {
    /// Description of what went wrong.
    reason: String,
    /// Source position the error is attributed to, when one has been attached.
    span: Option<Span>,
    /// Context notes in the order they were pushed; `Display` prints them in
    /// reverse.
    backtrace: Vec<String>,
}

impl Diagnostic {
    /// Builds a diagnostic with the given reason, no span, and no context notes.
    pub fn new(reason: impl Into<String>) -> Self {
        let reason = reason.into();
        Self {
            reason,
            span: None,
            backtrace: Vec::new(),
        }
    }
}
impl std::fmt::Display for Diagnostic {
    /// Writes the reason on the first line, prefixed with `(row:column)` when a
    /// span is present and `(E)` otherwise, then one `--> note` line per
    /// context note, last-pushed first.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.span {
            Some(span) => write!(f, "({}:{}) {}\n", span.row, span.column, self.reason)?,
            None => write!(f, "(E) {}\n", self.reason)?,
        }
        self.backtrace
            .iter()
            .rev()
            .try_for_each(|b| write!(f, "--> {}\n", b))
    }
}
impl std::fmt::Debug for Diagnostic {
    /// Debug output is the same text as the `Display` rendering, so that
    /// `unwrap()` failures print the formatted diagnostic.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{self}"))
    }
}
impl From<std::io::Error> for Diagnostic {
    /// Uses the I/O error's `Display` text as the reason; no span, no notes.
    fn from(value: std::io::Error) -> Self {
        let reason = value.to_string();
        Self {
            reason,
            span: None,
            backtrace: Vec::new(),
        }
    }
}
impl From<std::num::ParseIntError> for Diagnostic {
    /// Maps an integer parse failure to a user-facing reason.
    ///
    /// Overflow in either direction and invalid digits get dedicated messages;
    /// every other kind falls back to a generic message.
    fn from(value: std::num::ParseIntError) -> Self {
        use std::num::IntErrorKind;
        let reason = match value.kind() {
            IntErrorKind::PosOverflow | IntErrorKind::NegOverflow => {
                "Integer value is too large to represent"
            },
            // Fixed typo: this message previously read "containts".
            IntErrorKind::InvalidDigit => "Integer value contains invalid digits",
            _ => "Integer value could not be parsed",
        };
        Diagnostic::new(reason)
    }
}
impl From<std::num::ParseFloatError> for Diagnostic {
    /// Every float parse failure maps to the same fixed reason; the source
    /// error's details are not used.
    fn from(_value: std::num::ParseFloatError) -> Self {
        let reason = "Float value could not be parsed";
        Diagnostic::new(reason)
    }
}
/// Conversion of a fallible value into this module's `Result`, attaching a
/// message of type `S` on the failure path.
pub trait IntoDiagnostic<T, S: Into<String>> {
/// Converts to `Result<T>`; on failure the diagnostic's reason is set to `s`.
fn reason(self, s: S) -> Result<T>;
/// Converts to `Result<T>`; on failure `s` is recorded as a backtrace note.
fn trace(self, s: S) -> Result<T>;
}
/// Attaching or removing a source position on the error side of a result.
pub trait WithSpan<T> {
/// Associates `span` with the error, if there is one.
fn span(self, span: &Span) -> Result<T>;
/// Removes any span from the error, if there is one.
fn no_span(self) -> Result<T>;
}
impl<T> WithSpan<T> for Result<T> {
    /// Attaches `span` to the error unless it already carries one, so the
    /// first span attached is the one that is kept. `Ok` values pass through.
    fn span(self, span: &Span) -> Result<T> {
        match self {
            Err(mut diagnostic) => {
                if diagnostic.span.is_none() {
                    diagnostic.span = Some(span.clone());
                }
                Err(diagnostic)
            },
            ok => ok,
        }
    }

    /// Clears the span on the error. `Ok` values pass through.
    fn no_span(self) -> Result<T> {
        match self {
            Err(mut diagnostic) => {
                diagnostic.span = None;
                Err(diagnostic)
            },
            ok => ok,
        }
    }
}
/// Conversion of a foreign result into this module's `Result` without
/// supplying a new message.
pub trait CoerceDiagnostic<T> {
/// Converts `self` into `Result<T>`.
fn coerce(self) -> Result<T>;
}
impl<T, S: Into<String>> IntoDiagnostic<T, S> for Option<T> {
    /// `Some(t)` becomes `Ok(t)`; `None` becomes an unspanned diagnostic whose
    /// reason is `s`.
    fn reason(self, s: S) -> Result<T> {
        self.ok_or_else(|| Diagnostic {
            reason: s.into(),
            span: None,
            backtrace: vec![],
        })
    }

    /// `Some(t)` becomes `Ok(t)`; `None` becomes an unspanned diagnostic with
    /// an empty reason and `s` as its only backtrace note.
    fn trace(self, s: S) -> Result<T> {
        self.ok_or_else(|| Diagnostic {
            reason: "".into(),
            span: None,
            backtrace: vec![s.into()],
        })
    }
}
impl<T, E: Into<Diagnostic>, S: Into<String>> IntoDiagnostic<T, S>
    for std::result::Result<T, E>
{
    /// Converts the error into a `Diagnostic` and overwrites its reason with
    /// `s`; span and backtrace are kept. `Ok` values pass through.
    fn reason(self, s: S) -> Result<T> {
        match self {
            Ok(value) => Ok(value),
            Err(source) => {
                let mut diagnostic: Diagnostic = source.into();
                diagnostic.reason = s.into();
                Err(diagnostic)
            },
        }
    }

    /// Converts the error into a `Diagnostic` and appends `s` to its
    /// backtrace. `Ok` values pass through.
    fn trace(self, s: S) -> Result<T> {
        match self {
            Ok(value) => Ok(value),
            Err(source) => {
                let mut diagnostic: Diagnostic = source.into();
                diagnostic.backtrace.push(s.into());
                Err(diagnostic)
            },
        }
    }
}
impl<T, E: Into<Diagnostic>> CoerceDiagnostic<T> for std::result::Result<T, E> {
    /// Converts the error type through its `Into<Diagnostic>` implementation.
    fn coerce(self) -> Result<T> {
        self.map_err(Into::into)
    }
}

84
src/lookahead.rs Normal file
View file

@ -0,0 +1,84 @@
/// An iterator adapter that buffers the next `N` items of an inner iterator
/// so they can be inspected with [`Window::peek`] before being consumed.
///
/// Once the inner iterator has returned `None`, it is never polled again;
/// the remaining buffer slots are filled with `None`.
pub struct Window<const N: usize, T, I>
where
    I: Iterator<Item = T>,
{
    /// The wrapped iterator that refills the buffer.
    iterator: I,
    /// Upcoming items; slot 0 is the next item `next()` will yield.
    buffer: [Option<T>; N],
    /// Set once the inner iterator has returned `None`.
    exhausted: bool,
    /// Set once `next()` on the window itself has returned `None`.
    pub finished: bool,
}

impl<const N: usize, T, I> std::fmt::Debug for Window<N, T, I>
where
    I: Iterator<Item = T>,
    T: std::fmt::Debug,
{
    /// Prints only the lookahead buffer.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self.buffer)
    }
}

impl<const N: usize, T, I> Window<N, T, I>
where
    I: Iterator<Item = T>,
{
    /// Wraps `it`, eagerly pulling up to `N` items into the buffer.
    ///
    /// # Panics
    /// Panics when `N` is 0.
    pub fn new(mut it: I) -> Self {
        assert!(N > 0, "Lookahead buffer cannot be 0 sized");
        let mut exhausted = false;
        // Fill slots in order; stop polling as soon as the source runs dry so
        // a non-fused iterator cannot leak items into later slots.
        let buffer: [Option<T>; N] = std::array::from_fn(|_| {
            if exhausted {
                return None;
            }
            let item = it.next();
            exhausted = item.is_none();
            item
        });
        Self {
            iterator: it,
            buffer,
            exhausted,
            finished: false,
        }
    }

    /// Borrows the wrapped iterator.
    pub fn inner(&self) -> &I {
        &self.iterator
    }

    /// Returns the item `n` positions ahead (0 is the next item to be
    /// yielded), or `None` when the input ends before that position.
    ///
    /// # Panics
    /// Panics when `n >= N`.
    pub fn peek(&self, n: usize) -> &Option<T> {
        debug_assert!(n < N, "Peeked further than buffer allows");
        &self.buffer[n]
    }

    /// Pulls one item from the inner iterator, or returns `None` without
    /// polling when the iterator has already reported exhaustion.
    fn pull(&mut self) -> Option<T> {
        if self.exhausted {
            return None;
        }
        let item = self.iterator.next();
        self.exhausted = item.is_none();
        item
    }

    /// Shifts the buffer one slot towards the front and refills the last slot.
    /// Expects slot 0 to have been emptied by the caller.
    fn _advance(&mut self) {
        // The emptied front slot rotates to the back, where it is overwritten.
        self.buffer.rotate_left(1);
        let refill = self.pull();
        self.buffer[N - 1] = refill;
    }
}

impl<const N: usize, T, I> Iterator for Window<N, T, I>
where
    I: Iterator<Item = T>,
{
    type Item = T;

    /// Yields the front of the buffer and refills the back. Sets `finished`
    /// the first time there is nothing left to yield.
    fn next(&mut self) -> Option<Self::Item> {
        let front = self.buffer[0].take();
        if front.is_none() {
            // An empty front slot means every slot is empty and the source is
            // exhausted, so there is nothing to shift or refill.
            self.finished = true;
            return None;
        }
        self._advance();
        front
    }
}

View file

@ -1,18 +1,57 @@
mod err;
mod lookahead;
mod parse;
mod token; mod token;
mod treewalk;
use std::ops::Add;
use lookahead::*;
use parse::*;
use token::*;
use treewalk::Interpreter;
/// A position in source text, expressed as a row and a column.
#[derive(Clone, Copy, Debug)]
pub struct Span {
    pub row: usize,
    pub column: usize,
}

impl Add<Span> for Span {
    type Output = Span;

    /// Combines two spans by taking the smaller row and the larger column.
    fn add(self, rhs: Span) -> Self::Output {
        let row = self.row.min(rhs.row);
        let column = self.column.max(rhs.column);
        Span { row, column }
    }
}
/// Debug helper: tokenizes a fixed sample covering the symbols, literals and
/// keywords of the language and prints each resulting token.
fn test_tokenization() {
    let test_str = r#"
( ) [ ] { } . .. , :
; + - * / -> => += -=
*= /= ! != = == <= >=
? ?= < > literal 10
0x10 0b10 10.0 1.0..2.0
2.0..=3.0 if else and
or xor not nand nor xnor
print break for while true
false "\u263b" '\x30'
"#;
    let tokenizer = Tokenizer::new(test_str.chars());
    for tok in tokenizer {
        println!("{tok:?}");
    }
}
fn main() { fn main() {
repl(); let src = include_str!("../demo.lang");
} println!("{src}");
let tokens: Vec<_> = Tokenizer::new(src.chars())
pub fn repl() { .filter(|t| t.0.is_meaningful())
let mut buffer = String::new(); .collect();
let stdin = std::io::stdin(); let parsed = Parser::new(tokens.into_iter()).file().unwrap();
loop { let mut interp = Interpreter::new(parsed.into_iter());
stdin.read_line(&mut buffer).unwrap(); interp.run().unwrap();
let tokens = token::tokenize(&buffer);
for tok in tokens {
println!("{} : {:?}", &buffer[tok.start..tok.end], tok.ttype);
}
buffer = String::new();
}
} }

453
src/parse.rs Normal file
View file

@ -0,0 +1,453 @@
use crate::err::*;
use crate::{Span, Token, TokenKind};
/// The shape of an expression node in the syntax tree.
#[derive(Clone)]
pub enum ExpressionKind {
// Literal values.
Integer(i64),
Real(f64),
String(String),
Boolean(bool),
// A name to be looked up when the expression is evaluated.
Identifier(String),
// An infix operation; `token` is the operator.
Binary {
token: TokenKind,
left: Box<Expression>,
right: Box<Expression>,
},
// A prefix or postfix operation; `token` is the operator.
Unary {
token: TokenKind,
child: Box<Expression>,
},
// An expression that was wrapped in `( )` in the source.
Parenthesis(Box<Expression>),
}
/// An expression node together with the source position it is attributed to.
#[derive(Clone)]
pub struct Expression {
    pub kind: ExpressionKind,
    pub span: Span,
}

impl Expression {
    /// Pairs an expression kind with its span.
    pub fn new(kind: ExpressionKind, span: Span) -> Self {
        Expression { kind, span }
    }
}
impl std::fmt::Debug for ExpressionKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use ExpressionKind as e;
match self {
e::Integer(i) => write!(f, "{i}"),
e::Binary { token, left, right } => {
write!(f, "({left:?} {token:?} {right:?})")
},
e::Parenthesis(inner) => write!(f, "{inner:?}"),
e::Unary { token, child } => {
write!(f, "({token:?} {child:?})")
},
e::Real(fp) => write!(f, "{fp}"),
e::String(s) => write!(f, r#""{s}""#),
e::Identifier(i) => write!(f, "{i}"),
e::Boolean(b) => write!(f, "{b}"),
}
}
}
impl std::fmt::Debug for Expression {
    /// Prints only the expression kind; the span is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?}", self.kind))
    }
}
/// The shape of a statement node in the syntax tree.
#[derive(Debug, Clone)]
pub enum StatementKind {
// Declaration parsed from `name : [type] = value`.
Mutable {
name: String,
type_: Option<String>,
value: Option<Expression>,
},
// Declaration parsed from `name : [type] : value`.
Immutable {
name: String,
type_: Option<String>,
value: Expression,
},
// `name = value` for an existing name.
Assignment {
name: String,
value: Expression,
},
// Conditional; `else_` holds an optional statement for the false case.
If {
predicate: Expression,
block: Vec<Statement>,
else_: Option<Box<Statement>>,
},
// Loop that repeats `block` while `predicate` holds.
While {
predicate: Expression,
block: Vec<Statement>,
},
// Debug print of an expression's value.
Print(Expression),
// A bare expression used as a statement.
Expression(Expression),
// A braced list of statements.
Block(Vec<Statement>),
}
/// A statement node together with the source position it is attributed to.
#[derive(Debug, Clone)]
pub struct Statement {
pub kind: StatementKind,
pub span: Span,
}
/// Binding strength of an operator; larger values bind tighter.
pub type Precedence = usize;

/// Looks up the precedence of a binary operator token.
///
/// The second tuple element is the flag `Parser::expression` binds as
/// `left_assoc`; every operator currently reports `false`. Tokens that are
/// not binary operators produce an error naming the token.
fn binary_prec(tok: &TokenKind) -> Result<(Precedence, bool)> {
    use TokenKind::*;
    let precedence: Precedence = match tok {
        Star | Slash | Percent => 10,
        Plus | Minus => 9,
        And | Nand => 8,
        Xor | Xnor => 7,
        Or | Nor => 6,
        DoubleEqual | BangEqual | Less | LessEqual | Greater | GreaterEqual => 5,
        _ => {
            return error()
                .reason(format!("{:?} is not a valid binary operator", tok));
        },
    };
    Ok((precedence, false))
}
/// Looks up the precedence of a prefix unary operator token, or fails with an
/// error naming the token when it is not one.
fn unary_prefix_prec(tok: &TokenKind) -> Result<Precedence> {
    use TokenKind::*;
    let precedence = match tok {
        Minus | Not => 11,
        Break => 3,
        _ => {
            return error()
                .reason(format!("{tok:?} is not a valid prefix unary operator"));
        },
    };
    Ok(precedence)
}
/// Looks up the precedence of a postfix unary operator token, or fails with
/// an error naming the token when it is not one.
fn unary_postfix_prec(tok: &TokenKind) -> Result<Precedence> {
    use TokenKind::*;
    let precedence = match tok {
        Question | Bang => 12,
        _ => {
            return error()
                .reason(format!("{tok:?} is not a valid postfix unary operator"));
        },
    };
    Ok(precedence)
}
/// Number of tokens the parser's lookahead window buffers.
const PARSER_LOOKAHEAD: usize = 3;
/// Token stream wrapped in a lookahead window of `PARSER_LOOKAHEAD` slots.
type TokenIter<I> = crate::Window<PARSER_LOOKAHEAD, Token, I>;
/// Parser that turns a token iterator into statements and expressions.
pub struct Parser<I: Iterator<Item = Token>> {
// Buffered token source used for peeking and consuming.
iter: TokenIter<I>,
}
impl<I: Iterator<Item = Token>> Parser<I> {
/// Creates a parser reading from `iter` through a lookahead window.
pub fn new(iter: I) -> Self {
    let iter = TokenIter::new(iter);
    Self { iter }
}
/// Consumes `n` tokens, discarding them along with any end-of-input error.
fn skip(&mut self, n: usize) {
    (0..n).for_each(|_| {
        let _ = self.next();
    });
}
/// Consumes and returns the next token, or an error when none remain.
fn next(&mut self) -> Result<Token> {
    let token = self.iter.next();
    token.reason("Unexpected end of file")
}
/// Returns a copy of the token `n` positions ahead without consuming it, or
/// an error when the input ends before that position.
fn peek(&self, n: usize) -> Result<Token> {
    let upcoming = self.iter.peek(n).as_ref().cloned();
    upcoming.reason("Unexpected end of file")
}
/// Consumes the next token when it is of kind `expect` and returns it;
/// otherwise leaves the input untouched and returns the error from `look`.
fn eat(&mut self, expect: TokenKind) -> Result<Token> {
    let token = self.look(expect)?;
    self.skip(1);
    Ok(token)
}
fn look(&mut self, expect: TokenKind) -> Result<Token> {
let next = self.peek(0)?;
if next.0 == expect {
Ok(next)
} else {
error()
.reason(format!("Expected {expect:?}, found {:?}", next.0))
.span(&next.1)
}
}
/// Parses statements until the `EOF` token, returning them in source order.
/// Stray semicolons between statements are skipped; the first statement
/// error aborts parsing.
pub fn file(&mut self) -> Result<Vec<Statement>> {
    use TokenKind as t;
    let mut statements = Vec::new();
    loop {
        // Trim extra ;
        while self.eat(t::Semicolon).is_ok() {}
        if self.eat(t::EOF).is_ok() {
            break;
        }
        let parsed = self.statement()?;
        statements.push(parsed);
    }
    Ok(statements)
}
pub fn statement(&mut self) -> Result<Statement> {
use StatementKind as s;
use TokenKind as t;
let next = self.peek(0);
let next2 = self.peek(1);
let statement = match (next, next2) {
// (im)mutable declaration
(Ok(Token(t::Identifier(name), span)), Ok(Token(t::Colon, _))) => {
self.skip(2);
let type_ = match self.eat(t::Identifier("".into())) {
Ok(Token(t::Identifier(s), _)) => Some(s),
_ => None,
};
match self.eat(t::Equal).or_else(|_| self.eat(t::Colon)) {
Ok(Token(t::Colon, _)) => Statement {
kind: s::Immutable {
name,
type_,
value: self
.expression(0)
.trace("while parsing immutable declaration")?,
},
span,
},
Ok(Token(t::Equal, _)) => Statement {
kind: s::Mutable {
name,
type_,
value: Some(
self
.expression(0)
.trace("while parsing mutable declaration")?,
),
},
span,
},
_ => return error().reason("Expected expression here"),
}
},
(Ok(Token(t::Identifier(name), span)), Ok(Token(t::Equal, _))) => {
self.skip(2);
let value = self
.expression(0)
.trace("while parsing assignment expression")?;
Statement {
kind: s::Assignment { name, value },
span,
}
},
// If
(Ok(Token(t::If, span)), _) => {
self.skip(1);
let predicate = self
.expression(0)
.reason("Expected predicate after 'if' keyword")
.span(&span)?;
let block = self.block().trace("while parsing if statement")?;
return Ok(Statement {
span,
kind: s::If {
predicate,
block: block.0,
else_: None,
},
});
},
// While
(Ok(Token(t::While, span)), _) => {
self.skip(1);
let predicate = self
.expression(0)
.reason("Expected predicate after 'while' keyword")
.span(&span)?;
let block = self.block().trace("while parsing while statement")?;
return Ok(Statement {
span,
kind: s::While {
predicate,
block: block.0,
},
});
},
// (DEBUG) print
(Ok(Token(t::Print, span)), _) => {
self.skip(1);
let expr = self.expression(0).trace("while parsing print statement")?;
Statement {
span: span + expr.span,
kind: s::Print(expr),
}
},
// Block
(Ok(Token(t::LeftBrace, _)), _) => {
// Skip check for semicolon
let (block, span) =
self.block().trace("while parsing block statement")?;
return Ok(Statement {
kind: s::Block(block),
span,
});
},
// Expression
_ => {
let expr = self
.expression(0)
.trace("while parsing expression statement")?;
Statement {
span: expr.span,
kind: s::Expression(expr),
}
},
};
// Check for semicolon
if self.eat(t::Semicolon).is_ok() {
Ok(statement)
} else {
error().reason("Expected ;")
}
}
/// Parses an expression by precedence climbing.
///
/// `precedence` is the binding strength of the enclosing operator (callers
/// in this file pass 0 at the top level). The function first parses either a
/// prefix-unary expression or a primary, then repeatedly folds binary and
/// postfix operators into `current` until it meets an operator that does not
/// bind tighter than `precedence`, or a token that is not an operator.
pub fn expression(
&mut self,
mut precedence: Precedence,
) -> Result<Expression> {
use ExpressionKind as e;
use TokenKind as t;
let next = self.peek(0)?;
// Unary prefix expression
let mut current = if let Ok(p) = unary_prefix_prec(&next.0) {
// peek(0) succeeded above, so a token is available to consume.
let operator = self.next().expect("unreachable");
let child = self
.expression(p)
.trace(format!("while parsing unary {:?}", operator.0))
.span(&operator.1)?;
let span = child.span + operator.1;
Expression::new(
e::Unary {
token: operator.0,
child: child.into(),
},
span,
)
}
// Terminal or paren
else {
self.primary()?
};
// Precedence climbing loop
while let Ok(next) = self.peek(0) {
// Binary infix
if let Ok((new_precedence, left_assoc)) = binary_prec(&next.0) {
// binary_prec reports `false` for every operator, so in practice an
// operator of equal or lower precedence ends this call and is left for
// the caller to consume.
if (!left_assoc && new_precedence <= precedence)
|| (new_precedence < precedence)
{
return Ok(current);
}
let operator = self.next().expect("unreachable");
let rhs = self
.expression(new_precedence)
.trace(format!("while parsing binary {:?}", operator.0))
.span(&operator.1)?;
let span = next.1 + rhs.span;
current = Expression::new(
e::Binary {
token: operator.0,
left: current.into(),
right: rhs.into(),
},
span,
);
}
// Unary postfix
else if let Ok(new_precedence) = unary_postfix_prec(&next.0) {
let operator = self.next().expect("unreachable");
let span = next.1 + operator.1;
// Raise the threshold so later binary operators are compared against
// the postfix operator's precedence.
precedence = new_precedence;
current = Expression::new(
e::Unary {
token: operator.0,
child: current.into(),
},
span,
);
} else {
break;
}
}
Ok(current)
}
/// Parses a primary expression: an integer, real, string or boolean literal,
/// an identifier, or a parenthesized expression.
///
/// The `false` literal is now accepted; previously only `true` was handled,
/// so `false` fell through to the "Expected primary" error.
///
/// # Errors
/// Returns a spanned diagnostic when the next token cannot start a primary,
/// or when a `(` is not matched by `)`.
fn primary(&mut self) -> Result<Expression> {
    use ExpressionKind as e;
    use TokenKind as t;
    let next = self.peek(0)?;
    let span = next.1;
    let kind = match next.0 {
        t::IntegerLiteral(i) => e::Integer(i),
        t::FloatLiteral(f) => e::Real(f),
        t::StringLiteral(s) => e::String(s),
        t::True => e::Boolean(true),
        t::False => e::Boolean(false),
        t::Identifier(i) => e::Identifier(i),
        t::LeftParen => {
            self.eat(t::LeftParen).expect("unreachable");
            let expr = self
                .expression(0)
                .trace("while parsing parenthesized expression")?;
            // Only checked here; the `)` is consumed by the shared skip below.
            self
                .look(t::RightParen)
                .reason("Unclosed '('")
                .span(&expr.span)?;
            e::Parenthesis(expr.into())
        },
        _ => {
            return error()
                .span(&span)
                .reason(format!("Expected primary, found {:?}", next.0));
        },
    };
    // Consume the literal or identifier token, or the closing parenthesis.
    self.skip(1);
    Ok(Expression { kind, span })
}
/// Parses `{ statement* }` and returns the statements together with a span
/// accumulated from the braces and every token and statement in between.
fn block(&mut self) -> Result<(Vec<Statement>, Span)> {
    use TokenKind as t;
    let opening = self.eat(t::LeftBrace).reason("Expected block")?;
    let mut span = opening.1;
    let mut statements = Vec::new();
    loop {
        // Fails with an end-of-file error when the block is never closed.
        let upcoming = self.peek(0)?;
        span = span + upcoming.1;
        if let Ok(closing) = self.eat(t::RightBrace) {
            span = span + closing.1;
            break;
        }
        let statement = self.statement()?;
        span = span + statement.span;
        statements.push(statement);
    }
    Ok((statements, span))
}
}

View file

@ -1,22 +1,38 @@
use crate::err::*;
use crate::Span;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum TokenType { pub enum TokenKind {
// Symbols
LeftParen, LeftParen,
RightParen, RightParen,
LeftSquare,
RightSquare,
LeftBrace, LeftBrace,
RightBrace, RightBrace,
LeftSquare,
RightSquare,
Comma, Comma,
Colon,
Semicolon,
Dot, Dot,
DotDot,
Plus, Plus,
Minus, Minus,
Star,
Slash, Slash,
Semicolon, Star,
Percent,
Arrow,
FatArrow,
PlusEqual,
MinusEqual,
SlashEqual,
StarEqual,
PercentEqual,
Bang, Bang,
BangEqual, BangEqual,
Question,
QuestionEqual,
Equal, Equal,
DoubleEqual, DoubleEqual,
Greater, Greater,
@ -24,187 +40,463 @@ pub enum TokenType {
Less, Less,
LessEqual, LessEqual,
// Literals Pipe,
String, Ampersand,
Character, Carrot,
Number(f64), Hash,
DotDotEqual,
Identifier(String),
StringLiteral(String),
CharLiteral(char),
IntegerLiteral(i64),
FloatLiteral(f64),
// Words
Ident,
And,
Or,
Self_,
Struct,
True,
False,
Fn,
If, If,
Else, Else,
Nil, And,
Or,
Xor,
Not,
Nand,
Nor,
Xnor,
Print, Print,
Break,
Return, Return,
Super, Continue,
Let,
While,
For, For,
While,
True,
False,
Struct,
Enum,
Union,
// Special Whitespace(String),
Unrecognized, SmallComment(String),
TooLong, BigComment(String),
Idk,
EOF,
} }
/// Type, index impl TokenKind {
#[derive(Debug, Clone)] pub fn is_meaningful(&self) -> bool {
pub struct Token { match self {
pub ttype: TokenType, Self::Whitespace(_)
pub start: usize, | Self::SmallComment(_)
pub end: usize, | Self::BigComment(_)
} | Self::Idk => false,
_ => true,
pub fn tokenize(input: &str) -> Vec<Token> {
let input_str = input;
let mut input = input.char_indices().peekable();
let mut tokens = vec![];
'outer: loop {
// Find next non-whitespace line
let (start, c) = 'ws: loop {
match input.next() {
// Stop at end of input
None => break 'outer,
Some((index, character)) if !character.is_whitespace() => {
break 'ws (index, character)
},
_ => {},
} }
}
}
impl PartialEq for TokenKind {
    /// Two kinds are equal when they are the same variant; payloads such as
    /// identifier names or literal values are not compared.
    fn eq(&self, other: &Self) -> bool {
        use std::mem::discriminant;
        let (lhs, rhs) = (discriminant(self), discriminant(other));
        lhs == rhs
    }
}

impl Eq for TokenKind {}
/// A lexed token: its kind (field 0) and the source position it was read at
/// (field 1).
#[derive(Clone, Debug)]
pub struct Token(pub TokenKind, pub Span);
/// Shorthand that wraps a kind and span into `Ok(Token)`.
fn t(tk: TokenKind, sp: Span) -> Result<Token> {
Ok(Token(tk, sp))
}
/// Number of characters the tokenizer's lookahead window buffers.
const TOKENIZER_LOOKAHEAD: usize = 2;
/// Character stream wrapped in a lookahead window of `TOKENIZER_LOOKAHEAD` slots.
type CharIter<I> = crate::Window<TOKENIZER_LOOKAHEAD, char, I>;
/// Lexer that turns a character iterator into tokens.
pub struct Tokenizer<I: Iterator<Item = char>> {
// Buffered character source.
iter: CharIter<I>,
// Current position; both counters start at 1.
column: usize,
row: usize,
// Set once the EOF token has been yielded by the `Iterator` impl.
finished: bool,
}
impl<I: Iterator<Item = char>> Tokenizer<I> {
/// Creates a tokenizer over `iter`, positioned at row 1, column 1.
pub fn new(iter: I) -> Self {
    let iter = CharIter::new(iter);
    Self {
        iter,
        row: 1,
        column: 1,
        finished: false,
    }
}
/// Returns the tokenizer's current row and column as a `Span`.
pub fn span(&self) -> Span {
    let (row, column) = (self.row, self.column);
    Span { column, row }
}
/// Consumes one character and updates the position: a newline moves to the
/// start of the next row, any other character advances the column.
/// Returns `None` at end of input.
fn next_char(&mut self) -> Option<char> {
    let c = self.iter.next()?;
    if c == '\n' {
        self.row += 1;
        self.column = 1;
    } else {
        self.column += 1;
    }
    Some(c)
}
fn delimited(&mut self, terminator: char) -> Option<String> {
let mut buffer = String::new();
let mut escape = false;
loop {
let c = match self.next_char() {
Some(c) if c == terminator && !escape => {
break;
},
Some(c) => {
if c == '\\' {
escape = !escape;
} else {
escape = false;
}
c
},
None => return None,
}; };
let mut end = start + 1; buffer.push(c)
let mut advance = || {}; }
let ttype = match c { Some(buffer)
}
/// Returns the character `n` positions ahead without consuming it, or `None`
/// when the input ends before that position.
fn peek(&mut self, n: usize) -> Option<char> {
    let upcoming: &Option<char> = self.iter.peek(n);
    *upcoming
}
fn _next(&mut self) -> Result<Token> {
use TokenKind::*;
let position = Span {
row: self.row,
column: self.column,
};
let current = match self.next_char() {
Some(c) => c,
None => return t(EOF, position),
};
// Parse whitespace
if current.is_whitespace() {
let mut buffer = String::from(current);
while let Some(c) = self.peek(0) {
if !c.is_whitespace() {
break;
}
_ = self.next_char();
buffer.push(c.clone());
}
return t(Whitespace(buffer), position);
}
// Parse multiline comments
if let ('/', Some('*')) = (current, self.peek(0)) {
let _ = self.next_char();
let mut comment_level = 1;
let mut buffer = String::new();
while let Some(current) = self.next_char() {
// Ignore /* */ inside strings
if '\"' == current {
if let Some(inner_string) = self.delimited('\"') {
buffer.push('\"');
buffer.push_str(&inner_string);
buffer.push('\"');
continue;
}
}
if let ('/', Some('*')) = (current, self.peek(0)) {
comment_level += 1;
} else if let ('*', Some('/')) = (current, self.peek(0)) {
comment_level -= 1;
}
if comment_level == 0 {
let _ = self.next_char();
break;
}
buffer.push(current);
}
return t(BigComment(buffer), position);
}
// Parse single line comments
if let ('/', Some('/')) = (current, self.peek(0)) {
let _ = self.next_char();
let mut buffer = String::new();
while let Some(c) = self.next_char() {
if c == '\n' {
break;
}
buffer.push(c);
}
return t(SmallComment(buffer), position);
}
let next = self.peek(0);
let next_next = self.peek(1);
// Match single character tokens // Match single character tokens
'(' => TokenType::LeftParen,
')' => TokenType::RightParen,
'[' => TokenType::LeftSquare,
']' => TokenType::RightSquare,
'{' => TokenType::LeftBrace,
'}' => TokenType::RightBrace,
',' => TokenType::Comma,
'.' => TokenType::Dot,
'+' => TokenType::Plus,
'-' => TokenType::Minus,
'*' => TokenType::Star,
'/' => TokenType::Slash,
';' => TokenType::Semicolon,
// Match multicharacter tokens
'!' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::BangEqual
},
_ => TokenType::Bang,
},
'=' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::DoubleEqual
},
_ => TokenType::Equal,
},
'<' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::GreaterEqual
},
_ => TokenType::Greater,
},
'>' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::LessEqual
},
_ => TokenType::Less,
},
// Match keywords, identifiers, and literals
c if c.is_alphanumeric() => 'case: {
// Scan full word
while let Some((new_end, next)) = input.peek() {
if next.is_alphanumeric() || *next == '_' {
let _ = input.next();
} else {
end = *new_end;
break;
}
}
let word = &input_str[start..end];
// Attempt to parse hex literal
if let Some(s) =
word.strip_prefix("0x").or_else(|| word.strip_prefix("0X"))
{ {
if let Ok(n) = u64::from_str_radix(s, 16) { let not_next = move |c| Some(c) != next;
break 'case TokenType::Number(n as f64); let kind = match current {
} else { '(' => LeftParen,
break 'case TokenType::Unrecognized; ')' => RightParen,
} '{' => LeftBrace,
} '}' => RightBrace,
// Attempt to parse binary literal '[' => LeftSquare,
if let Some(s) = ']' => RightSquare,
word.strip_prefix("0b").or_else(|| word.strip_prefix("0B")) ',' => Comma,
{ ':' => Colon,
if let Ok(n) = u64::from_str_radix(s, 2) { ';' => Semicolon,
break 'case TokenType::Number(n as f64); '|' => Pipe,
} else { '&' => Ampersand,
break 'case TokenType::Unrecognized; '^' => Carrot,
} '#' => Hash,
} '.' if not_next('.') => Dot,
// Attempt to parse decimal literal '+' if not_next('=') => Plus,
if let Ok(f) = word.parse::<f64>() { '-' if not_next('=') && not_next('>') => Minus,
break 'case TokenType::Number(f); '*' if not_next('=') => Star,
} '/' if not_next('=') => Slash,
// Parse keyword or ident '%' if not_next('=') => Percent,
match word { '!' if not_next('=') => Bang,
"and" => TokenType::And, '?' if not_next('=') => Question,
"or" => TokenType::Or, '=' if not_next('=') && not_next('>') => Equal,
"self" => TokenType::Self_, '<' if not_next('=') => Less,
"struct" => TokenType::Struct, '>' if not_next('=') => Greater,
"true" => TokenType::True, _ => Idk,
"false" => TokenType::False, };
"fn" => TokenType::Fn, if kind != Idk {
"if" => TokenType::If, return t(kind, position);
"else" => TokenType::Else,
"nil" => TokenType::Nil,
"print" => TokenType::Print,
"return" => TokenType::Return,
"super" => TokenType::Super,
"let" => TokenType::Let,
"while" => TokenType::While,
"for" => TokenType::For,
_ => TokenType::Ident,
}
},
// Parse string
'"' => {
while let Some((new_end, next)) = input.next() {
match next {
'"' => {
end = new_end + 1;
break;
},
// Skip escapes and deal with them later
'\\' => {
let _ = input.next();
},
_ => {},
}
}
TokenType::String
},
// Parse character
_ => TokenType::Unrecognized,
}; };
tokens.push(Token { ttype, start, end });
} }
tokens // Match two character tokens
if let Some(next) = next {
let not_next_next = move |c| Some(c) != next_next;
let kind = match (current, next) {
('.', '.') if not_next_next('=') => DotDot,
('+', '=') => PlusEqual,
('-', '=') => MinusEqual,
('*', '=') => StarEqual,
('/', '=') => SlashEqual,
('%', '=') => PercentEqual,
('=', '=') => DoubleEqual,
('?', '=') => QuestionEqual,
('!', '=') => BangEqual,
('<', '=') => LessEqual,
('>', '=') => GreaterEqual,
('-', '>') => Arrow,
('=', '>') => FatArrow,
_ => Idk,
};
if kind != Idk {
let _ = self.next();
return t(kind, position);
}
}
// Match three character tokens
if let (Some(next), Some(next_next)) = (next, next_next) {
let kind = match (current, next, next_next) {
('.', '.', '=') => DotDotEqual,
_ => Idk,
};
if kind != Idk {
let _ = self.next();
let _ = self.next();
return t(kind, position);
}
}
let mut buffer = String::new();
// Match character
if current == '\'' {
let buffer = self
.delimited('\'')
.reason("Single quote (') was opened, but never closed")
.span(&position)?;
let baked = bake_string(&buffer)?;
if baked.len() != 1 {
return error()
.reason("Single quote (') contains more than one character")
.span(&position);
}
let kind = CharLiteral(
baked
.chars()
.next()
.reason("Single quote (') contains no characters")
.span(&position)?,
);
return t(kind, position);
}
// Match string
if current == '"' {
let buffer = self
.delimited('\"')
.reason("Double quote (\") was opened, but never closed")
.span(&position)?;
let kind = StringLiteral(bake_string(&buffer)?);
return t(kind, position);
}
buffer.push(current);
// Match number
if current.is_ascii_digit() {
// Only one dot per number
let mut encountered_dot = false;
while let Some(c) = self.peek(0) {
if c == '.' && !encountered_dot {
if let Some('.') = self.peek(1) {
break;
}
encountered_dot = true;
} else if !(c == '_' || c == 'x' || c.is_ascii_hexdigit()) {
break;
}
buffer.push(c);
let _ = self.next_char();
}
return t(parse_number(&buffer).span(&position)?, position);
}
// Match keyword or identifier
while let Some(c) = self.peek(0) {
if c.is_alphanumeric() || c == '_' {
let _ = self.next_char();
} else {
break;
}
buffer.push(c);
}
// Match keywords
{
let kind = match buffer.as_str() {
"if" => If,
"else" => Else,
"and" => And,
"or" => Or,
"xor" => Xor,
"nand" => Nand,
"nor" => Nor,
"xnor" => Xnor,
"for" => For,
"while" => While,
"print" => Print,
"break" => Break,
"return" => Return,
"continue" => Continue,
"not" => Not,
"true" => True,
"false" => False,
"struct" => Struct,
"enum" => Enum,
"union" => Union,
_ => Identifier(buffer),
};
return t(kind, position);
}
}
}
impl<I: Iterator<Item = char>> Iterator for Tokenizer<I> {
    type Item = Token;

    /// Yields the next successfully lexed token.
    ///
    /// Lexing errors are dropped and scanning continues with the following
    /// input. The `EOF` token is yielded exactly once; after that the
    /// iterator returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let token = match self._next() {
                Ok(token) => token,
                Err(_) => continue,
            };
            if !matches!(token.0, TokenKind::EOF) {
                return Some(token);
            }
            if self.finished {
                return None;
            }
            self.finished = true;
            return Some(token);
        }
    }
}
/// Converts the text of a numeric literal into a token kind.
///
/// Underscores are removed first. Text containing a `.` is parsed as a
/// decimal float; otherwise a `0x`, `0o` or `0b` prefix selects base 16, 8
/// or 2, and anything else is parsed as a base-10 integer.
fn parse_number(num: &str) -> Result<TokenKind> {
    use TokenKind::*;
    let num = num.replace('_', "");
    // Floating point (only decimal)
    if num.contains('.') {
        return num.parse::<f64>().map(FloatLiteral).coerce();
    }
    // Integers: pick the radix from the prefix, defaulting to decimal
    let (digits, radix) = if let Some(hex) = num.strip_prefix("0x") {
        (hex, 16)
    } else if let Some(oct) = num.strip_prefix("0o") {
        (oct, 8)
    } else if let Some(bin) = num.strip_prefix("0b") {
        (bin, 2)
    } else {
        (num.as_str(), 10)
    };
    i64::from_str_radix(digits, radix)
        .map(IntegerLiteral)
        .coerce()
}
fn bake_string(s: &str) -> Result<String> {
let mut baked = String::with_capacity(s.len());
let mut it = s.chars();
loop {
match it.next() {
Some('\\') => baked.push(match it.next() {
Some('n') => '\n', // New line
Some('r') => '\r', // Carriage return
Some('t') => '\t', // Tab
Some('b') => '\x08', // Backspace
Some('\\') => '\\', // Backslash
Some('\0') => '\0', // Null
Some('"') => '\"', // Double quote
Some('\'') => '\'', // Single quote
Some('x') => {
// Ascii escapes
let mut a = || {
let a = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
let b = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
let num = (a << 4) | b;
char::from_u32(num)
};
a().reason(format!("Found invalid ASCII (\\aXX) escape sequence"))?
},
Some('u') => {
// Unicode escapes
let mut a = || {
let a = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
let b = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
let c = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
let d = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
let num = (a << 12) | (b << 8) | (c << 4) | d;
char::from_u32(num)
};
a().reason("Found invalid Unicode (\\uXXXX) escape sequence")?
},
_ => return Err(Diagnostic::new("Found invalid escape sequence")),
}),
// Unremarkable character
Some(c) => baked.push(c),
None => break,
}
}
Ok(baked)
} }

280
src/treewalk.rs Normal file
View file

@ -0,0 +1,280 @@
use crate::err::*;
use std::collections::HashMap;
use crate::{
Expression, ExpressionKind, Statement, StatementKind, Token, TokenKind,
};
/// A runtime value produced by evaluating an expression.
#[derive(Debug, Clone)]
enum Value {
Integer(i64),
Real(f64),
String(String),
Boolean(bool),
// Placeholder stored for a name that has been declared but not yet assigned.
Undefined,
}
/// One level of variable bindings, linked to the enclosing level.
#[derive(Debug, Clone)]
struct Scope {
// The enclosing scope, or `None` for the outermost one.
outer: Option<Box<Scope>>,
// Names declared directly in this scope and their current values.
declarations: HashMap<String, Value>,
}
impl Scope {
fn new() -> Self {
Self {
outer: None,
declarations: Default::default(),
}
}
fn enscope(&mut self) -> &mut Self {
*self = Self {
outer: Some(Box::new(self.clone())),
declarations: HashMap::new(),
};
self
}
fn descope(&mut self) -> &mut Self {
if let Some(outer) = &self.outer {
*self = *outer.clone();
}
self
}
fn declare(&mut self, key: String) -> Result<()> {
if self.declarations.contains_key(&key) {
return error()
.reason(format!("Re-declaration of '{key}' in same scope"));
}
self.declarations.insert(key, Value::Undefined);
Ok(())
}
fn assign(&mut self, key: String, value: Value) -> Result<()> {
if !self.declarations.contains_key(&key) {
if let Some(outer) = &mut self.outer {
return outer.assign(key, value);
}
return error()
.reason(format!("Assignemnt to '{key}' before declaration"));
}
self.declarations.insert(key, value);
Ok(())
}
fn access(&self, key: String) -> Result<Value> {
match self.declarations.get(&key) {
Some(v) => Ok(v.clone()),
None => {
if let Some(outer) = &self.outer {
outer.access(key)
} else {
error().reason(format!("'{key}' was never declared"))
}
},
}
}
}
/// Tree-walking interpreter that executes statements pulled from an iterator.
pub struct Interpreter<I: Iterator<Item = Statement>> {
// Variable bindings for the code currently executing.
scope: Scope,
// Source of top-level statements consumed by `run`.
iter: I,
}
impl<I: Iterator<Item = Statement>> Interpreter<I> {
/// Creates an interpreter over `iter` with an empty outermost scope.
pub fn new(iter: I) -> Self {
    let scope = Scope::new();
    Self { scope, iter }
}
/// Evaluates `child` and applies the unary operator `token` to the result.
///
/// Integers and reals support `+` and `-`; booleans support `not`.
///
/// # Errors
/// Fails when the operator is undefined for the operand's type, or when
/// negating an integer overflows (previously a panic in debug builds and a
/// silent wrap in release builds). The fallback message for unsupported
/// operand types now says "Unary" instead of "Binary".
fn evaluate_unary(
    &mut self,
    token: TokenKind,
    child: Expression,
) -> Result<Value> {
    use TokenKind as t;
    use Value as v;
    let val = self.evaluate(child)?;
    Ok(match val {
        v::Integer(i) => v::Integer(match token {
            t::Plus => i,
            // i64::MIN has no positive counterpart.
            t::Minus => i
                .checked_neg()
                .reason("Integer overflow while negating")?,
            _ => {
                return error()
                    .reason(format!("Unary {token:?} is undefined for integers"));
            },
        }),
        v::Real(r) => v::Real(match token {
            t::Plus => r,
            t::Minus => -r,
            _ => {
                return error()
                    .reason(format!("Unary {token:?} is undefined for reals"));
            },
        }),
        v::Boolean(b) => v::Boolean(match token {
            t::Not => !b,
            _ => {
                return error()
                    .reason(format!("Unary {token:?} is undefined for booleans"));
            },
        }),
        _ => {
            return error()
                .reason(format!("Unary {token:?} is undefined for {val:?}",));
        },
    })
}
/// Evaluates both operands (left first) and applies the binary operator
/// `token`.
///
/// Integer pairs support `+ - * / %` and the comparisons
/// `== != < > <= >=`; real pairs support `+ - * /`.
///
/// Changes from the previous version: `!=` is now defined for integers (the
/// parser already accepted it), and integer arithmetic reports division by
/// zero and overflow as diagnostics instead of panicking.
///
/// # Errors
/// Fails when the operator is undefined for the operand types, on integer
/// division or remainder by zero, or on integer overflow.
fn evaluate_binary(
    &mut self,
    token: TokenKind,
    left: Expression,
    right: Expression,
) -> Result<Value> {
    use TokenKind as t;
    use Value::*;
    let left = self.evaluate(left)?;
    let right = self.evaluate(right)?;
    // Match on references so the operands stay available for the fallback
    // error message without cloning them up front.
    Ok(match (&left, &right) {
        (&Integer(l), &Integer(r)) => match token {
            t::Plus => Integer(
                l.checked_add(r).reason("Integer overflow in addition")?,
            ),
            t::Minus => Integer(
                l.checked_sub(r).reason("Integer overflow in subtraction")?,
            ),
            t::Star => Integer(
                l.checked_mul(r)
                    .reason("Integer overflow in multiplication")?,
            ),
            t::Slash | t::Percent if r == 0 => {
                return error().reason("Division by zero");
            },
            // The only remaining failure is i64::MIN divided by -1.
            t::Slash => Integer(
                l.checked_div(r).reason("Integer overflow in division")?,
            ),
            t::Percent => Integer(
                l.checked_rem(r).reason("Integer overflow in remainder")?,
            ),
            t::DoubleEqual => Boolean(l == r),
            t::BangEqual => Boolean(l != r),
            t::Less => Boolean(l < r),
            t::Greater => Boolean(l > r),
            t::LessEqual => Boolean(l <= r),
            t::GreaterEqual => Boolean(l >= r),
            other => {
                return error()
                    .reason(format!("Binary {other:?} is undefined for integers"));
            },
        },
        (&Real(l), &Real(r)) => Real(match token {
            t::Plus => l + r,
            t::Minus => l - r,
            t::Star => l * r,
            t::Slash => l / r,
            other => {
                return error()
                    .reason(format!("Binary {other:?} is undefined for reals"));
            },
        }),
        _ => {
            return error().reason(format!(
                "Binary {:?} is undefined for {:?} and {:?}",
                token, left, right
            ));
        },
    })
}
fn evaluate(&mut self, expr: Expression) -> Result<Value> {
use ExpressionKind as e;
match expr.kind {
e::Integer(i) => Ok(Value::Integer(i)),
e::Real(r) => Ok(Value::Real(r)),
e::String(s) => Ok(Value::String(s)),
e::Boolean(b) => Ok(Value::Boolean(b)),
e::Identifier(i) => self.scope.access(i),
e::Binary { token, left, right } => {
self.evaluate_binary(token, *left, *right)
},
e::Unary { token, child } => self.evaluate_unary(token, *child),
e::Parenthesis(e) => self.evaluate(*e),
}
.span(&expr.span)
}
/// Executes a single statement against the interpreter's scope.
///
/// Declarations declare the name and then assign the evaluated value (when
/// there is one); `print` writes the value's debug form to stdout; `if` and
/// `while` require a boolean predicate.
///
/// An `if` statement's `else_` branch is now executed when the predicate is
/// false. Previously the branch was silently ignored.
///
/// # Errors
/// Propagates evaluation and scope errors, and fails when an `if` or `while`
/// predicate is not a boolean.
pub fn execute(&mut self, statement: Statement) -> Result<()> {
    use StatementKind as s;
    match statement.kind {
        s::Mutable { name, value, .. } => {
            self.scope.declare(name.clone())?;
            if let Some(value) = value {
                let value = self.evaluate(value)?;
                self.scope.assign(name, value)?;
            }
        },
        s::Immutable { name, value, .. } => {
            self.scope.declare(name.clone())?;
            let value = self.evaluate(value)?;
            self.scope.assign(name, value)?;
        },
        s::Assignment { name, value } => {
            let span = value.span;
            let value = self.evaluate(value).span(&span)?;
            self.scope.assign(name, value).span(&span)?;
        },
        s::Print(e) => {
            let e = self.evaluate(e)?;
            println!("{e:?}");
        },
        s::Expression(e) => {
            self.evaluate(e)?;
        },
        s::Block(block) => self.block(block)?,
        s::If {
            predicate,
            block,
            else_,
        } => {
            let span = predicate.span;
            match self.evaluate(predicate)? {
                Value::Boolean(true) => self.block(block)?,
                Value::Boolean(false) => {
                    if let Some(else_) = else_ {
                        let else_span = else_.span;
                        self.execute(*else_).span(&else_span)?;
                    }
                },
                _ => {
                    return error()
                        .reason("Predicate for 'if' statement must be a boolean")
                        .span(&span);
                },
            }
        },
        s::While { predicate, block } => {
            let span = predicate.span;
            loop {
                // The predicate and body are re-evaluated from fresh copies
                // on every iteration because evaluation consumes its input.
                match self.evaluate(predicate.clone())? {
                    Value::Boolean(true) => self.block(block.clone())?,
                    Value::Boolean(false) => break,
                    _ => {
                        return error()
                            .reason("Predicate for 'while' statement must be a boolean")
                            .span(&span);
                    },
                }
            }
        },
    }
    Ok(())
}
/// Executes `block` inside a fresh nested scope.
///
/// The nested scope is popped whether or not a statement fails. Previously
/// an error returned early and left the interpreter inside the nested scope.
///
/// # Errors
/// Returns the first statement error, attributed to that statement's span
/// when it has no span of its own.
fn block(&mut self, block: Vec<Statement>) -> Result<()> {
    self.scope.enscope();
    let outcome = block.into_iter().try_for_each(|s| {
        let span = s.span;
        self.execute(s).span(&span)
    });
    self.scope.descope();
    outcome
}
/// Executes every statement from the interpreter's iterator in order,
/// stopping at the first error. Errors without a span are attributed to the
/// failing statement's span.
pub fn run(&mut self) -> Result<()> {
    while let Some(statement) = self.iter.next() {
        let span = statement.span;
        self.execute(statement).span(&span)?;
    }
    Ok(())
}
}