This commit is contained in:
Logan 2024-10-15 11:20:35 -05:00
parent b55135245d
commit 7a33804ffe
7 changed files with 1486 additions and 181 deletions

6
demo.lang Normal file
View file

@ -0,0 +1,6 @@
i := 0;
while i < 10 {
i = i + 1;
print i;
}

151
src/err.rs Normal file
View file

@ -0,0 +1,151 @@
use crate::Span;
pub type Result<T> = std::result::Result<T, Diagnostic>;
/// Starts an error chain: returns an `Err` holding a [`Diagnostic`] with an
/// empty reason, ready to be refined with `.reason(..)`, `.trace(..)` or
/// `.span(..)`.
pub fn error<T>() -> Result<T> {
    let blank = Diagnostic::new("");
    Err(blank)
}
/// An error report: a human-readable reason, an optional source location and
/// a list of context notes collected while the error propagates.
#[derive(Clone)]
pub struct Diagnostic {
    /// Main message shown to the user.
    reason: String,
    /// Source position the error refers to, when one has been attached.
    span: Option<Span>,
    /// Context notes; printed most-recently-added first.
    backtrace: Vec<String>,
}

impl Diagnostic {
    /// Builds a diagnostic with the given reason, no span and no backtrace.
    pub fn new(reason: impl Into<String>) -> Self {
        let reason = reason.into();
        Self {
            reason,
            span: None,
            backtrace: Vec::new(),
        }
    }
}
impl std::fmt::Display for Diagnostic {
    /// Writes the reason on the first line, prefixed by `(row:column)` when a
    /// span is present and by `(E)` otherwise, followed by one `--> note`
    /// line per backtrace entry, newest entry first.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.span {
            Some(span) => write!(f, "({}:{}) {}\n", span.row, span.column, self.reason)?,
            None => write!(f, "(E) {}\n", self.reason)?,
        }
        self.backtrace
            .iter()
            .rev()
            .try_for_each(|b| write!(f, "--> {}\n", b))
    }
}
impl std::fmt::Debug for Diagnostic {
    /// Debug output is the same text as the `Display` output, so that
    /// `unwrap()`/`expect()` failures show the formatted diagnostic.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self, f)
    }
}
impl From<std::io::Error> for Diagnostic {
fn from(value: std::io::Error) -> Self {
Self {
reason: format!("{}", value),
span: None,
backtrace: vec![],
}
}
}
impl From<std::num::ParseIntError> for Diagnostic {
    /// Maps an integer parse failure to a user-facing reason based on the
    /// error kind. Kinds other than overflow and invalid digits share a
    /// generic message.
    fn from(value: std::num::ParseIntError) -> Self {
        use std::num::IntErrorKind;
        let reason = match value.kind() {
            IntErrorKind::PosOverflow | IntErrorKind::NegOverflow => {
                "Integer value is too large to represent"
            }
            // Fixed typo in the message ("containts" -> "contains").
            IntErrorKind::InvalidDigit => "Integer value contains invalid digits",
            // `IntErrorKind` is non-exhaustive, so a catch-all arm is required.
            _ => "Integer value could not be parsed",
        };
        Diagnostic::new(reason)
    }
}
impl From<std::num::ParseFloatError> for Diagnostic {
fn from(_value: std::num::ParseFloatError) -> Self {
Diagnostic::new("Float value could not be parsed")
}
}
/// Extension methods that turn an `Option` or a `Result` into this module's
/// [`Result`], attaching the message `s` to the resulting [`Diagnostic`].
pub trait IntoDiagnostic<T, S: Into<String>> {
/// Uses `s` as the diagnostic's reason. The implementations in this file
/// overwrite whatever reason an existing error already carried.
fn reason(self, s: S) -> Result<T>;
/// Records `s` as a backtrace entry on the diagnostic.
fn trace(self, s: S) -> Result<T>;
}
/// Extension methods for attaching or removing a source location on the
/// error side of a result.
pub trait WithSpan<T> {
/// Attaches `span` to the error; the implementation in this file keeps an
/// already-present span instead of replacing it.
fn span(self, span: &Span) -> Result<T>;
/// Removes any span from the error.
fn no_span(self) -> Result<T>;
}
impl<T> WithSpan<T> for Result<T> {
    /// Sets the error's span to `span` unless the error already has one.
    /// `Ok` values pass through untouched.
    fn span(self, span: &Span) -> Result<T> {
        match self {
            Err(mut diagnostic) => {
                if diagnostic.span.is_none() {
                    diagnostic.span = Some(span.clone());
                }
                Err(diagnostic)
            }
            ok => ok,
        }
    }

    /// Clears the error's span. `Ok` values pass through untouched.
    fn no_span(self) -> Result<T> {
        match self {
            Err(mut diagnostic) => {
                diagnostic.span = None;
                Err(diagnostic)
            }
            ok => ok,
        }
    }
}
/// Converts a result with a foreign error type into this module's
/// [`Result`] without changing the error's message.
pub trait CoerceDiagnostic<T> {
/// Maps the error side into a [`Diagnostic`].
fn coerce(self) -> Result<T>;
}
impl<T, S: Into<String>> IntoDiagnostic<T, S> for Option<T> {
    /// `Some(t)` becomes `Ok(t)`; `None` becomes an error whose reason is `s`.
    fn reason(self, s: S) -> Result<T> {
        self.ok_or_else(|| Diagnostic {
            reason: s.into(),
            span: None,
            backtrace: Vec::new(),
        })
    }

    /// `Some(t)` becomes `Ok(t)`; `None` becomes an error with an empty
    /// reason and `s` as its only backtrace entry.
    fn trace(self, s: S) -> Result<T> {
        self.ok_or_else(|| Diagnostic {
            reason: String::new(),
            span: None,
            backtrace: vec![s.into()],
        })
    }
}
impl<T, E: Into<Diagnostic>, S: Into<String>> IntoDiagnostic<T, S>
    for std::result::Result<T, E>
{
    /// Converts the error into a [`Diagnostic`] and replaces its reason
    /// with `s`. Span and backtrace are kept.
    fn reason(self, s: S) -> Result<T> {
        match self {
            Ok(value) => Ok(value),
            Err(cause) => {
                let mut diagnostic: Diagnostic = cause.into();
                diagnostic.reason = s.into();
                Err(diagnostic)
            }
        }
    }

    /// Converts the error into a [`Diagnostic`] and appends `s` to its
    /// backtrace.
    fn trace(self, s: S) -> Result<T> {
        match self {
            Ok(value) => Ok(value),
            Err(cause) => {
                let mut diagnostic: Diagnostic = cause.into();
                diagnostic.backtrace.push(s.into());
                Err(diagnostic)
            }
        }
    }
}
impl<T, E: Into<Diagnostic>> CoerceDiagnostic<T> for std::result::Result<T, E> {
    /// Converts the error side through its `Into<Diagnostic>` implementation.
    fn coerce(self) -> Result<T> {
        match self {
            Ok(value) => Ok(value),
            Err(cause) => Err(cause.into()),
        }
    }
}

84
src/lookahead.rs Normal file
View file

@ -0,0 +1,84 @@
/// An iterator adapter that keeps the next `N` items of the wrapped iterator
/// in a buffer so they can be inspected with [`Window::peek`] before being
/// consumed.
///
/// Once the wrapped iterator has returned `None` it is never polled again;
/// every later slot is filled with `None`.
pub struct Window<const N: usize, T, I>
where
    I: Iterator<Item = T>,
{
    /// The wrapped source of items.
    iterator: I,
    /// Upcoming items; index 0 is what `next()` returns next.
    buffer: [Option<T>; N],
    /// Set once the wrapped iterator has returned `None`.
    exhausted: bool,
    /// Set once `next()` on the window itself has returned `None`.
    pub finished: bool,
}

impl<const N: usize, T, I> std::fmt::Debug for Window<N, T, I>
where
    I: Iterator<Item = T>,
    T: std::fmt::Debug,
{
    /// Prints only the lookahead buffer.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self.buffer)
    }
}

impl<const N: usize, T, I> Window<N, T, I>
where
    I: Iterator<Item = T>,
{
    /// Wraps `it` and pre-fills the buffer with its first `N` items.
    ///
    /// # Panics
    /// Panics if `N` is zero.
    pub fn new(mut it: I) -> Self {
        assert!(N > 0, "Lookahead buffer cannot be 0 sized");
        let mut exhausted = false;
        let buffer = std::array::from_fn(|_| Self::pull(&mut it, &mut exhausted));
        Self {
            iterator: it,
            buffer,
            exhausted,
            finished: false,
        }
    }

    /// Borrows the wrapped iterator.
    pub fn inner(&self) -> &I {
        &self.iterator
    }

    /// Fetches one item from `source`, remembering in `exhausted` when the
    /// source ran dry so that it is not polled again afterwards.
    fn pull(source: &mut I, exhausted: &mut bool) -> Option<T> {
        if *exhausted {
            return None;
        }
        let item = source.next();
        *exhausted = item.is_none();
        item
    }

    /// Returns the item `n` positions ahead (0 is the next item to be
    /// yielded) without consuming it.
    ///
    /// # Panics
    /// Panics if `n >= N`.
    pub fn peek(&self, n: usize) -> &Option<T> {
        debug_assert!(n < N, "Peeked further than buffer allows");
        &self.buffer[n]
    }

    /// Shifts the buffer one slot towards the front and refills the last
    /// slot. Slot 0 is expected to have been taken by the caller already.
    fn _advance(&mut self) {
        // The vacated front slot rotates to the back and is overwritten.
        self.buffer.rotate_left(1);
        self.buffer[N - 1] = Self::pull(&mut self.iterator, &mut self.exhausted);
    }
}

impl<const N: usize, T, I> Iterator for Window<N, T, I>
where
    I: Iterator<Item = T>,
{
    type Item = T;

    /// Yields the front of the buffer and refills the buffer from the
    /// wrapped iterator. Sets `finished` the first time `None` is returned.
    fn next(&mut self) -> Option<Self::Item> {
        let front = self.buffer[0].take();
        if front.is_none() {
            self.finished = true;
        }
        self._advance();
        front
    }
}

View file

@ -1,18 +1,57 @@
mod err;
mod lookahead;
mod parse;
mod token;
mod treewalk;
use std::ops::Add;
use lookahead::*;
use parse::*;
use token::*;
use treewalk::Interpreter;
/// A position in the source text, as a row and a column.
#[derive(Clone, Copy, Debug)]
pub struct Span {
    pub row: usize,
    pub column: usize,
}

impl Add for Span {
    type Output = Span;

    /// Combines two spans by taking the smaller row and the larger column.
    fn add(self, rhs: Span) -> Self::Output {
        let row = self.row.min(rhs.row);
        let column = self.column.max(rhs.column);
        Span { row, column }
    }
}
/// Manual smoke test: runs the tokenizer over a sample containing the
/// punctuation, literal and keyword forms listed below and prints each token.
fn test_tokenization() {
    let test_str = r#"
( ) [ ] { } . .. , :
; + - * / -> => += -=
*= /= ! != = == <= >=
? ?= < > literal 10
0x10 0b10 10.0 1.0..2.0
2.0..=3.0 if else and
or xor not nand nor xnor
print break for while true
false "\u263b" '\x30'
"#;
    for tok in Tokenizer::new(test_str.chars()) {
        println!("{tok:?}");
    }
}
fn main() {
repl();
}
pub fn repl() {
let mut buffer = String::new();
let stdin = std::io::stdin();
loop {
stdin.read_line(&mut buffer).unwrap();
let tokens = token::tokenize(&buffer);
for tok in tokens {
println!("{} : {:?}", &buffer[tok.start..tok.end], tok.ttype);
}
buffer = String::new();
}
let src = include_str!("../demo.lang");
println!("{src}");
let tokens: Vec<_> = Tokenizer::new(src.chars())
.filter(|t| t.0.is_meaningful())
.collect();
let parsed = Parser::new(tokens.into_iter()).file().unwrap();
let mut interp = Interpreter::new(parsed.into_iter());
interp.run().unwrap();
}

453
src/parse.rs Normal file
View file

@ -0,0 +1,453 @@
use crate::err::*;
use crate::{Span, Token, TokenKind};
/// The different shapes an expression node can take.
#[derive(Clone)]
pub enum ExpressionKind {
/// Integer literal.
Integer(i64),
/// Floating point literal.
Real(f64),
/// String literal.
String(String),
/// Boolean literal.
Boolean(bool),
/// A name to be looked up when evaluated.
Identifier(String),
/// An infix operation; `token` is the operator.
Binary {
token: TokenKind,
left: Box<Expression>,
right: Box<Expression>,
},
/// A prefix or postfix operation on a single operand; `token` is the operator.
Unary {
token: TokenKind,
child: Box<Expression>,
},
/// An expression wrapped in parentheses.
Parenthesis(Box<Expression>),
}
/// An expression node together with the source position it is reported at.
#[derive(Clone)]
pub struct Expression {
pub kind: ExpressionKind,
pub span: Span,
}
impl Expression {
/// Pairs an expression kind with its span.
pub fn new(kind: ExpressionKind, span: Span) -> Self {
Self { kind, span }
}
}
impl std::fmt::Debug for ExpressionKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use ExpressionKind as e;
match self {
e::Integer(i) => write!(f, "{i}"),
e::Binary { token, left, right } => {
write!(f, "({left:?} {token:?} {right:?})")
},
e::Parenthesis(inner) => write!(f, "{inner:?}"),
e::Unary { token, child } => {
write!(f, "({token:?} {child:?})")
},
e::Real(fp) => write!(f, "{fp}"),
e::String(s) => write!(f, r#""{s}""#),
e::Identifier(i) => write!(f, "{i}"),
e::Boolean(b) => write!(f, "{b}"),
}
}
}
impl std::fmt::Debug for Expression {
    /// Prints only the expression kind; the span is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let kind = &self.kind;
        write!(f, "{:?}", kind)
    }
}
/// The different kinds of statement the parser can produce.
#[derive(Debug, Clone)]
pub enum StatementKind {
/// Declaration written with `=`; the initial value is optional in this type.
Mutable {
name: String,
type_: Option<String>,
value: Option<Expression>,
},
/// Declaration written with a second `:`; always has a value.
Immutable {
name: String,
type_: Option<String>,
value: Expression,
},
/// Assignment of a new value to an existing name.
Assignment {
name: String,
value: Expression,
},
/// Conditional with an optional else branch.
If {
predicate: Expression,
block: Vec<Statement>,
else_: Option<Box<Statement>>,
},
/// Loop that repeats `block` while `predicate` holds.
While {
predicate: Expression,
block: Vec<Statement>,
},
/// Debug print of an expression's value.
Print(Expression),
/// An expression used as a statement.
Expression(Expression),
/// A brace-delimited list of statements.
Block(Vec<Statement>),
}
/// A statement together with the source position it is reported at.
#[derive(Debug, Clone)]
pub struct Statement {
pub kind: StatementKind,
pub span: Span,
}
/// Binding strength of an operator; in the tables below a larger number
/// binds more tightly.
pub type Precedence = usize;
/// Looks up the precedence of a binary (infix) operator token.
///
/// Returns the precedence paired with an associativity flag, which is
/// `false` for every operator listed here. Tokens that are not binary
/// operators produce an error.
fn binary_prec(tok: &TokenKind) -> Result<(Precedence, bool)> {
    use TokenKind::*;
    let level: Precedence = match tok {
        Star | Slash | Percent => 10,
        Plus | Minus => 9,
        And | Nand => 8,
        Xor | Xnor => 7,
        Or | Nor => 6,
        DoubleEqual | BangEqual | Less | LessEqual | Greater | GreaterEqual => 5,
        _ => {
            return error().reason(format!("{:?} is not a valid binary operator", tok));
        }
    };
    Ok((level, false))
}
/// Looks up the precedence of a prefix operator token, or fails if the
/// token is not a prefix operator.
fn unary_prefix_prec(tok: &TokenKind) -> Result<Precedence> {
    use TokenKind::*;
    match tok {
        Minus | Not => Ok(11),
        Break => Ok(3),
        _ => error().reason(format!("{tok:?} is not a valid prefix unary operator")),
    }
}
/// Looks up the precedence of a postfix operator token, or fails if the
/// token is not a postfix operator.
fn unary_postfix_prec(tok: &TokenKind) -> Result<Precedence> {
    use TokenKind::*;
    match tok {
        Question | Bang => Ok(12),
        _ => error().reason(format!("{tok:?} is not a valid postfix unary operator")),
    }
}
// Number of upcoming tokens the parser's lookahead window holds.
const PARSER_LOOKAHEAD: usize = 3;
// Lookahead window over a token stream, sized by PARSER_LOOKAHEAD.
type TokenIter<I> = crate::Window<PARSER_LOOKAHEAD, Token, I>;
/// Parser that reads tokens from an iterator through a lookahead window and
/// builds statements and expressions.
pub struct Parser<I: Iterator<Item = Token>> {
// Token stream wrapped in the lookahead window.
iter: TokenIter<I>,
}
impl<I: Iterator<Item = Token>> Parser<I> {
/// Creates a parser over `iter`, wrapping it in the lookahead window.
pub fn new(iter: I) -> Self {
Self {
iter: TokenIter::new(iter),
}
}
/// Consumes up to `n` tokens and discards them. Running past the end of
/// the stream is not reported.
fn skip(&mut self, n: usize) {
    let mut remaining = n;
    while remaining > 0 {
        let _ = self.next();
        remaining -= 1;
    }
}
/// Consumes and returns the next token, or fails with "Unexpected end of
/// file" when the stream has no more tokens.
fn next(&mut self) -> Result<Token> {
self.iter.next().reason("Unexpected end of file")
}
/// Returns a clone of the token `n` positions ahead without consuming it,
/// or fails with "Unexpected end of file" when that slot is empty.
fn peek(&self, n: usize) -> Result<Token> {
self.iter.peek(n).clone().reason("Unexpected end of file")
}
/// Consumes the next token if it is of kind `expect` and returns it.
/// On a mismatch nothing is consumed and the error from `look` is returned.
fn eat(&mut self, expect: TokenKind) -> Result<Token> {
    let token = self.look(expect)?;
    self.skip(1);
    Ok(token)
}
fn look(&mut self, expect: TokenKind) -> Result<Token> {
let next = self.peek(0)?;
if next.0 == expect {
Ok(next)
} else {
error()
.reason(format!("Expected {expect:?}, found {:?}", next.0))
.span(&next.1)
}
}
pub fn file(&mut self) -> Result<Vec<Statement>> {
use TokenKind as t;
let mut statements = vec![];
loop {
// Trim extra ;
while self.eat(t::Semicolon).is_ok() {}
if self.eat(t::EOF).is_ok() {
return Ok(statements);
}
match self.statement() {
Ok(s) => statements.push(s),
Err(e) => {
return Err(e);
},
}
}
}
pub fn statement(&mut self) -> Result<Statement> {
use StatementKind as s;
use TokenKind as t;
let next = self.peek(0);
let next2 = self.peek(1);
let statement = match (next, next2) {
// (im)mutable declaration
(Ok(Token(t::Identifier(name), span)), Ok(Token(t::Colon, _))) => {
self.skip(2);
let type_ = match self.eat(t::Identifier("".into())) {
Ok(Token(t::Identifier(s), _)) => Some(s),
_ => None,
};
match self.eat(t::Equal).or_else(|_| self.eat(t::Colon)) {
Ok(Token(t::Colon, _)) => Statement {
kind: s::Immutable {
name,
type_,
value: self
.expression(0)
.trace("while parsing immutable declaration")?,
},
span,
},
Ok(Token(t::Equal, _)) => Statement {
kind: s::Mutable {
name,
type_,
value: Some(
self
.expression(0)
.trace("while parsing mutable declaration")?,
),
},
span,
},
_ => return error().reason("Expected expression here"),
}
},
(Ok(Token(t::Identifier(name), span)), Ok(Token(t::Equal, _))) => {
self.skip(2);
let value = self
.expression(0)
.trace("while parsing assignment expression")?;
Statement {
kind: s::Assignment { name, value },
span,
}
},
// If
(Ok(Token(t::If, span)), _) => {
self.skip(1);
let predicate = self
.expression(0)
.reason("Expected predicate after 'if' keyword")
.span(&span)?;
let block = self.block().trace("while parsing if statement")?;
return Ok(Statement {
span,
kind: s::If {
predicate,
block: block.0,
else_: None,
},
});
},
// While
(Ok(Token(t::While, span)), _) => {
self.skip(1);
let predicate = self
.expression(0)
.reason("Expected predicate after 'while' keyword")
.span(&span)?;
let block = self.block().trace("while parsing while statement")?;
return Ok(Statement {
span,
kind: s::While {
predicate,
block: block.0,
},
});
},
// (DEBUG) print
(Ok(Token(t::Print, span)), _) => {
self.skip(1);
let expr = self.expression(0).trace("while parsing print statement")?;
Statement {
span: span + expr.span,
kind: s::Print(expr),
}
},
// Block
(Ok(Token(t::LeftBrace, _)), _) => {
// Skip check for semicolon
let (block, span) =
self.block().trace("while parsing block statement")?;
return Ok(Statement {
kind: s::Block(block),
span,
});
},
// Expression
_ => {
let expr = self
.expression(0)
.trace("while parsing expression statement")?;
Statement {
span: expr.span,
kind: s::Expression(expr),
}
},
};
// Check for semicolon
if self.eat(t::Semicolon).is_ok() {
Ok(statement)
} else {
error().reason("Expected ;")
}
}
/// Parses an expression using precedence climbing.
///
/// `precedence` is the binding strength of the operator to the left of this
/// expression (0 at the top level). A leading prefix operator or a primary
/// is parsed first; the loop then folds in binary and postfix operators for
/// as long as the next operator binds more tightly than `precedence`.
pub fn expression(
&mut self,
mut precedence: Precedence,
) -> Result<Expression> {
use ExpressionKind as e;
use TokenKind as t;
let next = self.peek(0)?;
// Unary prefix expression
let mut current = if let Ok(p) = unary_prefix_prec(&next.0) {
// The peek above succeeded, so a token is available here.
let operator = self.next().expect("unreachable");
let child = self
.expression(p)
.trace(format!("while parsing unary {:?}", operator.0))
.span(&operator.1)?;
let span = child.span + operator.1;
Expression::new(
e::Unary {
token: operator.0,
child: child.into(),
},
span,
)
}
// Terminal or paren
else {
self.primary()?
};
// Precedence climbing loop
while let Ok(next) = self.peek(0) {
// Binary infix
if let Ok((new_precedence, left_assoc)) = binary_prec(&next.0) {
// Stop and hand `current` back to the caller when the upcoming
// operator does not bind more tightly than the caller's operator.
if (!left_assoc && new_precedence <= precedence)
|| (new_precedence < precedence)
{
return Ok(current);
}
let operator = self.next().expect("unreachable");
let rhs = self
.expression(new_precedence)
.trace(format!("while parsing binary {:?}", operator.0))
.span(&operator.1)?;
let span = next.1 + rhs.span;
current = Expression::new(
e::Binary {
token: operator.0,
left: current.into(),
right: rhs.into(),
},
span,
);
}
// Unary postfix
else if let Ok(new_precedence) = unary_postfix_prec(&next.0) {
let operator = self.next().expect("unreachable");
let span = next.1 + operator.1;
// The postfix operator's precedence becomes the threshold for the
// rest of this loop.
precedence = new_precedence;
current = Expression::new(
e::Unary {
token: operator.0,
child: current.into(),
},
span,
);
} else {
// Not an operator: the expression ends here.
break;
}
}
Ok(current)
}
/// Parses a primary expression: a literal, an identifier, or a
/// parenthesised expression.
///
/// # Errors
/// Fails with "Expected primary, found ..." on any other token, with
/// "Unclosed '('" when a parenthesised expression is not followed by `)`,
/// and with "Unexpected end of file" when no token is left.
fn primary(&mut self) -> Result<Expression> {
    use ExpressionKind as e;
    use TokenKind as t;
    let next = self.peek(0)?;
    let span = next.1;
    let kind = match next.0 {
        t::IntegerLiteral(i) => e::Integer(i),
        t::FloatLiteral(f) => e::Real(f),
        t::StringLiteral(s) => e::String(s),
        t::True => e::Boolean(true),
        // `false` was previously rejected as "Expected primary".
        t::False => e::Boolean(false),
        t::Identifier(i) => e::Identifier(i),
        t::LeftParen => {
            self.eat(t::LeftParen).expect("unreachable");
            let expr = self
                .expression(0)
                .trace("while parsing parenthesized expression")?;
            // Only checked here; the `)` is consumed by the skip below.
            self
                .look(t::RightParen)
                .reason("Unclosed '('")
                .span(&expr.span)?;
            e::Parenthesis(expr.into())
        }
        _ => {
            return error()
                .span(&span)
                .reason(format!("Expected primary, found {:?}", next.0));
        }
    };
    // Consume the token that was only peeked at (or the closing paren).
    self.skip(1);
    Ok(Expression { kind, span })
}
fn block(&mut self) -> Result<(Vec<Statement>, Span)> {
use TokenKind as t;
let mut span = self.eat(t::LeftBrace).reason("Expected block")?.1;
let mut statements = vec![];
loop {
let next = self.peek(0)?;
span = span + next.1;
match self.eat(t::RightBrace) {
Ok(t) => {
span = span + t.1;
break;
},
_ => {
let statement = self.statement()?;
span = span + statement.span;
statements.push(statement);
},
};
}
Ok((statements, span))
}
}

View file

@ -1,22 +1,38 @@
use crate::err::*;
use crate::Span;
#[derive(Debug, Clone)]
pub enum TokenType {
// Symbols
pub enum TokenKind {
LeftParen,
RightParen,
LeftSquare,
RightSquare,
LeftBrace,
RightBrace,
LeftSquare,
RightSquare,
Comma,
Colon,
Semicolon,
Dot,
DotDot,
Plus,
Minus,
Star,
Slash,
Semicolon,
Star,
Percent,
Arrow,
FatArrow,
PlusEqual,
MinusEqual,
SlashEqual,
StarEqual,
PercentEqual,
Bang,
BangEqual,
Question,
QuestionEqual,
Equal,
DoubleEqual,
Greater,
@ -24,187 +40,463 @@ pub enum TokenType {
Less,
LessEqual,
// Literals
String,
Character,
Number(f64),
Pipe,
Ampersand,
Carrot,
Hash,
DotDotEqual,
Identifier(String),
StringLiteral(String),
CharLiteral(char),
IntegerLiteral(i64),
FloatLiteral(f64),
// Words
Ident,
And,
Or,
Self_,
Struct,
True,
False,
Fn,
If,
Else,
Nil,
And,
Or,
Xor,
Not,
Nand,
Nor,
Xnor,
Print,
Break,
Return,
Super,
Let,
While,
Continue,
For,
While,
True,
False,
Struct,
Enum,
Union,
// Special
Unrecognized,
TooLong,
Whitespace(String),
SmallComment(String),
BigComment(String),
Idk,
EOF,
}
/// Type, index
#[derive(Debug, Clone)]
pub struct Token {
pub ttype: TokenType,
pub start: usize,
pub end: usize,
}
pub fn tokenize(input: &str) -> Vec<Token> {
let input_str = input;
let mut input = input.char_indices().peekable();
let mut tokens = vec![];
'outer: loop {
// Find next non-whitespace line
let (start, c) = 'ws: loop {
match input.next() {
// Stop at end of input
None => break 'outer,
Some((index, character)) if !character.is_whitespace() => {
break 'ws (index, character)
},
_ => {},
impl TokenKind {
    /// Returns `false` for whitespace, both comment kinds and the `Idk`
    /// placeholder, and `true` for every other token kind.
    pub fn is_meaningful(&self) -> bool {
        !matches!(
            self,
            Self::Whitespace(_) | Self::SmallComment(_) | Self::BigComment(_) | Self::Idk
        )
    }
}
// Token kinds compare by variant only: payloads are ignored, so two
// `Identifier`s (or two literals of the same kind) are equal regardless of
// their contents.
impl PartialEq for TokenKind {
fn eq(&self, other: &Self) -> bool {
std::mem::discriminant(self) == std::mem::discriminant(other)
}
}
impl Eq for TokenKind {
}
/// A token: its kind (field 0) and the source position recorded for it
/// (field 1).
#[derive(Clone, Debug)]
pub struct Token(pub TokenKind, pub Span);
/// Shorthand for building a successful token result.
fn t(tk: TokenKind, sp: Span) -> Result<Token> {
Ok(Token(tk, sp))
}
// Number of upcoming characters the tokenizer's lookahead window holds.
const TOKENIZER_LOOKAHEAD: usize = 2;
// Lookahead window over a character stream, sized by TOKENIZER_LOOKAHEAD.
type CharIter<I> = crate::Window<TOKENIZER_LOOKAHEAD, char, I>;
/// Turns a stream of characters into tokens while tracking the current
/// row and column.
pub struct Tokenizer<I: Iterator<Item = char>> {
// Character stream wrapped in the lookahead window.
iter: CharIter<I>,
// Current column; starts at 1 and resets to 1 after each newline.
column: usize,
// Current row; starts at 1 and grows by one per newline.
row: usize,
// Set once the EOF token has been handed out by the iterator.
finished: bool,
}
impl<I: Iterator<Item = char>> Tokenizer<I> {
/// Creates a tokenizer over `iter`, positioned at row 1, column 1.
pub fn new(iter: I) -> Self {
Self {
iter: CharIter::new(iter),
column: 1,
row: 1,
finished: false,
}
}
/// Returns the tokenizer's current row and column as a `Span`.
pub fn span(&self) -> Span {
Span {
column: self.column,
row: self.row,
}
}
/// Consumes one character and updates the position: a newline moves to
/// column 1 of the next row, any other character advances the column.
/// Returns `None` at the end of input without touching the position.
fn next_char(&mut self) -> Option<char> {
    let c = self.iter.next()?;
    if c == '\n' {
        self.row += 1;
        self.column = 1;
    } else {
        self.column += 1;
    }
    Some(c)
}
fn delimited(&mut self, terminator: char) -> Option<String> {
let mut buffer = String::new();
let mut escape = false;
loop {
let c = match self.next_char() {
Some(c) if c == terminator && !escape => {
break;
},
Some(c) => {
if c == '\\' {
escape = !escape;
} else {
escape = false;
}
c
},
None => return None,
};
let mut end = start + 1;
let mut advance = || {};
let ttype = match c {
buffer.push(c)
}
Some(buffer)
}
/// Returns the character `n` positions ahead without consuming it, or
/// `None` when that lookahead slot is empty.
fn peek(&mut self, n: usize) -> Option<char> {
self.iter.peek(n).clone()
}
fn _next(&mut self) -> Result<Token> {
use TokenKind::*;
let position = Span {
row: self.row,
column: self.column,
};
let current = match self.next_char() {
Some(c) => c,
None => return t(EOF, position),
};
// Parse whitespace
if current.is_whitespace() {
let mut buffer = String::from(current);
while let Some(c) = self.peek(0) {
if !c.is_whitespace() {
break;
}
_ = self.next_char();
buffer.push(c.clone());
}
return t(Whitespace(buffer), position);
}
// Parse multiline comments
if let ('/', Some('*')) = (current, self.peek(0)) {
let _ = self.next_char();
let mut comment_level = 1;
let mut buffer = String::new();
while let Some(current) = self.next_char() {
// Ignore /* */ inside strings
if '\"' == current {
if let Some(inner_string) = self.delimited('\"') {
buffer.push('\"');
buffer.push_str(&inner_string);
buffer.push('\"');
continue;
}
}
if let ('/', Some('*')) = (current, self.peek(0)) {
comment_level += 1;
} else if let ('*', Some('/')) = (current, self.peek(0)) {
comment_level -= 1;
}
if comment_level == 0 {
let _ = self.next_char();
break;
}
buffer.push(current);
}
return t(BigComment(buffer), position);
}
// Parse single line comments
if let ('/', Some('/')) = (current, self.peek(0)) {
let _ = self.next_char();
let mut buffer = String::new();
while let Some(c) = self.next_char() {
if c == '\n' {
break;
}
buffer.push(c);
}
return t(SmallComment(buffer), position);
}
let next = self.peek(0);
let next_next = self.peek(1);
// Match single character tokens
'(' => TokenType::LeftParen,
')' => TokenType::RightParen,
'[' => TokenType::LeftSquare,
']' => TokenType::RightSquare,
'{' => TokenType::LeftBrace,
'}' => TokenType::RightBrace,
',' => TokenType::Comma,
'.' => TokenType::Dot,
'+' => TokenType::Plus,
'-' => TokenType::Minus,
'*' => TokenType::Star,
'/' => TokenType::Slash,
';' => TokenType::Semicolon,
// Match multicharacter tokens
'!' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::BangEqual
},
_ => TokenType::Bang,
},
'=' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::DoubleEqual
},
_ => TokenType::Equal,
},
'<' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::GreaterEqual
},
_ => TokenType::Greater,
},
'>' => match input.peek() {
Some((_, '=')) => {
input.next();
end += 1;
TokenType::LessEqual
},
_ => TokenType::Less,
},
// Match keywords, identifiers, and literals
c if c.is_alphanumeric() => 'case: {
// Scan full word
while let Some((new_end, next)) = input.peek() {
if next.is_alphanumeric() || *next == '_' {
let _ = input.next();
} else {
end = *new_end;
break;
}
}
let word = &input_str[start..end];
// Attempt to parse hex literal
if let Some(s) =
word.strip_prefix("0x").or_else(|| word.strip_prefix("0X"))
{
if let Ok(n) = u64::from_str_radix(s, 16) {
break 'case TokenType::Number(n as f64);
} else {
break 'case TokenType::Unrecognized;
}
}
// Attempt to parse binary literal
if let Some(s) =
word.strip_prefix("0b").or_else(|| word.strip_prefix("0B"))
{
if let Ok(n) = u64::from_str_radix(s, 2) {
break 'case TokenType::Number(n as f64);
} else {
break 'case TokenType::Unrecognized;
}
}
// Attempt to parse decimal literal
if let Ok(f) = word.parse::<f64>() {
break 'case TokenType::Number(f);
}
// Parse keyword or ident
match word {
"and" => TokenType::And,
"or" => TokenType::Or,
"self" => TokenType::Self_,
"struct" => TokenType::Struct,
"true" => TokenType::True,
"false" => TokenType::False,
"fn" => TokenType::Fn,
"if" => TokenType::If,
"else" => TokenType::Else,
"nil" => TokenType::Nil,
"print" => TokenType::Print,
"return" => TokenType::Return,
"super" => TokenType::Super,
"let" => TokenType::Let,
"while" => TokenType::While,
"for" => TokenType::For,
_ => TokenType::Ident,
}
},
// Parse string
'"' => {
while let Some((new_end, next)) = input.next() {
match next {
'"' => {
end = new_end + 1;
break;
},
// Skip escapes and deal with them later
'\\' => {
let _ = input.next();
},
_ => {},
}
}
TokenType::String
},
// Parse character
_ => TokenType::Unrecognized,
let not_next = move |c| Some(c) != next;
let kind = match current {
'(' => LeftParen,
')' => RightParen,
'{' => LeftBrace,
'}' => RightBrace,
'[' => LeftSquare,
']' => RightSquare,
',' => Comma,
':' => Colon,
';' => Semicolon,
'|' => Pipe,
'&' => Ampersand,
'^' => Carrot,
'#' => Hash,
'.' if not_next('.') => Dot,
'+' if not_next('=') => Plus,
'-' if not_next('=') && not_next('>') => Minus,
'*' if not_next('=') => Star,
'/' if not_next('=') => Slash,
'%' if not_next('=') => Percent,
'!' if not_next('=') => Bang,
'?' if not_next('=') => Question,
'=' if not_next('=') && not_next('>') => Equal,
'<' if not_next('=') => Less,
'>' if not_next('=') => Greater,
_ => Idk,
};
if kind != Idk {
return t(kind, position);
};
tokens.push(Token { ttype, start, end });
}
tokens
// Match two character tokens
if let Some(next) = next {
let not_next_next = move |c| Some(c) != next_next;
let kind = match (current, next) {
('.', '.') if not_next_next('=') => DotDot,
('+', '=') => PlusEqual,
('-', '=') => MinusEqual,
('*', '=') => StarEqual,
('/', '=') => SlashEqual,
('%', '=') => PercentEqual,
('=', '=') => DoubleEqual,
('?', '=') => QuestionEqual,
('!', '=') => BangEqual,
('<', '=') => LessEqual,
('>', '=') => GreaterEqual,
('-', '>') => Arrow,
('=', '>') => FatArrow,
_ => Idk,
};
if kind != Idk {
let _ = self.next();
return t(kind, position);
}
}
// Match three character tokens
if let (Some(next), Some(next_next)) = (next, next_next) {
let kind = match (current, next, next_next) {
('.', '.', '=') => DotDotEqual,
_ => Idk,
};
if kind != Idk {
let _ = self.next();
let _ = self.next();
return t(kind, position);
}
}
let mut buffer = String::new();
// Match character
if current == '\'' {
let buffer = self
.delimited('\'')
.reason("Single quote (') was opened, but never closed")
.span(&position)?;
let baked = bake_string(&buffer)?;
if baked.len() != 1 {
return error()
.reason("Single quote (') contains more than one character")
.span(&position);
}
let kind = CharLiteral(
baked
.chars()
.next()
.reason("Single quote (') contains no characters")
.span(&position)?,
);
return t(kind, position);
}
// Match string
if current == '"' {
let buffer = self
.delimited('\"')
.reason("Double quote (\") was opened, but never closed")
.span(&position)?;
let kind = StringLiteral(bake_string(&buffer)?);
return t(kind, position);
}
buffer.push(current);
// Match number
if current.is_ascii_digit() {
// Only one dot per number
let mut encountered_dot = false;
while let Some(c) = self.peek(0) {
if c == '.' && !encountered_dot {
if let Some('.') = self.peek(1) {
break;
}
encountered_dot = true;
} else if !(c == '_' || c == 'x' || c.is_ascii_hexdigit()) {
break;
}
buffer.push(c);
let _ = self.next_char();
}
return t(parse_number(&buffer).span(&position)?, position);
}
// Match keyword or identifier
while let Some(c) = self.peek(0) {
if c.is_alphanumeric() || c == '_' {
let _ = self.next_char();
} else {
break;
}
buffer.push(c);
}
// Match keywords
{
let kind = match buffer.as_str() {
"if" => If,
"else" => Else,
"and" => And,
"or" => Or,
"xor" => Xor,
"nand" => Nand,
"nor" => Nor,
"xnor" => Xnor,
"for" => For,
"while" => While,
"print" => Print,
"break" => Break,
"return" => Return,
"continue" => Continue,
"not" => Not,
"true" => True,
"false" => False,
"struct" => Struct,
"enum" => Enum,
"union" => Union,
_ => Identifier(buffer),
};
return t(kind, position);
}
}
}
impl<I: Iterator<Item = char>> Iterator for Tokenizer<I> {
    type Item = Token;

    /// Yields the next successfully scanned token.
    ///
    /// Tokenization errors are discarded and scanning continues with the
    /// following token. The EOF token is yielded exactly once; after that
    /// the iterator returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let token = match self._next() {
                Ok(token) => token,
                Err(_) => continue,
            };
            if !matches!(token.0, TokenKind::EOF) {
                return Some(token);
            }
            if self.finished {
                return None;
            }
            self.finished = true;
            return Some(token);
        }
    }
}
/// Converts the text of a numeric literal into a token kind.
///
/// Underscores are removed first. Text containing a `.` is parsed as a
/// decimal float; otherwise a `0x`, `0o` or `0b` prefix selects hexadecimal,
/// octal or binary, and anything else is parsed as a decimal integer.
/// Parse failures are converted into diagnostics.
fn parse_number(num: &str) -> Result<TokenKind> {
    use TokenKind::*;
    let num = num.replace('_', "");
    // Floating point (only decimal)
    if num.contains('.') {
        return num.parse::<f64>().map(FloatLiteral).coerce();
    }
    // Prefixed integers, checked in the order hex, octal, binary
    let radix_prefixes: [(&str, u32); 3] = [("0x", 16), ("0o", 8), ("0b", 2)];
    for &(prefix, radix) in &radix_prefixes {
        if let Some(digits) = num.strip_prefix(prefix) {
            return i64::from_str_radix(digits, radix)
                .map(IntegerLiteral)
                .coerce();
        }
    }
    // Decimal integer
    num.parse::<i64>().map(IntegerLiteral).coerce()
}
/// Resolves backslash escape sequences in the raw contents of a string or
/// character literal.
///
/// Supported escapes: `\n`, `\r`, `\t`, `\b`, `\\`, `\0`, `\"`, `\'`,
/// `\xXX` (two hex digits) and `\uXXXX` (four hex digits).
///
/// # Errors
/// Returns a diagnostic for an unknown escape, a trailing backslash, or a
/// hex escape that is too short, contains a non-hex digit, or does not
/// name a valid `char`.
fn bake_string(s: &str) -> Result<String> {
    let mut baked = String::with_capacity(s.len());
    let mut it = s.chars();
    while let Some(c) = it.next() {
        // Unremarkable character
        if c != '\\' {
            baked.push(c);
            continue;
        }
        let escaped = match it.next() {
            Some('n') => '\n',   // New line
            Some('r') => '\r',   // Carriage return
            Some('t') => '\t',   // Tab
            Some('b') => '\x08', // Backspace
            Some('\\') => '\\',  // Backslash
            // Null. The escape is spelled with the digit '0'; a backslash
            // followed by a literal NUL is still accepted, as it was before.
            Some('0') | Some('\0') => '\0',
            Some('"') => '\"',   // Double quote
            Some('\'') => '\'',  // Single quote
            // Ascii escapes
            Some('x') => bake_hex_escape(&mut it, 2)
                .reason("Found invalid ASCII (\\xXX) escape sequence")?,
            // Unicode escapes
            Some('u') => bake_hex_escape(&mut it, 4)
                .reason("Found invalid Unicode (\\uXXXX) escape sequence")?,
            _ => return Err(Diagnostic::new("Found invalid escape sequence")),
        };
        baked.push(escaped);
    }
    Ok(baked)
}

/// Reads exactly `digits` hexadecimal digits from `chars` and returns the
/// character with that code, or `None` if the input is too short, contains
/// a non-hex digit, or the code is not a valid `char`.
fn bake_hex_escape(chars: &mut std::str::Chars<'_>, digits: usize) -> Option<char> {
    let mut code = 0u32;
    for _ in 0..digits {
        code = (code << 4) | chars.next()?.to_digit(16)?;
    }
    char::from_u32(code)
}

280
src/treewalk.rs Normal file
View file

@ -0,0 +1,280 @@
use crate::err::*;
use std::collections::HashMap;
use crate::{
Expression, ExpressionKind, Statement, StatementKind, Token, TokenKind,
};
/// A runtime value produced by evaluating an expression.
#[derive(Debug, Clone)]
enum Value {
Integer(i64),
Real(f64),
String(String),
Boolean(bool),
/// Value of a name that has been declared but not yet assigned.
Undefined,
}
/// One level of variable bindings, linked to the scope that encloses it.
#[derive(Debug, Clone)]
struct Scope {
// Enclosing scope; `None` for the outermost scope.
outer: Option<Box<Scope>>,
// Names declared directly in this scope and their current values.
declarations: HashMap<String, Value>,
}
impl Scope {
fn new() -> Self {
Self {
outer: None,
declarations: Default::default(),
}
}
fn enscope(&mut self) -> &mut Self {
*self = Self {
outer: Some(Box::new(self.clone())),
declarations: HashMap::new(),
};
self
}
fn descope(&mut self) -> &mut Self {
if let Some(outer) = &self.outer {
*self = *outer.clone();
}
self
}
fn declare(&mut self, key: String) -> Result<()> {
if self.declarations.contains_key(&key) {
return error()
.reason(format!("Re-declaration of '{key}' in same scope"));
}
self.declarations.insert(key, Value::Undefined);
Ok(())
}
fn assign(&mut self, key: String, value: Value) -> Result<()> {
if !self.declarations.contains_key(&key) {
if let Some(outer) = &mut self.outer {
return outer.assign(key, value);
}
return error()
.reason(format!("Assignemnt to '{key}' before declaration"));
}
self.declarations.insert(key, value);
Ok(())
}
fn access(&self, key: String) -> Result<Value> {
match self.declarations.get(&key) {
Some(v) => Ok(v.clone()),
None => {
if let Some(outer) = &self.outer {
outer.access(key)
} else {
error().reason(format!("'{key}' was never declared"))
}
},
}
}
}
/// Tree-walking interpreter that executes statements pulled from an iterator.
pub struct Interpreter<I: Iterator<Item = Statement>> {
// Current variable scope.
scope: Scope,
// Source of the top-level statements to run.
iter: I,
}
impl<I: Iterator<Item = Statement>> Interpreter<I> {
/// Creates an interpreter over `iter` with a fresh, empty scope.
pub fn new(iter: I) -> Self {
Self {
scope: Scope::new(),
iter,
}
}
/// Evaluates `child` and applies the unary operator `token` to the result.
///
/// Integers and reals support `+` and `-`; booleans support `not`.
///
/// # Errors
/// Fails if evaluating `child` fails, if the operator is not defined for
/// the operand's type, or if negating an integer overflows.
fn evaluate_unary(
    &mut self,
    token: TokenKind,
    child: Expression,
) -> Result<Value> {
    use TokenKind as t;
    use Value as v;
    let val = self.evaluate(child)?;
    Ok(match val {
        v::Integer(i) => v::Integer(match token {
            t::Plus => i,
            // Negating i64::MIN has no representable result.
            t::Minus => i
                .checked_neg()
                .reason("Integer overflow while negating")?,
            _ => {
                return error()
                    .reason(format!("Unary {token:?} is undefined for integers"));
            }
        }),
        v::Real(r) => v::Real(match token {
            t::Plus => r,
            t::Minus => -r,
            _ => {
                return error()
                    .reason(format!("Unary {token:?} is undefined for reals"));
            }
        }),
        v::Boolean(b) => v::Boolean(match token {
            t::Not => !b,
            _ => {
                return error()
                    .reason(format!("Unary {token:?} is undefined for booleans"));
            }
        }),
        _ => {
            // The message previously said "Binary" for this unary operator.
            return error()
                .reason(format!("Unary {token:?} is undefined for {val:?}"));
        }
    })
}
/// Evaluates both operands (left first) and applies the binary operator
/// `token` to them.
///
/// Two integers support `+ - * / %` and the comparisons
/// `== != < > <= >=`; two reals support `+ - * /`.
///
/// # Errors
/// Fails if either operand fails to evaluate, if the operator is not
/// defined for the operand types, on integer division or remainder by
/// zero, and on integer overflow. The last two are reported as errors
/// instead of panicking or wrapping.
fn evaluate_binary(
    &mut self,
    token: TokenKind,
    left: Expression,
    right: Expression,
) -> Result<Value> {
    use TokenKind as t;
    use Value::*;
    let left = self.evaluate(left)?;
    let right = self.evaluate(right)?;
    Ok(match (&left, &right) {
        (Integer(l), Integer(r)) => {
            let (l, r) = (*l, *r);
            match token {
                t::Plus => Integer(
                    l.checked_add(r).reason("Integer overflow in addition")?,
                ),
                t::Minus => Integer(
                    l.checked_sub(r).reason("Integer overflow in subtraction")?,
                ),
                t::Star => Integer(
                    l.checked_mul(r)
                        .reason("Integer overflow in multiplication")?,
                ),
                t::Slash | t::Percent if r == 0 => {
                    return error().reason("Division by zero");
                }
                // With a non-zero divisor the checked forms fail only for
                // i64::MIN divided by -1.
                t::Slash => Integer(
                    l.checked_div(r).reason("Integer overflow in division")?,
                ),
                t::Percent => Integer(
                    l.checked_rem(r).reason("Integer overflow in remainder")?,
                ),
                t::DoubleEqual => Boolean(l == r),
                t::BangEqual => Boolean(l != r),
                t::Less => Boolean(l < r),
                t::Greater => Boolean(l > r),
                t::LessEqual => Boolean(l <= r),
                t::GreaterEqual => Boolean(l >= r),
                other => {
                    return error()
                        .reason(format!("Binary {other:?} is undefined for integers"));
                }
            }
        }
        (Real(l), Real(r)) => {
            let (l, r) = (*l, *r);
            Real(match token {
                t::Plus => l + r,
                t::Minus => l - r,
                t::Star => l * r,
                t::Slash => l / r,
                other => {
                    return error()
                        .reason(format!("Binary {other:?} is undefined for reals"));
                }
            })
        }
        _ => {
            return error().reason(format!(
                "Binary {:?} is undefined for {:?} and {:?}",
                token, left, right
            ));
        }
    })
}
fn evaluate(&mut self, expr: Expression) -> Result<Value> {
use ExpressionKind as e;
match expr.kind {
e::Integer(i) => Ok(Value::Integer(i)),
e::Real(r) => Ok(Value::Real(r)),
e::String(s) => Ok(Value::String(s)),
e::Boolean(b) => Ok(Value::Boolean(b)),
e::Identifier(i) => self.scope.access(i),
e::Binary { token, left, right } => {
self.evaluate_binary(token, *left, *right)
},
e::Unary { token, child } => self.evaluate_unary(token, *child),
e::Parenthesis(e) => self.evaluate(*e),
}
.span(&expr.span)
}
/// Executes a single statement against the current scope.
///
/// # Errors
/// Fails if an expression fails to evaluate, a declaration or assignment
/// is rejected by the scope, or an `if`/`while` predicate does not
/// evaluate to a boolean.
pub fn execute(&mut self, statement: Statement) -> Result<()> {
    use StatementKind as s;
    match statement.kind {
        s::Mutable { name, value, .. } => {
            self.scope.declare(name.clone())?;
            if let Some(value) = value {
                let value = self.evaluate(value)?;
                self.scope.assign(name, value)?;
            }
        }
        // Handled like a mutable declaration; immutability is not enforced
        // here.
        s::Immutable { name, value, .. } => {
            self.scope.declare(name.clone())?;
            let value = self.evaluate(value)?;
            self.scope.assign(name, value)?;
        }
        s::Assignment { name, value } => {
            let span = value.span;
            let value = self.evaluate(value).span(&span)?;
            self.scope.assign(name, value).span(&span)?;
        }
        s::Print(e) => {
            let e = self.evaluate(e)?;
            println!("{e:?}");
        }
        s::Expression(e) => {
            self.evaluate(e)?;
        }
        s::Block(block) => self.block(block)?,
        s::If {
            predicate,
            block,
            else_,
        } => {
            let span = predicate.span;
            match self.evaluate(predicate)? {
                Value::Boolean(true) => self.block(block)?,
                Value::Boolean(false) => {
                    // The else branch used to be dropped silently.
                    if let Some(alternative) = else_ {
                        self.execute(*alternative)?;
                    }
                }
                _ => {
                    return error()
                        .reason("Predicate for 'if' statement must be a boolean")
                        .span(&span);
                }
            }
        }
        s::While { predicate, block } => {
            let span = predicate.span;
            loop {
                // Evaluation consumes its input, so the predicate and body
                // are cloned for every iteration.
                match self.evaluate(predicate.clone())? {
                    Value::Boolean(true) => self.block(block.clone())?,
                    Value::Boolean(false) => break,
                    _ => {
                        return error()
                            .reason("Predicate for 'while' statement must be a boolean")
                            .span(&span);
                    }
                }
            }
        }
    }
    Ok(())
}
/// Runs the statements of a block inside a fresh inner scope.
///
/// Execution stops at the first failing statement; its error is tagged
/// with that statement's span if it has none. The inner scope is popped
/// whether the block succeeds or fails, so a failure no longer leaves the
/// interpreter inside the block's scope.
fn block(&mut self, block: Vec<Statement>) -> Result<()> {
    self.scope.enscope();
    let outcome = block.into_iter().try_for_each(|s| {
        let span = s.span;
        self.execute(s).span(&span)
    });
    self.scope.descope();
    outcome
}
/// Executes every statement from the interpreter's iterator in order,
/// stopping at the first error. An error without a span is tagged with the
/// span of the statement that produced it.
pub fn run(&mut self) -> Result<()> {
    while let Some(statement) = self.iter.next() {
        let span = statement.span;
        self.execute(statement).span(&span)?;
    }
    Ok(())
}
}