Initial
This commit is contained in:
parent
b55135245d
commit
7a33804ffe
151
src/err.rs
Normal file
151
src/err.rs
Normal file
|
@ -0,0 +1,151 @@
|
|||
use crate::Span;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Diagnostic>;
|
||||
|
||||
pub fn error<T>() -> Result<T> {
|
||||
Err(Diagnostic::new(""))
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Diagnostic {
|
||||
reason: String,
|
||||
span: Option<Span>,
|
||||
backtrace: Vec<String>,
|
||||
}
|
||||
|
||||
impl Diagnostic {
|
||||
pub fn new(reason: impl Into<String>) -> Self {
|
||||
Self {
|
||||
reason: reason.into(),
|
||||
span: None,
|
||||
backtrace: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Diagnostic {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(span) = self.span.as_ref() {
|
||||
write!(f, "({}:{}) {}\n", span.row, span.column, self.reason)?;
|
||||
} else {
|
||||
write!(f, "(E) {}\n", self.reason)?;
|
||||
}
|
||||
for b in self.backtrace.iter().rev() {
|
||||
write!(f, "--> {}\n", b)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Diagnostic {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{self}")
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Diagnostic {
|
||||
fn from(value: std::io::Error) -> Self {
|
||||
Self {
|
||||
reason: format!("{}", value),
|
||||
span: None,
|
||||
backtrace: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::num::ParseIntError> for Diagnostic {
|
||||
fn from(value: std::num::ParseIntError) -> Self {
|
||||
use std::num::IntErrorKind::*;
|
||||
match value.kind() {
|
||||
PosOverflow | NegOverflow => {
|
||||
Diagnostic::new("Integer value is too large to represent")
|
||||
},
|
||||
InvalidDigit => Diagnostic::new("Integer value containts invalid digits"),
|
||||
_ => Diagnostic::new("Integer value could not be parsed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::num::ParseFloatError> for Diagnostic {
|
||||
fn from(_value: std::num::ParseFloatError) -> Self {
|
||||
Diagnostic::new("Float value could not be parsed")
|
||||
}
|
||||
}
|
||||
|
||||
pub trait IntoDiagnostic<T, S: Into<String>> {
|
||||
fn reason(self, s: S) -> Result<T>;
|
||||
fn trace(self, s: S) -> Result<T>;
|
||||
}
|
||||
|
||||
pub trait WithSpan<T> {
|
||||
fn span(self, span: &Span) -> Result<T>;
|
||||
fn no_span(self) -> Result<T>;
|
||||
}
|
||||
|
||||
impl<T> WithSpan<T> for Result<T> {
|
||||
fn span(self, span: &Span) -> Result<T> {
|
||||
self.map_err(|mut e| {
|
||||
e.span = e.span.or(Some(span.clone()));
|
||||
e
|
||||
})
|
||||
}
|
||||
|
||||
fn no_span(self) -> Result<T> {
|
||||
self.map_err(|mut e| {
|
||||
e.span = None;
|
||||
e
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CoerceDiagnostic<T> {
|
||||
fn coerce(self) -> Result<T>;
|
||||
}
|
||||
|
||||
impl<T, S: Into<String>> IntoDiagnostic<T, S> for Option<T> {
|
||||
fn reason(self, s: S) -> Result<T> {
|
||||
match self {
|
||||
Some(t) => Ok(t),
|
||||
None => Err(Diagnostic {
|
||||
reason: s.into(),
|
||||
span: None,
|
||||
backtrace: vec![],
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn trace(self, s: S) -> Result<T> {
|
||||
match self {
|
||||
Some(t) => Ok(t),
|
||||
None => Err(Diagnostic {
|
||||
reason: "".into(),
|
||||
span: None,
|
||||
backtrace: vec![s.into()],
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, E: Into<Diagnostic>, S: Into<String>> IntoDiagnostic<T, S>
|
||||
for std::result::Result<T, E>
|
||||
{
|
||||
fn reason(self, s: S) -> Result<T> {
|
||||
self.map_err(|e| e.into()).map_err(|mut e| {
|
||||
e.reason = s.into();
|
||||
e
|
||||
})
|
||||
}
|
||||
|
||||
fn trace(self, s: S) -> Result<T> {
|
||||
self.map_err(|e| e.into()).map_err(|mut e| {
|
||||
e.backtrace.push(s.into());
|
||||
e
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, E> CoerceDiagnostic<T> for std::result::Result<T, E>
where
    E: Into<Diagnostic>,
{
    /// Converts the error side into a [`Diagnostic`]; `Ok` passes through.
    fn coerce(self) -> Result<T> {
        match self {
            Ok(value) => Ok(value),
            Err(source) => Err(source.into()),
        }
    }
}
|
84
src/lookahead.rs
Normal file
84
src/lookahead.rs
Normal file
|
@ -0,0 +1,84 @@
|
|||
/// Fixed-size lookahead adaptor: wraps an iterator and keeps its next `N`
/// items buffered so they can be inspected with [`Window::peek`] before they
/// are consumed through the `Iterator` impl.
pub struct Window<const N: usize, T, I>
where
    I: Iterator<Item = T>,
{
    /// Underlying source of items.
    iterator: I,
    /// Upcoming items; index 0 is the next one `next` will yield.
    buffer: [Option<T>; N],
    /// Set once the source has returned `None`; it is not polled afterwards.
    exhausted: bool,
    /// Set once `next` has returned `None` to the caller.
    pub finished: bool,
}

impl<const N: usize, T, I> std::fmt::Debug for Window<N, T, I>
where
    I: Iterator<Item = T>,
    T: std::fmt::Debug,
{
    /// Prints only the lookahead buffer.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self.buffer)
    }
}

impl<const N: usize, T, I> Window<N, T, I>
where
    I: Iterator<Item = T>,
{
    /// Builds a window over `it`, pre-filling the buffer with up to `N` items.
    ///
    /// The source is polled only until it first returns `None`; remaining
    /// buffer slots are left empty.
    ///
    /// # Panics
    ///
    /// Panics when `N` is 0.
    pub fn new(mut it: I) -> Self {
        assert!(N > 0, "Lookahead buffer cannot be 0 sized");
        let mut exhausted = false;
        let buffer: [Option<T>; N] = std::array::from_fn(|_| {
            if exhausted {
                return None;
            }
            let item = it.next();
            exhausted = item.is_none();
            item
        });
        Self {
            iterator: it,
            buffer,
            exhausted,
            finished: false,
        }
    }

    /// Borrows the wrapped iterator.
    pub fn inner(&self) -> &I {
        &self.iterator
    }

    /// Returns the item `n` positions ahead (0 = next item to be yielded), or
    /// `None` when the source ends before that position.
    ///
    /// # Panics
    ///
    /// Panics when `n >= N`.
    pub fn peek(&self, n: usize) -> &Option<T> {
        debug_assert!(n < N, "Peeked further than buffer allows");
        &self.buffer[n]
    }

    /// Shifts the buffer one slot towards the front and refills the last slot
    /// from the source, unless the source is already exhausted.
    fn _advance(&mut self) {
        // The old front slot rotates to the back and is overwritten below.
        self.buffer.rotate_left(1);
        let incoming = if self.exhausted {
            None
        } else {
            self.iterator.next()
        };
        if incoming.is_none() {
            self.exhausted = true;
        }
        self.buffer[N - 1] = incoming;
    }
}

impl<const N: usize, T, I> Iterator for Window<N, T, I>
where
    I: Iterator<Item = T>,
{
    type Item = T;

    /// Yields the front of the buffer and advances the window; sets
    /// `finished` the first time the front is empty.
    fn next(&mut self) -> Option<Self::Item> {
        let front = self.buffer[0].take();
        if front.is_none() {
            self.finished = true;
        }
        self._advance();
        front
    }
}
|
67
src/main.rs
67
src/main.rs
|
@ -1,18 +1,57 @@
|
|||
mod err;
|
||||
mod lookahead;
|
||||
mod parse;
|
||||
mod token;
|
||||
mod treewalk;
|
||||
use std::ops::Add;
|
||||
|
||||
use lookahead::*;
|
||||
use parse::*;
|
||||
use token::*;
|
||||
use treewalk::Interpreter;
|
||||
|
||||
/// A source position expressed as a row and a column.
#[derive(Clone, Copy, Debug)]
pub struct Span {
    pub row: usize,
    pub column: usize,
}

impl Add<Span> for Span {
    type Output = Span;

    /// Combines two spans field by field: the smaller `row` and the larger
    /// `column` of the two operands.
    fn add(self, rhs: Span) -> Self::Output {
        let Span { row, column } = self;
        Span {
            row: row.min(rhs.row),
            column: column.max(rhs.column),
        }
    }
}
|
||||
|
||||
fn test_tokenization() {
|
||||
let test_str = r#"
|
||||
( ) [ ] { } . .. , :
|
||||
; + - * / -> => += -=
|
||||
*= /= ! != = == <= >=
|
||||
? ?= < > literal 10
|
||||
0x10 0b10 10.0 1.0..2.0
|
||||
2.0..=3.0 if else and
|
||||
or xor not nand nor xnor
|
||||
print break for while true
|
||||
false "\u263b" '\x30'
|
||||
"#;
|
||||
let mut parser = Tokenizer::new(test_str.chars());
|
||||
while let Some(tok) = parser.next() {
|
||||
println!("{tok:?}");
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
repl();
|
||||
}
|
||||
|
||||
pub fn repl() {
|
||||
let mut buffer = String::new();
|
||||
let stdin = std::io::stdin();
|
||||
loop {
|
||||
stdin.read_line(&mut buffer).unwrap();
|
||||
let tokens = token::tokenize(&buffer);
|
||||
for tok in tokens {
|
||||
println!("{} : {:?}", &buffer[tok.start..tok.end], tok.ttype);
|
||||
}
|
||||
buffer = String::new();
|
||||
}
|
||||
let src = include_str!("../demo.lang");
|
||||
println!("{src}");
|
||||
let tokens: Vec<_> = Tokenizer::new(src.chars())
|
||||
.filter(|t| t.0.is_meaningful())
|
||||
.collect();
|
||||
let parsed = Parser::new(tokens.into_iter()).file().unwrap();
|
||||
let mut interp = Interpreter::new(parsed.into_iter());
|
||||
interp.run().unwrap();
|
||||
}
|
||||
|
|
453
src/parse.rs
Normal file
453
src/parse.rs
Normal file
|
@ -0,0 +1,453 @@
|
|||
use crate::err::*;
|
||||
use crate::{Span, Token, TokenKind};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum ExpressionKind {
|
||||
Integer(i64),
|
||||
Real(f64),
|
||||
String(String),
|
||||
Boolean(bool),
|
||||
Identifier(String),
|
||||
Binary {
|
||||
token: TokenKind,
|
||||
left: Box<Expression>,
|
||||
right: Box<Expression>,
|
||||
},
|
||||
Unary {
|
||||
token: TokenKind,
|
||||
child: Box<Expression>,
|
||||
},
|
||||
Parenthesis(Box<Expression>),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Expression {
|
||||
pub kind: ExpressionKind,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
pub fn new(kind: ExpressionKind, span: Span) -> Self {
|
||||
Self { kind, span }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ExpressionKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
use ExpressionKind as e;
|
||||
match self {
|
||||
e::Integer(i) => write!(f, "{i}"),
|
||||
e::Binary { token, left, right } => {
|
||||
write!(f, "({left:?} {token:?} {right:?})")
|
||||
},
|
||||
e::Parenthesis(inner) => write!(f, "{inner:?}"),
|
||||
e::Unary { token, child } => {
|
||||
write!(f, "({token:?} {child:?})")
|
||||
},
|
||||
e::Real(fp) => write!(f, "{fp}"),
|
||||
e::String(s) => write!(f, r#""{s}""#),
|
||||
e::Identifier(i) => write!(f, "{i}"),
|
||||
e::Boolean(b) => write!(f, "{b}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Expression {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self.kind)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum StatementKind {
|
||||
Mutable {
|
||||
name: String,
|
||||
type_: Option<String>,
|
||||
value: Option<Expression>,
|
||||
},
|
||||
Immutable {
|
||||
name: String,
|
||||
type_: Option<String>,
|
||||
value: Expression,
|
||||
},
|
||||
Assignment {
|
||||
name: String,
|
||||
value: Expression,
|
||||
},
|
||||
If {
|
||||
predicate: Expression,
|
||||
block: Vec<Statement>,
|
||||
else_: Option<Box<Statement>>,
|
||||
},
|
||||
While {
|
||||
predicate: Expression,
|
||||
block: Vec<Statement>,
|
||||
},
|
||||
Print(Expression),
|
||||
Expression(Expression),
|
||||
Block(Vec<Statement>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Statement {
|
||||
pub kind: StatementKind,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
pub type Precedence = usize;
|
||||
|
||||
fn binary_prec(tok: &TokenKind) -> Result<(Precedence, bool)> {
|
||||
use TokenKind::*;
|
||||
Ok(match tok {
|
||||
Star | Slash | Percent => (10, false),
|
||||
Plus | Minus => (9, false),
|
||||
And | Nand => (8, false),
|
||||
Xor | Xnor => (7, false),
|
||||
Or | Nor => (6, false),
|
||||
DoubleEqual | BangEqual | Less | LessEqual | Greater | GreaterEqual => {
|
||||
(5, false)
|
||||
},
|
||||
//Colon => Some((5, false)),
|
||||
_ => {
|
||||
return error()
|
||||
.reason(format!("{:?} is not a valid binary operator", tok));
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn unary_prefix_prec(tok: &TokenKind) -> Result<Precedence> {
|
||||
use TokenKind::*;
|
||||
Ok(match tok {
|
||||
Minus | Not => 11,
|
||||
Break => 3,
|
||||
_ => {
|
||||
return error()
|
||||
.reason(format!("{tok:?} is not a valid prefix unary operator"));
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn unary_postfix_prec(tok: &TokenKind) -> Result<Precedence> {
|
||||
use TokenKind::*;
|
||||
Ok(match tok {
|
||||
Question => 12,
|
||||
Bang => 12,
|
||||
_ => {
|
||||
return error()
|
||||
.reason(format!("{tok:?} is not a valid postfix unary operator"));
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
const PARSER_LOOKAHEAD: usize = 3;
|
||||
|
||||
type TokenIter<I> = crate::Window<PARSER_LOOKAHEAD, Token, I>;
|
||||
|
||||
pub struct Parser<I: Iterator<Item = Token>> {
|
||||
iter: TokenIter<I>,
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = Token>> Parser<I> {
|
||||
pub fn new(iter: I) -> Self {
|
||||
Self {
|
||||
iter: TokenIter::new(iter),
|
||||
}
|
||||
}
|
||||
|
||||
fn skip(&mut self, n: usize) {
|
||||
for _ in 0..n {
|
||||
let _ = self.next();
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes and returns the next token, or an "Unexpected end of file" error
/// when the stream has none left.
fn next(&mut self) -> Result<Token> {
    let token = self.iter.next();
    token.reason("Unexpected end of file")
}
|
||||
|
||||
/// Returns a clone of the token `n` positions ahead without consuming it, or
/// an "Unexpected end of file" error when there is no such token.
fn peek(&self, n: usize) -> Result<Token> {
    let upcoming: Option<Token> = self.iter.peek(n).clone();
    upcoming.reason("Unexpected end of file")
}
|
||||
|
||||
/// Consumes the next token when its kind matches `expect` and returns it;
/// otherwise returns the error from [`Self::look`] and consumes nothing.
fn eat(&mut self, expect: TokenKind) -> Result<Token> {
    let token = self.look(expect)?;
    self.skip(1);
    Ok(token)
}
|
||||
|
||||
fn look(&mut self, expect: TokenKind) -> Result<Token> {
|
||||
let next = self.peek(0)?;
|
||||
if next.0 == expect {
|
||||
Ok(next)
|
||||
} else {
|
||||
error()
|
||||
.reason(format!("Expected {expect:?}, found {:?}", next.0))
|
||||
.span(&next.1)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn file(&mut self) -> Result<Vec<Statement>> {
|
||||
use TokenKind as t;
|
||||
let mut statements = vec![];
|
||||
loop {
|
||||
// Trim extra ;
|
||||
while self.eat(t::Semicolon).is_ok() {}
|
||||
if self.eat(t::EOF).is_ok() {
|
||||
return Ok(statements);
|
||||
}
|
||||
match self.statement() {
|
||||
Ok(s) => statements.push(s),
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn statement(&mut self) -> Result<Statement> {
|
||||
use StatementKind as s;
|
||||
use TokenKind as t;
|
||||
let next = self.peek(0);
|
||||
let next2 = self.peek(1);
|
||||
let statement = match (next, next2) {
|
||||
// (im)mutable declaration
|
||||
(Ok(Token(t::Identifier(name), span)), Ok(Token(t::Colon, _))) => {
|
||||
self.skip(2);
|
||||
let type_ = match self.eat(t::Identifier("".into())) {
|
||||
Ok(Token(t::Identifier(s), _)) => Some(s),
|
||||
_ => None,
|
||||
};
|
||||
match self.eat(t::Equal).or_else(|_| self.eat(t::Colon)) {
|
||||
Ok(Token(t::Colon, _)) => Statement {
|
||||
kind: s::Immutable {
|
||||
name,
|
||||
type_,
|
||||
value: self
|
||||
.expression(0)
|
||||
.trace("while parsing immutable declaration")?,
|
||||
},
|
||||
span,
|
||||
},
|
||||
Ok(Token(t::Equal, _)) => Statement {
|
||||
kind: s::Mutable {
|
||||
name,
|
||||
type_,
|
||||
value: Some(
|
||||
self
|
||||
.expression(0)
|
||||
.trace("while parsing mutable declaration")?,
|
||||
),
|
||||
},
|
||||
span,
|
||||
},
|
||||
_ => return error().reason("Expected expression here"),
|
||||
}
|
||||
},
|
||||
(Ok(Token(t::Identifier(name), span)), Ok(Token(t::Equal, _))) => {
|
||||
self.skip(2);
|
||||
let value = self
|
||||
.expression(0)
|
||||
.trace("while parsing assignment expression")?;
|
||||
Statement {
|
||||
kind: s::Assignment { name, value },
|
||||
span,
|
||||
}
|
||||
},
|
||||
// If
|
||||
(Ok(Token(t::If, span)), _) => {
|
||||
self.skip(1);
|
||||
let predicate = self
|
||||
.expression(0)
|
||||
.reason("Expected predicate after 'if' keyword")
|
||||
.span(&span)?;
|
||||
let block = self.block().trace("while parsing if statement")?;
|
||||
return Ok(Statement {
|
||||
span,
|
||||
kind: s::If {
|
||||
predicate,
|
||||
block: block.0,
|
||||
else_: None,
|
||||
},
|
||||
});
|
||||
},
|
||||
// While
|
||||
(Ok(Token(t::While, span)), _) => {
|
||||
self.skip(1);
|
||||
let predicate = self
|
||||
.expression(0)
|
||||
.reason("Expected predicate after 'while' keyword")
|
||||
.span(&span)?;
|
||||
let block = self.block().trace("while parsing while statement")?;
|
||||
return Ok(Statement {
|
||||
span,
|
||||
kind: s::While {
|
||||
predicate,
|
||||
block: block.0,
|
||||
},
|
||||
});
|
||||
},
|
||||
// (DEBUG) print
|
||||
(Ok(Token(t::Print, span)), _) => {
|
||||
self.skip(1);
|
||||
let expr = self.expression(0).trace("while parsing print statement")?;
|
||||
Statement {
|
||||
span: span + expr.span,
|
||||
kind: s::Print(expr),
|
||||
}
|
||||
},
|
||||
// Block
|
||||
(Ok(Token(t::LeftBrace, _)), _) => {
|
||||
// Skip check for semicolon
|
||||
let (block, span) =
|
||||
self.block().trace("while parsing block statement")?;
|
||||
return Ok(Statement {
|
||||
kind: s::Block(block),
|
||||
span,
|
||||
});
|
||||
},
|
||||
// Expression
|
||||
_ => {
|
||||
let expr = self
|
||||
.expression(0)
|
||||
.trace("while parsing expression statement")?;
|
||||
Statement {
|
||||
span: expr.span,
|
||||
kind: s::Expression(expr),
|
||||
}
|
||||
},
|
||||
};
|
||||
// Check for semicolon
|
||||
if self.eat(t::Semicolon).is_ok() {
|
||||
Ok(statement)
|
||||
} else {
|
||||
error().reason("Expected ;")
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expression(
|
||||
&mut self,
|
||||
mut precedence: Precedence,
|
||||
) -> Result<Expression> {
|
||||
use ExpressionKind as e;
|
||||
use TokenKind as t;
|
||||
let next = self.peek(0)?;
|
||||
// Unary prefix expression
|
||||
let mut current = if let Ok(p) = unary_prefix_prec(&next.0) {
|
||||
let operator = self.next().expect("unreachable");
|
||||
let child = self
|
||||
.expression(p)
|
||||
.trace(format!("while parsing unary {:?}", operator.0))
|
||||
.span(&operator.1)?;
|
||||
let span = child.span + operator.1;
|
||||
Expression::new(
|
||||
e::Unary {
|
||||
token: operator.0,
|
||||
child: child.into(),
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
// Terminal or paren
|
||||
else {
|
||||
self.primary()?
|
||||
};
|
||||
// Precedence climbing loop
|
||||
while let Ok(next) = self.peek(0) {
|
||||
// Binary infix
|
||||
if let Ok((new_precedence, left_assoc)) = binary_prec(&next.0) {
|
||||
if (!left_assoc && new_precedence <= precedence)
|
||||
|| (new_precedence < precedence)
|
||||
{
|
||||
return Ok(current);
|
||||
}
|
||||
let operator = self.next().expect("unreachable");
|
||||
let rhs = self
|
||||
.expression(new_precedence)
|
||||
.trace(format!("while parsing binary {:?}", operator.0))
|
||||
.span(&operator.1)?;
|
||||
let span = next.1 + rhs.span;
|
||||
current = Expression::new(
|
||||
e::Binary {
|
||||
token: operator.0,
|
||||
left: current.into(),
|
||||
right: rhs.into(),
|
||||
},
|
||||
span,
|
||||
);
|
||||
}
|
||||
// Unary postfix
|
||||
else if let Ok(new_precedence) = unary_postfix_prec(&next.0) {
|
||||
let operator = self.next().expect("unreachable");
|
||||
let span = next.1 + operator.1;
|
||||
precedence = new_precedence;
|
||||
current = Expression::new(
|
||||
e::Unary {
|
||||
token: operator.0,
|
||||
child: current.into(),
|
||||
},
|
||||
span,
|
||||
);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(current)
|
||||
}
|
||||
|
||||
fn primary(&mut self) -> Result<Expression> {
|
||||
use ExpressionKind as e;
|
||||
use TokenKind as t;
|
||||
let next = self.peek(0)?;
|
||||
let span = next.1;
|
||||
let kind = match next.0 {
|
||||
t::IntegerLiteral(i) => e::Integer(i),
|
||||
t::FloatLiteral(f) => e::Real(f),
|
||||
t::StringLiteral(s) => e::String(s),
|
||||
t::True => e::Boolean(true),
|
||||
t::Identifier(i) => e::Identifier(i),
|
||||
t::LeftParen => {
|
||||
self.eat(t::LeftParen).expect("unreachable");
|
||||
let expr = self
|
||||
.expression(0)
|
||||
.trace("while parsing parenthesized expression")?;
|
||||
self
|
||||
.look(t::RightParen)
|
||||
.reason("Unclosed '('")
|
||||
.span(&expr.span)?;
|
||||
e::Parenthesis(expr.into())
|
||||
},
|
||||
_ => {
|
||||
return error()
|
||||
.span(&span)
|
||||
.reason(format!("Expected primary, found {:?}", next.0));
|
||||
},
|
||||
};
|
||||
self.skip(1);
|
||||
Ok(Expression { kind, span })
|
||||
}
|
||||
|
||||
fn block(&mut self) -> Result<(Vec<Statement>, Span)> {
|
||||
use TokenKind as t;
|
||||
let mut span = self.eat(t::LeftBrace).reason("Expected block")?.1;
|
||||
let mut statements = vec![];
|
||||
loop {
|
||||
let next = self.peek(0)?;
|
||||
span = span + next.1;
|
||||
match self.eat(t::RightBrace) {
|
||||
Ok(t) => {
|
||||
span = span + t.1;
|
||||
break;
|
||||
},
|
||||
_ => {
|
||||
let statement = self.statement()?;
|
||||
span = span + statement.span;
|
||||
statements.push(statement);
|
||||
},
|
||||
};
|
||||
}
|
||||
Ok((statements, span))
|
||||
}
|
||||
}
|
634
src/token.rs
634
src/token.rs
|
@ -1,22 +1,38 @@
|
|||
use crate::err::*;
|
||||
use crate::Span;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum TokenType {
|
||||
// Symbols
|
||||
pub enum TokenKind {
|
||||
LeftParen,
|
||||
RightParen,
|
||||
LeftSquare,
|
||||
RightSquare,
|
||||
LeftBrace,
|
||||
RightBrace,
|
||||
LeftSquare,
|
||||
RightSquare,
|
||||
|
||||
Comma,
|
||||
Colon,
|
||||
Semicolon,
|
||||
|
||||
Dot,
|
||||
DotDot,
|
||||
Plus,
|
||||
Minus,
|
||||
Star,
|
||||
Slash,
|
||||
Semicolon,
|
||||
Star,
|
||||
Percent,
|
||||
Arrow,
|
||||
FatArrow,
|
||||
PlusEqual,
|
||||
MinusEqual,
|
||||
SlashEqual,
|
||||
StarEqual,
|
||||
PercentEqual,
|
||||
|
||||
Bang,
|
||||
BangEqual,
|
||||
Question,
|
||||
QuestionEqual,
|
||||
Equal,
|
||||
DoubleEqual,
|
||||
Greater,
|
||||
|
@ -24,187 +40,463 @@ pub enum TokenType {
|
|||
Less,
|
||||
LessEqual,
|
||||
|
||||
// Literals
|
||||
String,
|
||||
Character,
|
||||
Number(f64),
|
||||
Pipe,
|
||||
Ampersand,
|
||||
Carrot,
|
||||
Hash,
|
||||
|
||||
DotDotEqual,
|
||||
|
||||
Identifier(String),
|
||||
StringLiteral(String),
|
||||
CharLiteral(char),
|
||||
IntegerLiteral(i64),
|
||||
FloatLiteral(f64),
|
||||
|
||||
// Words
|
||||
Ident,
|
||||
And,
|
||||
Or,
|
||||
Self_,
|
||||
Struct,
|
||||
True,
|
||||
False,
|
||||
Fn,
|
||||
If,
|
||||
Else,
|
||||
Nil,
|
||||
And,
|
||||
Or,
|
||||
Xor,
|
||||
Not,
|
||||
Nand,
|
||||
Nor,
|
||||
Xnor,
|
||||
Print,
|
||||
Break,
|
||||
Return,
|
||||
Super,
|
||||
Let,
|
||||
While,
|
||||
Continue,
|
||||
For,
|
||||
While,
|
||||
True,
|
||||
False,
|
||||
Struct,
|
||||
Enum,
|
||||
Union,
|
||||
|
||||
// Special
|
||||
Unrecognized,
|
||||
TooLong,
|
||||
Whitespace(String),
|
||||
SmallComment(String),
|
||||
BigComment(String),
|
||||
|
||||
Idk,
|
||||
EOF,
|
||||
}
|
||||
|
||||
/// Type, index
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token {
|
||||
pub ttype: TokenType,
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
impl TokenKind {
|
||||
pub fn is_meaningful(&self) -> bool {
|
||||
match self {
|
||||
Self::Whitespace(_)
|
||||
| Self::SmallComment(_)
|
||||
| Self::BigComment(_)
|
||||
| Self::Idk => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
let input_str = input;
|
||||
let mut input = input.char_indices().peekable();
|
||||
let mut tokens = vec![];
|
||||
'outer: loop {
|
||||
// Find next non-whitespace line
|
||||
let (start, c) = 'ws: loop {
|
||||
match input.next() {
|
||||
// Stop at end of input
|
||||
None => break 'outer,
|
||||
Some((index, character)) if !character.is_whitespace() => {
|
||||
break 'ws (index, character)
|
||||
impl PartialEq for TokenKind {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
std::mem::discriminant(self) == std::mem::discriminant(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for TokenKind {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Token(pub TokenKind, pub Span);
|
||||
|
||||
fn t(tk: TokenKind, sp: Span) -> Result<Token> {
|
||||
Ok(Token(tk, sp))
|
||||
}
|
||||
|
||||
const TOKENIZER_LOOKAHEAD: usize = 2;
|
||||
|
||||
type CharIter<I> = crate::Window<TOKENIZER_LOOKAHEAD, char, I>;
|
||||
|
||||
pub struct Tokenizer<I: Iterator<Item = char>> {
|
||||
iter: CharIter<I>,
|
||||
column: usize,
|
||||
row: usize,
|
||||
finished: bool,
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = char>> Tokenizer<I> {
|
||||
pub fn new(iter: I) -> Self {
|
||||
Self {
|
||||
iter: CharIter::new(iter),
|
||||
column: 1,
|
||||
row: 1,
|
||||
finished: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn span(&self) -> Span {
|
||||
Span {
|
||||
column: self.column,
|
||||
row: self.row,
|
||||
}
|
||||
}
|
||||
|
||||
fn next_char(&mut self) -> Option<char> {
|
||||
match self.iter.next() {
|
||||
Some(c) if c == '\n' => {
|
||||
self.row += 1;
|
||||
self.column = 1;
|
||||
Some(c)
|
||||
},
|
||||
_ => {},
|
||||
Some(c) => {
|
||||
self.column += 1;
|
||||
Some(c)
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn delimited(&mut self, terminator: char) -> Option<String> {
|
||||
let mut buffer = String::new();
|
||||
let mut escape = false;
|
||||
loop {
|
||||
let c = match self.next_char() {
|
||||
Some(c) if c == terminator && !escape => {
|
||||
break;
|
||||
},
|
||||
Some(c) => {
|
||||
if c == '\\' {
|
||||
escape = !escape;
|
||||
} else {
|
||||
escape = false;
|
||||
}
|
||||
c
|
||||
},
|
||||
None => return None,
|
||||
};
|
||||
let mut end = start + 1;
|
||||
let mut advance = || {};
|
||||
let ttype = match c {
|
||||
buffer.push(c)
|
||||
}
|
||||
Some(buffer)
|
||||
}
|
||||
|
||||
fn peek(&mut self, n: usize) -> Option<char> {
|
||||
self.iter.peek(n).clone()
|
||||
}
|
||||
|
||||
fn _next(&mut self) -> Result<Token> {
|
||||
use TokenKind::*;
|
||||
let position = Span {
|
||||
row: self.row,
|
||||
column: self.column,
|
||||
};
|
||||
let current = match self.next_char() {
|
||||
Some(c) => c,
|
||||
None => return t(EOF, position),
|
||||
};
|
||||
// Parse whitespace
|
||||
if current.is_whitespace() {
|
||||
let mut buffer = String::from(current);
|
||||
while let Some(c) = self.peek(0) {
|
||||
if !c.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
_ = self.next_char();
|
||||
buffer.push(c.clone());
|
||||
}
|
||||
return t(Whitespace(buffer), position);
|
||||
}
|
||||
// Parse multiline comments
|
||||
if let ('/', Some('*')) = (current, self.peek(0)) {
|
||||
let _ = self.next_char();
|
||||
let mut comment_level = 1;
|
||||
let mut buffer = String::new();
|
||||
while let Some(current) = self.next_char() {
|
||||
// Ignore /* */ inside strings
|
||||
if '\"' == current {
|
||||
if let Some(inner_string) = self.delimited('\"') {
|
||||
buffer.push('\"');
|
||||
buffer.push_str(&inner_string);
|
||||
buffer.push('\"');
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if let ('/', Some('*')) = (current, self.peek(0)) {
|
||||
comment_level += 1;
|
||||
} else if let ('*', Some('/')) = (current, self.peek(0)) {
|
||||
comment_level -= 1;
|
||||
}
|
||||
if comment_level == 0 {
|
||||
let _ = self.next_char();
|
||||
break;
|
||||
}
|
||||
buffer.push(current);
|
||||
}
|
||||
return t(BigComment(buffer), position);
|
||||
}
|
||||
// Parse single line comments
|
||||
if let ('/', Some('/')) = (current, self.peek(0)) {
|
||||
let _ = self.next_char();
|
||||
let mut buffer = String::new();
|
||||
while let Some(c) = self.next_char() {
|
||||
if c == '\n' {
|
||||
break;
|
||||
}
|
||||
buffer.push(c);
|
||||
}
|
||||
return t(SmallComment(buffer), position);
|
||||
}
|
||||
let next = self.peek(0);
|
||||
let next_next = self.peek(1);
|
||||
// Match single character tokens
|
||||
'(' => TokenType::LeftParen,
|
||||
')' => TokenType::RightParen,
|
||||
'[' => TokenType::LeftSquare,
|
||||
']' => TokenType::RightSquare,
|
||||
'{' => TokenType::LeftBrace,
|
||||
'}' => TokenType::RightBrace,
|
||||
',' => TokenType::Comma,
|
||||
'.' => TokenType::Dot,
|
||||
'+' => TokenType::Plus,
|
||||
'-' => TokenType::Minus,
|
||||
'*' => TokenType::Star,
|
||||
'/' => TokenType::Slash,
|
||||
';' => TokenType::Semicolon,
|
||||
// Match multicharacter tokens
|
||||
'!' => match input.peek() {
|
||||
Some((_, '=')) => {
|
||||
input.next();
|
||||
end += 1;
|
||||
TokenType::BangEqual
|
||||
},
|
||||
_ => TokenType::Bang,
|
||||
},
|
||||
'=' => match input.peek() {
|
||||
Some((_, '=')) => {
|
||||
input.next();
|
||||
end += 1;
|
||||
TokenType::DoubleEqual
|
||||
},
|
||||
_ => TokenType::Equal,
|
||||
},
|
||||
'<' => match input.peek() {
|
||||
Some((_, '=')) => {
|
||||
input.next();
|
||||
end += 1;
|
||||
TokenType::GreaterEqual
|
||||
},
|
||||
_ => TokenType::Greater,
|
||||
},
|
||||
'>' => match input.peek() {
|
||||
Some((_, '=')) => {
|
||||
input.next();
|
||||
end += 1;
|
||||
TokenType::LessEqual
|
||||
},
|
||||
_ => TokenType::Less,
|
||||
},
|
||||
// Match keywords, identifiers, and literals
|
||||
c if c.is_alphanumeric() => 'case: {
|
||||
// Scan full word
|
||||
while let Some((new_end, next)) = input.peek() {
|
||||
if next.is_alphanumeric() || *next == '_' {
|
||||
let _ = input.next();
|
||||
} else {
|
||||
end = *new_end;
|
||||
break;
|
||||
}
|
||||
}
|
||||
let word = &input_str[start..end];
|
||||
// Attempt to parse hex literal
|
||||
if let Some(s) =
|
||||
word.strip_prefix("0x").or_else(|| word.strip_prefix("0X"))
|
||||
{
|
||||
if let Ok(n) = u64::from_str_radix(s, 16) {
|
||||
break 'case TokenType::Number(n as f64);
|
||||
} else {
|
||||
break 'case TokenType::Unrecognized;
|
||||
}
|
||||
}
|
||||
// Attempt to parse binary literal
|
||||
if let Some(s) =
|
||||
word.strip_prefix("0b").or_else(|| word.strip_prefix("0B"))
|
||||
{
|
||||
if let Ok(n) = u64::from_str_radix(s, 2) {
|
||||
break 'case TokenType::Number(n as f64);
|
||||
} else {
|
||||
break 'case TokenType::Unrecognized;
|
||||
}
|
||||
}
|
||||
// Attempt to parse decimal literal
|
||||
if let Ok(f) = word.parse::<f64>() {
|
||||
break 'case TokenType::Number(f);
|
||||
}
|
||||
// Parse keyword or ident
|
||||
match word {
|
||||
"and" => TokenType::And,
|
||||
"or" => TokenType::Or,
|
||||
"self" => TokenType::Self_,
|
||||
"struct" => TokenType::Struct,
|
||||
"true" => TokenType::True,
|
||||
"false" => TokenType::False,
|
||||
"fn" => TokenType::Fn,
|
||||
"if" => TokenType::If,
|
||||
"else" => TokenType::Else,
|
||||
"nil" => TokenType::Nil,
|
||||
"print" => TokenType::Print,
|
||||
"return" => TokenType::Return,
|
||||
"super" => TokenType::Super,
|
||||
"let" => TokenType::Let,
|
||||
"while" => TokenType::While,
|
||||
"for" => TokenType::For,
|
||||
_ => TokenType::Ident,
|
||||
}
|
||||
},
|
||||
// Parse string
|
||||
'"' => {
|
||||
while let Some((new_end, next)) = input.next() {
|
||||
match next {
|
||||
'"' => {
|
||||
end = new_end + 1;
|
||||
break;
|
||||
},
|
||||
// Skip escapes and deal with them later
|
||||
'\\' => {
|
||||
let _ = input.next();
|
||||
},
|
||||
_ => {},
|
||||
}
|
||||
}
|
||||
TokenType::String
|
||||
},
|
||||
// Parse character
|
||||
_ => TokenType::Unrecognized,
|
||||
let not_next = move |c| Some(c) != next;
|
||||
let kind = match current {
|
||||
'(' => LeftParen,
|
||||
')' => RightParen,
|
||||
'{' => LeftBrace,
|
||||
'}' => RightBrace,
|
||||
'[' => LeftSquare,
|
||||
']' => RightSquare,
|
||||
',' => Comma,
|
||||
':' => Colon,
|
||||
';' => Semicolon,
|
||||
'|' => Pipe,
|
||||
'&' => Ampersand,
|
||||
'^' => Carrot,
|
||||
'#' => Hash,
|
||||
'.' if not_next('.') => Dot,
|
||||
'+' if not_next('=') => Plus,
|
||||
'-' if not_next('=') && not_next('>') => Minus,
|
||||
'*' if not_next('=') => Star,
|
||||
'/' if not_next('=') => Slash,
|
||||
'%' if not_next('=') => Percent,
|
||||
'!' if not_next('=') => Bang,
|
||||
'?' if not_next('=') => Question,
|
||||
'=' if not_next('=') && not_next('>') => Equal,
|
||||
'<' if not_next('=') => Less,
|
||||
'>' if not_next('=') => Greater,
|
||||
_ => Idk,
|
||||
};
|
||||
if kind != Idk {
|
||||
return t(kind, position);
|
||||
};
|
||||
tokens.push(Token { ttype, start, end });
|
||||
}
|
||||
tokens
|
||||
// Match two character tokens
|
||||
if let Some(next) = next {
|
||||
let not_next_next = move |c| Some(c) != next_next;
|
||||
let kind = match (current, next) {
|
||||
('.', '.') if not_next_next('=') => DotDot,
|
||||
('+', '=') => PlusEqual,
|
||||
('-', '=') => MinusEqual,
|
||||
('*', '=') => StarEqual,
|
||||
('/', '=') => SlashEqual,
|
||||
('%', '=') => PercentEqual,
|
||||
('=', '=') => DoubleEqual,
|
||||
('?', '=') => QuestionEqual,
|
||||
('!', '=') => BangEqual,
|
||||
('<', '=') => LessEqual,
|
||||
('>', '=') => GreaterEqual,
|
||||
('-', '>') => Arrow,
|
||||
('=', '>') => FatArrow,
|
||||
_ => Idk,
|
||||
};
|
||||
if kind != Idk {
|
||||
let _ = self.next();
|
||||
return t(kind, position);
|
||||
}
|
||||
}
|
||||
// Match three character tokens
|
||||
if let (Some(next), Some(next_next)) = (next, next_next) {
|
||||
let kind = match (current, next, next_next) {
|
||||
('.', '.', '=') => DotDotEqual,
|
||||
_ => Idk,
|
||||
};
|
||||
if kind != Idk {
|
||||
let _ = self.next();
|
||||
let _ = self.next();
|
||||
return t(kind, position);
|
||||
}
|
||||
}
|
||||
let mut buffer = String::new();
|
||||
// Match character
|
||||
if current == '\'' {
|
||||
let buffer = self
|
||||
.delimited('\'')
|
||||
.reason("Single quote (') was opened, but never closed")
|
||||
.span(&position)?;
|
||||
let baked = bake_string(&buffer)?;
|
||||
if baked.len() != 1 {
|
||||
return error()
|
||||
.reason("Single quote (') contains more than one character")
|
||||
.span(&position);
|
||||
}
|
||||
let kind = CharLiteral(
|
||||
baked
|
||||
.chars()
|
||||
.next()
|
||||
.reason("Single quote (') contains no characters")
|
||||
.span(&position)?,
|
||||
);
|
||||
return t(kind, position);
|
||||
}
|
||||
// Match string
|
||||
if current == '"' {
|
||||
let buffer = self
|
||||
.delimited('\"')
|
||||
.reason("Double quote (\") was opened, but never closed")
|
||||
.span(&position)?;
|
||||
let kind = StringLiteral(bake_string(&buffer)?);
|
||||
return t(kind, position);
|
||||
}
|
||||
buffer.push(current);
|
||||
// Match number
|
||||
if current.is_ascii_digit() {
|
||||
// Only one dot per number
|
||||
let mut encountered_dot = false;
|
||||
while let Some(c) = self.peek(0) {
|
||||
if c == '.' && !encountered_dot {
|
||||
if let Some('.') = self.peek(1) {
|
||||
break;
|
||||
}
|
||||
encountered_dot = true;
|
||||
} else if !(c == '_' || c == 'x' || c.is_ascii_hexdigit()) {
|
||||
break;
|
||||
}
|
||||
buffer.push(c);
|
||||
let _ = self.next_char();
|
||||
}
|
||||
return t(parse_number(&buffer).span(&position)?, position);
|
||||
}
|
||||
// Match keyword or identifier
|
||||
while let Some(c) = self.peek(0) {
|
||||
if c.is_alphanumeric() || c == '_' {
|
||||
let _ = self.next_char();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
buffer.push(c);
|
||||
}
|
||||
// Match keywords
|
||||
{
|
||||
let kind = match buffer.as_str() {
|
||||
"if" => If,
|
||||
"else" => Else,
|
||||
"and" => And,
|
||||
"or" => Or,
|
||||
"xor" => Xor,
|
||||
"nand" => Nand,
|
||||
"nor" => Nor,
|
||||
"xnor" => Xnor,
|
||||
"for" => For,
|
||||
"while" => While,
|
||||
"print" => Print,
|
||||
"break" => Break,
|
||||
"return" => Return,
|
||||
"continue" => Continue,
|
||||
"not" => Not,
|
||||
"true" => True,
|
||||
"false" => False,
|
||||
"struct" => Struct,
|
||||
"enum" => Enum,
|
||||
"union" => Union,
|
||||
_ => Identifier(buffer),
|
||||
};
|
||||
return t(kind, position);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = char>> Iterator for Tokenizer<I> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
match self._next() {
|
||||
Ok(Token(TokenKind::EOF, span)) => {
|
||||
if self.finished {
|
||||
return None;
|
||||
} else {
|
||||
self.finished = true;
|
||||
return Some(Token(TokenKind::EOF, span));
|
||||
}
|
||||
},
|
||||
Ok(r) => return Some(r),
|
||||
_ => {},
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_number(num: &str) -> Result<TokenKind> {
|
||||
use TokenKind::*;
|
||||
let num = num.replace('_', "");
|
||||
// Floating point (only decimal)
|
||||
if num.contains('.') {
|
||||
num.parse::<f64>().map(|f| FloatLiteral(f)).coerce()
|
||||
}
|
||||
// Hex integer
|
||||
else if let Some(hex) = num.strip_prefix("0x") {
|
||||
i64::from_str_radix(hex, 16)
|
||||
.map(|i| IntegerLiteral(i))
|
||||
.coerce()
|
||||
}
|
||||
// Octal integer
|
||||
else if let Some(oct) = num.strip_prefix("0o") {
|
||||
i64::from_str_radix(oct, 8)
|
||||
.map(|i| IntegerLiteral(i))
|
||||
.coerce()
|
||||
}
|
||||
// Binary integer
|
||||
else if let Some(bin) = num.strip_prefix("0b") {
|
||||
i64::from_str_radix(bin, 2)
|
||||
.map(|i| IntegerLiteral(i))
|
||||
.coerce()
|
||||
}
|
||||
// Decimal integer
|
||||
else {
|
||||
num.parse::<i64>().map(|i| IntegerLiteral(i)).coerce()
|
||||
}
|
||||
}
|
||||
|
||||
fn bake_string(s: &str) -> Result<String> {
|
||||
let mut baked = String::with_capacity(s.len());
|
||||
let mut it = s.chars();
|
||||
loop {
|
||||
match it.next() {
|
||||
Some('\\') => baked.push(match it.next() {
|
||||
Some('n') => '\n', // New line
|
||||
Some('r') => '\r', // Carriage return
|
||||
Some('t') => '\t', // Tab
|
||||
Some('b') => '\x08', // Backspace
|
||||
Some('\\') => '\\', // Backslash
|
||||
Some('\0') => '\0', // Null
|
||||
Some('"') => '\"', // Double quote
|
||||
Some('\'') => '\'', // Single quote
|
||||
Some('x') => {
|
||||
// Ascii escapes
|
||||
let mut a = || {
|
||||
let a = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
|
||||
let b = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
|
||||
let num = (a << 4) | b;
|
||||
char::from_u32(num)
|
||||
};
|
||||
a().reason(format!("Found invalid ASCII (\\aXX) escape sequence"))?
|
||||
},
|
||||
Some('u') => {
|
||||
// Unicode escapes
|
||||
let mut a = || {
|
||||
let a = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
|
||||
let b = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
|
||||
let c = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
|
||||
let d = u32::from_str_radix(&it.next()?.to_string(), 16).ok()?;
|
||||
let num = (a << 12) | (b << 8) | (c << 4) | d;
|
||||
char::from_u32(num)
|
||||
};
|
||||
a().reason("Found invalid Unicode (\\uXXXX) escape sequence")?
|
||||
},
|
||||
_ => return Err(Diagnostic::new("Found invalid escape sequence")),
|
||||
}),
|
||||
// Unremarkable character
|
||||
Some(c) => baked.push(c),
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
Ok(baked)
|
||||
}
|
||||
|
|
280
src/treewalk.rs
Normal file
280
src/treewalk.rs
Normal file
|
@ -0,0 +1,280 @@
|
|||
use crate::err::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{
|
||||
Expression, ExpressionKind, Statement, StatementKind, Token, TokenKind,
|
||||
};
|
||||
|
||||
/// A runtime value handled by the tree-walking interpreter in this file.
#[derive(Debug, Clone)]
|
||||
enum Value {
|
||||
/// Signed 64-bit integer.
Integer(i64),
|
||||
/// 64-bit floating point number.
Real(f64),
|
||||
/// Owned string.
String(String),
|
||||
/// Boolean, the only value accepted as an `if` / `while` predicate.
Boolean(bool),
|
||||
/// Placeholder stored by `Scope::declare` until a value is assigned.
Undefined,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct Scope {
|
||||
outer: Option<Box<Scope>>,
|
||||
declarations: HashMap<String, Value>,
|
||||
}
|
||||
|
||||
impl Scope {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
outer: None,
|
||||
declarations: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn enscope(&mut self) -> &mut Self {
|
||||
*self = Self {
|
||||
outer: Some(Box::new(self.clone())),
|
||||
declarations: HashMap::new(),
|
||||
};
|
||||
self
|
||||
}
|
||||
|
||||
fn descope(&mut self) -> &mut Self {
|
||||
if let Some(outer) = &self.outer {
|
||||
*self = *outer.clone();
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn declare(&mut self, key: String) -> Result<()> {
|
||||
if self.declarations.contains_key(&key) {
|
||||
return error()
|
||||
.reason(format!("Re-declaration of '{key}' in same scope"));
|
||||
}
|
||||
self.declarations.insert(key, Value::Undefined);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn assign(&mut self, key: String, value: Value) -> Result<()> {
|
||||
if !self.declarations.contains_key(&key) {
|
||||
if let Some(outer) = &mut self.outer {
|
||||
return outer.assign(key, value);
|
||||
}
|
||||
return error()
|
||||
.reason(format!("Assignemnt to '{key}' before declaration"));
|
||||
}
|
||||
self.declarations.insert(key, value);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn access(&self, key: String) -> Result<Value> {
|
||||
match self.declarations.get(&key) {
|
||||
Some(v) => Ok(v.clone()),
|
||||
None => {
|
||||
if let Some(outer) = &self.outer {
|
||||
outer.access(key)
|
||||
} else {
|
||||
error().reason(format!("'{key}' was never declared"))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Interpreter<I: Iterator<Item = Statement>> {
|
||||
scope: Scope,
|
||||
iter: I,
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = Statement>> Interpreter<I> {
|
||||
pub fn new(iter: I) -> Self {
|
||||
Self {
|
||||
scope: Scope::new(),
|
||||
iter,
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate_unary(
|
||||
&mut self,
|
||||
token: TokenKind,
|
||||
child: Expression,
|
||||
) -> Result<Value> {
|
||||
use TokenKind as t;
|
||||
use Value as v;
|
||||
let val = self.evaluate(child)?;
|
||||
Ok(match val {
|
||||
v::Integer(i) => v::Integer(match token {
|
||||
t::Plus => i,
|
||||
t::Minus => -i,
|
||||
_ => {
|
||||
return error()
|
||||
.reason(format!("Unary {token:?} is undefined for integers"));
|
||||
},
|
||||
}),
|
||||
v::Real(r) => v::Real(match token {
|
||||
t::Plus => r,
|
||||
t::Minus => -r,
|
||||
_ => {
|
||||
return error()
|
||||
.reason(format!("Unary {token:?} is undefined for reals"));
|
||||
},
|
||||
}),
|
||||
v::Boolean(b) => v::Boolean(match token {
|
||||
t::Not => !b,
|
||||
_ => {
|
||||
return error()
|
||||
.reason(format!("Unary {token:?} is undefined for booleans"));
|
||||
},
|
||||
}),
|
||||
_ => {
|
||||
return error()
|
||||
.reason(format!("Binary {token:?} is undefined for {val:?}",));
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn evaluate_binary(
|
||||
&mut self,
|
||||
token: TokenKind,
|
||||
left: Expression,
|
||||
right: Expression,
|
||||
) -> Result<Value> {
|
||||
use TokenKind as t;
|
||||
use Value::*;
|
||||
let left = self.evaluate(left)?;
|
||||
let right = self.evaluate(right)?;
|
||||
Ok(match (left.clone(), right.clone()) {
|
||||
(Integer(l), Integer(r)) => match token {
|
||||
t::Plus => Integer(l + r),
|
||||
t::Minus => Integer(l - r),
|
||||
t::Star => Integer(l * r),
|
||||
t::Slash => Integer(l / r),
|
||||
t::Percent => Integer(l % r),
|
||||
t::DoubleEqual => Boolean(l == r),
|
||||
t::Less => Boolean(l < r),
|
||||
t::Greater => Boolean(l > r),
|
||||
t::LessEqual => Boolean(l <= r),
|
||||
t::GreaterEqual => Boolean(l >= r),
|
||||
t => {
|
||||
return error()
|
||||
.reason(format!("Binary {t:?} is undefined for integers"));
|
||||
},
|
||||
},
|
||||
(Real(l), Real(r)) => Real(match token {
|
||||
t::Plus => l + r,
|
||||
t::Minus => l - r,
|
||||
t::Star => l * r,
|
||||
t::Slash => l / r,
|
||||
t => {
|
||||
return error()
|
||||
.reason(format!("Binary {t:?} is undefined for reals"));
|
||||
},
|
||||
}),
|
||||
_ => {
|
||||
return error().reason(format!(
|
||||
"Binary {:?} is undefined for {:?} and {:?}",
|
||||
token, left, right
|
||||
));
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn evaluate(&mut self, expr: Expression) -> Result<Value> {
|
||||
use ExpressionKind as e;
|
||||
match expr.kind {
|
||||
e::Integer(i) => Ok(Value::Integer(i)),
|
||||
e::Real(r) => Ok(Value::Real(r)),
|
||||
e::String(s) => Ok(Value::String(s)),
|
||||
e::Boolean(b) => Ok(Value::Boolean(b)),
|
||||
e::Identifier(i) => self.scope.access(i),
|
||||
e::Binary { token, left, right } => {
|
||||
self.evaluate_binary(token, *left, *right)
|
||||
},
|
||||
e::Unary { token, child } => self.evaluate_unary(token, *child),
|
||||
e::Parenthesis(e) => self.evaluate(*e),
|
||||
}
|
||||
.span(&expr.span)
|
||||
}
|
||||
|
||||
pub fn execute(&mut self, statement: Statement) -> Result<()> {
|
||||
use StatementKind as s;
|
||||
match statement.kind {
|
||||
s::Mutable { name, value, .. } => {
|
||||
self.scope.declare(name.clone())?;
|
||||
if let Some(value) = value {
|
||||
let value = self.evaluate(value)?;
|
||||
self.scope.assign(name, value)?;
|
||||
}
|
||||
},
|
||||
s::Immutable { name, value, .. } => {
|
||||
self.scope.declare(name.clone())?;
|
||||
let value = self.evaluate(value)?;
|
||||
self.scope.assign(name, value)?;
|
||||
},
|
||||
s::Assignment { name, value } => {
|
||||
let span = value.span;
|
||||
let value = self.evaluate(value).span(&span)?;
|
||||
self.scope.assign(name, value).span(&span)?;
|
||||
},
|
||||
s::Print(e) => {
|
||||
let e = self.evaluate(e)?;
|
||||
println!("{e:?}");
|
||||
},
|
||||
s::Expression(e) => {
|
||||
self.evaluate(e)?;
|
||||
},
|
||||
s::Block(block) => self.block(block)?,
|
||||
s::If {
|
||||
predicate,
|
||||
block,
|
||||
else_,
|
||||
} => {
|
||||
let span = predicate.span;
|
||||
let value = self.evaluate(predicate)?;
|
||||
if let Value::Boolean(b) = value {
|
||||
if b {
|
||||
self.block(block)?;
|
||||
}
|
||||
} else {
|
||||
return error()
|
||||
.reason("Predicate for 'if' statement must be a boolean")
|
||||
.span(&span);
|
||||
}
|
||||
},
|
||||
s::While { predicate, block } => {
|
||||
let span = predicate.span;
|
||||
loop {
|
||||
match self.evaluate(predicate.clone())? {
|
||||
Value::Boolean(true) => self.block(block.clone())?,
|
||||
Value::Boolean(false) => break,
|
||||
_ => {
|
||||
return error()
|
||||
.reason("Predicate for 'while' statement must be a boolean")
|
||||
.span(&span);
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn block(&mut self, block: Vec<Statement>) -> Result<()> {
|
||||
self.scope.enscope();
|
||||
for s in block.into_iter() {
|
||||
let span = s.span;
|
||||
self.execute(s).span(&span)?;
|
||||
}
|
||||
self.scope.descope();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn run(&mut self) -> Result<()> {
|
||||
loop {
|
||||
let next = match self.iter.next() {
|
||||
Some(n) => n,
|
||||
None => break,
|
||||
};
|
||||
let span = next.span;
|
||||
self.execute(next).span(&span)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue