//! Lexer for the toy language, consolidated from patch
//! b55135245d11d7a67080f0a69e08a56edb51349e ("tokens").
//!
//! The original patch also created `.gitignore` (`/target`), `Cargo.lock`,
//! and `Cargo.toml` (package `lang` 0.1.0, edition 2021); those files carry
//! no logic and are unchanged. The Rust below merges `src/token.rs` and
//! `src/main.rs`, with these fixes:
//!   * `<`/`<=` lexed as `Greater`/`GreaterEqual` and `>`/`>=` as
//!     `Less`/`LessEqual` — the comparison token types were swapped.
//!   * Generic arguments lost in transit restored (`Vec<Token>`,
//!     `parse::<f64>()`).
//!   * A word ending exactly at end-of-input was truncated to one byte,
//!     because `end` only advanced when a following non-word char was seen.
//!   * Token spans used `start + 1` for the first char; `len_utf8()` keeps
//!     spans valid for multi-byte characters.
//!   * The REPL spun forever at EOF (`read_line` returning 0 bytes).
//!   * Dead `let mut advance = || {};` removed.

// ==== src/token.rs ====

/// Kind of a lexed token. `Number` carries the parsed value; every other
/// variant is identified by its source span alone.
#[derive(Debug, Clone)]
pub enum TokenType {
    // Symbols
    LeftParen,
    RightParen,
    LeftSquare,
    RightSquare,
    LeftBrace,
    RightBrace,
    Comma,
    Dot,
    Plus,
    Minus,
    Star,
    Slash,
    Semicolon,

    Bang,
    BangEqual,
    Equal,
    DoubleEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,

    // Literals
    String,
    // NOTE(review): reserved — the scanner has no char-literal branch yet.
    Character,
    Number(f64),

    // Words
    Ident,
    And,
    Or,
    Self_,
    Struct,
    True,
    False,
    Fn,
    If,
    Else,
    Nil,
    Print,
    Return,
    Super,
    Let,
    While,
    For,

    // Special
    Unrecognized,
    // NOTE(review): reserved — never produced by the current scanner.
    TooLong,
}

/// A token: its type plus the byte span `start..end` of the lexeme in the
/// input string.
#[derive(Debug, Clone)]
pub struct Token {
    pub ttype: TokenType,
    pub start: usize,
    pub end: usize,
}

/// Splits `input` into tokens.
///
/// The scanner never fails: characters it does not understand become
/// `TokenType::Unrecognized` tokens. Each returned `Token` records the byte
/// span of its lexeme inside `input`, so callers can slice the original
/// string (as the REPL does).
pub fn tokenize(input: &str) -> Vec<Token> {
    let input_str = input;
    let mut input = input.char_indices().peekable();
    let mut tokens = vec![];
    'outer: loop {
        // Skip whitespace; stop cleanly at end of input.
        let (start, c) = 'ws: loop {
            match input.next() {
                None => break 'outer,
                Some((index, character)) if !character.is_whitespace() => {
                    break 'ws (index, character)
                },
                _ => {},
            }
        };
        // len_utf8 (not `+ 1`) keeps the span valid for multi-byte chars.
        let mut end = start + c.len_utf8();
        let ttype = match c {
            // Match single character tokens
            '(' => TokenType::LeftParen,
            ')' => TokenType::RightParen,
            '[' => TokenType::LeftSquare,
            ']' => TokenType::RightSquare,
            '{' => TokenType::LeftBrace,
            '}' => TokenType::RightBrace,
            ',' => TokenType::Comma,
            '.' => TokenType::Dot,
            '+' => TokenType::Plus,
            '-' => TokenType::Minus,
            '*' => TokenType::Star,
            '/' => TokenType::Slash,
            ';' => TokenType::Semicolon,
            // Match two-character operators: peek for a trailing '='.
            '!' => match input.peek() {
                Some((_, '=')) => {
                    input.next();
                    end += 1;
                    TokenType::BangEqual
                },
                _ => TokenType::Bang,
            },
            '=' => match input.peek() {
                Some((_, '=')) => {
                    input.next();
                    end += 1;
                    TokenType::DoubleEqual
                },
                _ => TokenType::Equal,
            },
            // Fixed: '<' is Less/LessEqual (was swapped with '>').
            '<' => match input.peek() {
                Some((_, '=')) => {
                    input.next();
                    end += 1;
                    TokenType::LessEqual
                },
                _ => TokenType::Less,
            },
            '>' => match input.peek() {
                Some((_, '=')) => {
                    input.next();
                    end += 1;
                    TokenType::GreaterEqual
                },
                _ => TokenType::Greater,
            },
            // Match keywords, identifiers, and numeric literals.
            c if c.is_alphanumeric() => 'case: {
                // Consume the full word, advancing `end` as each char is
                // taken so the span is correct even when the word ends the
                // input (the original only updated `end` on the break path).
                while let Some(&(idx, next)) = input.peek() {
                    if next.is_alphanumeric() || next == '_' {
                        input.next();
                        end = idx + next.len_utf8();
                    } else {
                        break;
                    }
                }
                let word = &input_str[start..end];
                // Attempt to parse hex literal
                if let Some(s) =
                    word.strip_prefix("0x").or_else(|| word.strip_prefix("0X"))
                {
                    break 'case match u64::from_str_radix(s, 16) {
                        Ok(n) => TokenType::Number(n as f64),
                        Err(_) => TokenType::Unrecognized,
                    };
                }
                // Attempt to parse binary literal
                if let Some(s) =
                    word.strip_prefix("0b").or_else(|| word.strip_prefix("0B"))
                {
                    break 'case match u64::from_str_radix(s, 2) {
                        Ok(n) => TokenType::Number(n as f64),
                        Err(_) => TokenType::Unrecognized,
                    };
                }
                // Attempt to parse decimal literal
                if let Ok(f) = word.parse::<f64>() {
                    break 'case TokenType::Number(f);
                }
                // Parse keyword or ident
                match word {
                    "and" => TokenType::And,
                    "or" => TokenType::Or,
                    "self" => TokenType::Self_,
                    "struct" => TokenType::Struct,
                    "true" => TokenType::True,
                    "false" => TokenType::False,
                    "fn" => TokenType::Fn,
                    "if" => TokenType::If,
                    "else" => TokenType::Else,
                    "nil" => TokenType::Nil,
                    "print" => TokenType::Print,
                    "return" => TokenType::Return,
                    "super" => TokenType::Super,
                    "let" => TokenType::Let,
                    "while" => TokenType::While,
                    "for" => TokenType::For,
                    _ => TokenType::Ident,
                }
            },
            // Parse string literal: scan to the closing quote.
            '"' => {
                while let Some((new_end, next)) = input.next() {
                    match next {
                        '"' => {
                            end = new_end + 1;
                            break;
                        },
                        // Skip escapes and deal with them later
                        '\\' => {
                            let _ = input.next();
                        },
                        _ => {},
                    }
                }
                // NOTE(review): an unterminated string leaves `end` at the
                // opening quote; decide on an error token when escapes land.
                TokenType::String
            },
            _ => TokenType::Unrecognized,
        };
        tokens.push(Token { ttype, start, end });
    }
    tokens
}

// ==== src/main.rs ====
// (`mod token;` / `token::` prefixes dropped here because both files are
// consolidated into one listing.)

fn main() {
    repl();
}

/// Read-eval-print loop: reads stdin one line at a time and prints each
/// token's lexeme alongside its type.
pub fn repl() {
    let mut buffer = String::new();
    let stdin = std::io::stdin();
    loop {
        // `read_line` returns the byte count; 0 means EOF. The original
        // ignored this and busy-looped forever once stdin closed.
        if stdin.read_line(&mut buffer).unwrap() == 0 {
            break;
        }
        for tok in tokenize(&buffer) {
            println!("{} : {:?}", &buffer[tok.start..tok.end], tok.ttype);
        }
        // Reuse the buffer's allocation instead of replacing the String.
        buffer.clear();
    }
}