Tokens finished

This commit is contained in:
voidNUL 2024-07-25 23:54:06 -05:00
parent 242584f470
commit 95fee4dd13

View file

@ -4,20 +4,39 @@
// Implicit tag // Implicit tag
// h1 a#id."a b c" href="/" {"foo"}; // h1 a#id."a b c" href="/" {"foo"};
type Offset = usize; mod lex;
type Parse<'a> = (&'a str, &'a str, Offset);
type MaybeParse<'a> = Option<Parse<'a>>;
fn parse_until(i: &str, condition: impl Fn(char) -> bool) -> Parse { type Offset = usize;
/// Result of a successful parse: `(parsed value, remaining input, offset)`.
///
/// The offset is the count the parser reports for the input it consumed;
/// parsers built from several steps (e.g. `parse_name`) add up the offsets of
/// their sub-parsers.
type Parse<'a, T> = (T, &'a str, Offset);
/// A parse that may fail: `None` means the parser did not match the input.
type MaybeParse<'a, T> = Option<Parse<'a, T>>;
/// An element tag: its name plus the values of its `id` and `class` attributes.
///
/// An empty `id` or `classes` string means the attribute is absent and is not
/// written out.
#[derive(Debug, Clone)]
struct Tag<'a> {
    /// Element name, e.g. `h1`.
    name: &'a str,
    /// Value of the `id` attribute; empty when there is none.
    id: &'a str,
    /// Value of the `class` attribute; empty when there is none.
    classes: &'a str,
}

impl Tag<'_> {
    /// Appends ` key="value"` to `out`, or nothing at all when `value` is empty.
    ///
    /// The value is always double-quoted: an unquoted value ends at the first
    /// space, so `class=a b c` would be read as one class plus two stray
    /// attributes, and an empty unquoted value (`id= class=`) swallows the next
    /// attribute as its value.
    ///
    /// NOTE(review): the value is written verbatim; a `"` or `&` inside it is
    /// not escaped.
    fn push_attr(out: &mut String, key: &str, value: &str) {
        if value.is_empty() {
            return;
        }
        out.push(' ');
        out.push_str(key);
        out.push_str("=\"");
        out.push_str(value);
        out.push('"');
    }

    /// Appends the opening tag, e.g. `<a id="x" class="a b c">`, to `out`.
    ///
    /// Pieces are pushed straight into `out` so no temporary `String` is built.
    fn write_open(&self, out: &mut String) {
        out.push('<');
        out.push_str(self.name);
        Self::push_attr(out, "id", self.id);
        Self::push_attr(out, "class", self.classes);
        out.push('>');
    }

    /// Appends the closing tag, e.g. `</a>`, to `out`.
    fn write_close(&self, out: &mut String) {
        out.push_str("</");
        out.push_str(self.name);
        out.push('>');
    }
}
fn parse_until(i: &str, condition: impl Fn(char) -> bool) -> Parse<&str> {
i.chars() i.chars()
.position(condition) .position(condition)
.map(|pos| (&i[..pos], &i[pos..], pos)) .map(|pos| (&i[..pos], &i[pos..], pos))
.unwrap_or((&i, "", i.len())) .unwrap_or((&i, "", i.len()))
} }
fn parse_ws(i: &str) -> Parse { parse_until(i, |c| !c.is_whitespace()) } fn parse_ws(i: &str) -> Parse<&str> { parse_until(i, |c| !c.is_whitespace()) }
fn parse_char(i: &str, matches: char) -> MaybeParse { fn parse_char(i: &str, matches: char) -> MaybeParse<&str> {
if let Some(c) = i.chars().next() { if let Some(c) = i.chars().next() {
if c == matches { if c == matches {
return Some((&i[0..1], &i[1..], 1)); return Some((&i[0..1], &i[1..], 1));
@ -26,7 +45,7 @@ fn parse_char(i: &str, matches: char) -> MaybeParse {
None None
} }
fn not_empty(parse: Parse) -> MaybeParse { fn not_empty<'a>(parse: Parse<'a, &'a str>) -> MaybeParse<&'a str> {
if !parse.0.is_empty() { if !parse.0.is_empty() {
Some(parse) Some(parse)
} else { } else {
@ -34,7 +53,7 @@ fn not_empty(parse: Parse) -> MaybeParse {
} }
} }
fn parse_string(i: &str) -> MaybeParse { fn parse_string(i: &str) -> MaybeParse<&str> {
let mut skip = false; let mut skip = false;
let (_, i, o1) = parse_char(i, '"')?; let (_, i, o1) = parse_char(i, '"')?;
let mut o2 = 0; let mut o2 = 0;
@ -45,20 +64,21 @@ fn parse_string(i: &str) -> MaybeParse {
skip = true; skip = true;
} else if c == '"' { } else if c == '"' {
let off = o1 + o2 - 1; let off = o1 + o2 - 1;
return Some((&i[..off], &i[off..], off)); return Some((&i[..off], &i[off+1..], off+2));
} }
o2 += 1; o2 += 1;
} }
None None
} }
fn parse_name(i: &str) -> MaybeParse {
let (name, i, o1) = not_empty(parse_until(i, |c| c.is_whitespace()))?; fn parse_name(i: &str) -> MaybeParse<&str> {
let (name, i, o1) = not_empty(parse_until(i, |c| !(c != '_') || !(c != '-') || !c.is_alphabetic()))?;
let (_, i, o2) = parse_ws(i); let (_, i, o2) = parse_ws(i);
Some((name, i, o1 + o2)) Some((name, i, o1 + o2))
} }
fn parse_id(i: &str) -> MaybeParse { fn parse_id(i: &str) -> MaybeParse<&str> {
let (_, i, o1) = parse_char(i, '#')?; let (_, i, o1) = parse_char(i, '#')?;
let (_, i, o2) = parse_ws(i); let (_, i, o2) = parse_ws(i);
let (id, i, o3) = parse_name(i)?; let (id, i, o3) = parse_name(i)?;
@ -66,7 +86,7 @@ fn parse_id(i: &str) -> MaybeParse {
Some((id, i, o1 + o2 + o3 + o4)) Some((id, i, o1 + o2 + o3 + o4))
} }
fn parse_classes(i: &str) -> MaybeParse { fn parse_classes(i: &str) -> MaybeParse<&str> {
let (_, i, o1) = parse_char(i, '.')?; let (_, i, o1) = parse_char(i, '.')?;
let (_, i, o2) = parse_ws(i); let (_, i, o2) = parse_ws(i);
let (classes, i, o3) = parse_string(i)?; let (classes, i, o3) = parse_string(i)?;
@ -74,21 +94,33 @@ fn parse_classes(i: &str) -> MaybeParse {
Some((classes, i, o1 + o2 + o3 + o4)) Some((classes, i, o1 + o2 + o3 + o4))
} }
fn parse_tag(i: &str) -> Option<()> { enum TokenType {
let (name, i, o1) = parse_name(i)?; String,
let (id, i, o2) = parse_id(i).unwrap_or(("", i, 0)); Name,
let (classes, i, o3) = parse_classes(i).unwrap_or(("", i, 0)); Id,
Some(()) Classes,
Equals,
Close
} }
fn parse(i: &str) { struct Token<'a> {
let mut explicit_stack: Vec<&str> = vec![]; tty: TokenType,
let mut inline_stack: Vec<&str> = vec![]; slice: &'a str,
let mut out = String::new(); offset: usize
let mut off = 0;
} }
/// Turns the source text `i` into a list of tokens.
///
/// Lexing is not written yet: only the empty input is handled, and it yields
/// no tokens.
///
/// # Panics
///
/// Panics on any non-empty input. The earlier `while !i.is_empty() {}` loop
/// never shortened `i`, so it spun forever instead; failing loudly keeps an
/// accidental call from hanging the program until the lexer body exists.
fn tokenize(i: &str) -> Vec<Token> {
    if !i.is_empty() {
        // TODO: lex one token from the front of the input, push it together
        // with its running offset, and continue with the remaining input.
        unimplemented!("tokenize: lexing of non-empty input is not written yet");
    }
    Vec::new()
}
fn main() { fn main() {
let p = parse_classes(".\"a b c\""); let (_, i, o1) = parse_string("\"asdf\"\"asdf\"").unwrap();
println!("{:?}", p); let (_, i, o2) = parse_string(i).unwrap();
println!("{i} {:?}", o1+o2);
} }