spring break work
This commit is contained in:
parent
5cef0b4563
commit
dd7995ac31
1073
Cargo.lock
generated
1073
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -7,7 +7,5 @@ edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
html_parser = "0.7"
|
html_parser = "0.7"
|
||||||
minify-html = "0.15"
|
|
||||||
grass = "0.13"
|
|
||||||
toml = "0.8"
|
toml = "0.8"
|
||||||
walkdir = "2.5"
|
walkdir = "2.5"
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
|
<!doctype html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
|
|
||||||
<head>
|
<head>
|
||||||
<link>
|
<link "asdf">
|
||||||
<lg:include rel='css' href="./style.css" />
|
<lg:include rel='css' href="./style.css" />
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
<A href="fdfs"> asdf </A>
|
<A href="fdfs"> asdf </A>
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,7 @@ fn run_compiler() -> Result<()> {
|
||||||
fn main() {
|
fn main() {
|
||||||
let test = include_str!("../simple.html").to_string();
|
let test = include_str!("../simple.html").to_string();
|
||||||
// let test = " <as> </as> ".to_string();
|
// let test = " <as> </as> ".to_string();
|
||||||
let r = parser::parse_html(&test);
|
let r = parser::parse_html(&test).unwrap();
|
||||||
for l in r {
|
for l in r {
|
||||||
println!("{l:?}");
|
println!("{l:?}");
|
||||||
}
|
}
|
||||||
|
|
435
src/parser.rs
435
src/parser.rs
|
@ -1,8 +1,48 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::trace::*;
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum ParseError {
|
||||||
|
InvalidTag,
|
||||||
|
MismatchedClosing { expected: String, found: String },
|
||||||
|
UnmatchedOpen(String),
|
||||||
|
UnmatchedClose(String),
|
||||||
|
VoidClosingTag(String),
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
use crate::trace::{self, WithContext};
|
||||||
|
|
||||||
|
impl From<ParseError> for trace::Error {
|
||||||
|
fn from(value: ParseError) -> Self {
|
||||||
|
let msg = match value {
|
||||||
|
ParseError::InvalidTag => "Failed to parse a tag".into(),
|
||||||
|
ParseError::MismatchedClosing { expected, found } => {
|
||||||
|
format!(
|
||||||
|
"Found closing tag '{}' where '{}' was expected",
|
||||||
|
found, expected
|
||||||
|
)
|
||||||
|
},
|
||||||
|
ParseError::UnmatchedOpen(s) => {
|
||||||
|
format!("The tag '{}' is opened, but never closed", s)
|
||||||
|
},
|
||||||
|
ParseError::UnmatchedClose(s) => {
|
||||||
|
format!("The tag '{}' is closed, but never opened", s)
|
||||||
|
},
|
||||||
|
ParseError::VoidClosingTag(s) => {
|
||||||
|
format!("The tag '{}' should not have a closing tag", s)
|
||||||
|
},
|
||||||
|
ParseError::Unknown => {
|
||||||
|
return trace::Error::new(
|
||||||
|
trace::ErrorKind::Unknown,
|
||||||
|
"Unknown error while parsing",
|
||||||
|
)
|
||||||
|
},
|
||||||
|
};
|
||||||
|
trace::Error::new(trace::ErrorKind::Parsing, msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub enum Lexeme<'a> {
|
pub enum Lexeme<'a> {
|
||||||
OpenTag {
|
OpenTag {
|
||||||
name: &'a str,
|
name: &'a str,
|
||||||
|
@ -12,33 +52,82 @@ pub enum Lexeme<'a> {
|
||||||
CloseTag {
|
CloseTag {
|
||||||
name: &'a str,
|
name: &'a str,
|
||||||
},
|
},
|
||||||
Content(&'a str),
|
Text(&'a str),
|
||||||
|
Doctype,
|
||||||
|
Comment,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn normalize_whitespace(s: &str) {
|
fn normalize_whitespace(mut tail: &str) -> String {
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace
|
// https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace
|
||||||
todo!()
|
let mut _index = 0;
|
||||||
}
|
let mut buffer = String::with_capacity(tail.len());
|
||||||
|
while !tail.is_empty() {
|
||||||
fn error(message: impl Into<String>) -> Error {
|
match parse_whitespace_min(tail, 1, &mut _index) {
|
||||||
Error {
|
Some((_, new_tail)) => {
|
||||||
kind: ErrorKind::Parsing,
|
buffer.push(' ');
|
||||||
reason: message.into(),
|
tail = new_tail;
|
||||||
backtrace: vec![],
|
},
|
||||||
|
None => {},
|
||||||
|
}
|
||||||
|
let (chars, new_tail) =
|
||||||
|
parse_while(tail, |c| !c.is_whitespace(), &mut _index);
|
||||||
|
buffer.push_str(chars);
|
||||||
|
tail = new_tail
|
||||||
}
|
}
|
||||||
|
buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Try parsing single specific character
|
/// Try parsing single specific character ignoring case
|
||||||
fn parse_char(i: &str, c: char) -> Option<(&str, &str)> {
|
fn parse_char<'a>(
|
||||||
if i.starts_with(c) {
|
tail: &'a str,
|
||||||
Some((&i[0..1], &i[1..]))
|
c: char,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Option<(&'a str, &'a str)> {
|
||||||
|
if !tail.is_empty() && tail[0..1].eq_ignore_ascii_case(&c.to_string()) {
|
||||||
|
*index += 1;
|
||||||
|
Some((&tail[0..1], &tail[1..]))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse until condition is not true for next character
|
fn parse_str<'a>(
|
||||||
fn parse_while(tail: &str, condition: impl Fn(char) -> bool) -> (&str, &str) {
|
tail: &'a str,
|
||||||
|
to_match: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Option<(&'a str, &'a str)> {
|
||||||
|
if tail.len() < to_match.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if tail[0..to_match.len()].eq_ignore_ascii_case(to_match) {
|
||||||
|
*index += to_match.len();
|
||||||
|
Some((&tail[0..to_match.len()], &tail[to_match.len()..]))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_until_str<'a>(
|
||||||
|
tail: &'a str,
|
||||||
|
to_match: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Option<(&'a str, &'a str)> {
|
||||||
|
for i in 0..tail.len() {
|
||||||
|
let substr = &tail[0..i];
|
||||||
|
if substr.ends_with(to_match) {
|
||||||
|
*index += i;
|
||||||
|
return Some((&tail[0..i], &tail[i..]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse until condition is not true for next character
|
||||||
|
fn parse_while<'a>(
|
||||||
|
tail: &'a str,
|
||||||
|
condition: impl Fn(char) -> bool,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> (&'a str, &'a str) {
|
||||||
let mut end;
|
let mut end;
|
||||||
let mut it = tail.char_indices();
|
let mut it = tail.char_indices();
|
||||||
'outer: loop {
|
'outer: loop {
|
||||||
|
@ -55,105 +144,166 @@ fn parse_while(tail: &str, condition: impl Fn(char) -> bool) -> (&str, &str) {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
*index += end;
|
||||||
(&tail[0..end], &tail[end..])
|
(&tail[0..end], &tail[end..])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_whitespace(i: &str) -> (&str, &str) {
|
fn parse_whitespace<'a>(i: &'a str, index: &mut usize) -> (&'a str, &'a str) {
|
||||||
parse_while(i, |c| c.is_whitespace())
|
parse_while(i, |c| c.is_whitespace(), index)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_doctype(tail: &str) -> Option<(&str, &str)> {
|
fn parse_whitespace_min<'a>(
|
||||||
const doctype_str = "<!DOCTYPE>"
|
tail: &'a str,
|
||||||
|
min: usize,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Option<(&'a str, &'a str)> {
|
||||||
|
let mut new_index = 0;
|
||||||
|
let (ws, tail) = parse_whitespace(tail, &mut new_index);
|
||||||
|
if ws.len() < min {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
*index += new_index;
|
||||||
|
Some((ws, tail))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Try parsing all characters between two delimiter
|
/// Try parsing all characters between two delimiter
|
||||||
/// characters
|
/// characters
|
||||||
fn parse_delimited(i: &str, delimiter: char) -> Option<(&str, &str)> {
|
fn parse_delimited<'a>(
|
||||||
let (_, tail) = parse_char(i, delimiter)?;
|
i: &'a str,
|
||||||
let (value, tail) = parse_while(tail, |c| c != delimiter);
|
delimiter: char,
|
||||||
let (_, tail) = parse_char(tail, delimiter)?;
|
index: &mut usize,
|
||||||
|
) -> Option<(&'a str, &'a str)> {
|
||||||
|
let mut new_index = 0;
|
||||||
|
let (_, tail) = parse_char(i, delimiter, &mut new_index)?;
|
||||||
|
let (value, tail) = parse_while(tail, |c| c != delimiter, &mut new_index);
|
||||||
|
let (_, tail) = parse_char(tail, delimiter, &mut new_index)?;
|
||||||
|
*index += new_index;
|
||||||
Some((value, tail))
|
Some((value, tail))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_tag_name(i: &str) -> Option<(&str, &str)> {
|
fn parse_tag_name<'a>(
|
||||||
let (value, tail) = parse_while(i, |c| c.is_ascii_alphanumeric() || c == ':');
|
i: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Option<(&'a str, &'a str)> {
|
||||||
|
let mut new_index = 0;
|
||||||
|
let (value, tail) = parse_while(
|
||||||
|
i,
|
||||||
|
|c| c.is_ascii_alphanumeric() || [':', '_', '-'].contains(&c),
|
||||||
|
&mut new_index,
|
||||||
|
);
|
||||||
if value.is_empty() {
|
if value.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
|
*index += new_index;
|
||||||
Some((value, tail))
|
Some((value, tail))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_attribute_key(i: &str) -> Option<(&str, &str)> {
|
fn parse_attribute_key<'a>(
|
||||||
let (value, tail) = parse_while(i, |c| {
|
i: &'a str,
|
||||||
!(['"', '\'', '>', '/', '='].contains(&c) || c.is_control())
|
index: &mut usize,
|
||||||
});
|
) -> Option<(&'a str, &'a str)> {
|
||||||
|
let mut new_index = 0;
|
||||||
|
let (value, tail) = parse_while(
|
||||||
|
i,
|
||||||
|
|c| {
|
||||||
|
!(['"', '\'', '>', '/', '='].contains(&c)
|
||||||
|
|| c.is_control()
|
||||||
|
|| c.is_whitespace())
|
||||||
|
},
|
||||||
|
&mut new_index,
|
||||||
|
);
|
||||||
if value.is_empty() {
|
if value.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
|
*index += new_index;
|
||||||
Some((value, tail))
|
Some((value, tail))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_attribute_val(i: &str) -> Option<(&str, &str)> {
|
fn parse_attribute_val<'a>(
|
||||||
|
i: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Option<(&'a str, &'a str)> {
|
||||||
const SINGLE_QUOTE: char = '\'';
|
const SINGLE_QUOTE: char = '\'';
|
||||||
const DOUBLE_QUOTE: char = '"';
|
const DOUBLE_QUOTE: char = '"';
|
||||||
let (value, tail) = parse_delimited(i, '\'') // Single quote delimit
|
let mut new_index = 0;
|
||||||
.or_else(|| parse_delimited(i, '"')) // Double quote delimit
|
let (value, tail) =
|
||||||
|
parse_delimited(i, SINGLE_QUOTE, &mut new_index) // Single quote delimit
|
||||||
|
.or_else(|| parse_delimited(i, DOUBLE_QUOTE, &mut new_index)) // Double quote delimit
|
||||||
.or_else(|| { // Unquoted
|
.or_else(|| { // Unquoted
|
||||||
Some(parse_while(i, |c| {
|
Some(parse_while(i, |c| {
|
||||||
!(c.is_whitespace()
|
!(c.is_whitespace()
|
||||||
|| [SINGLE_QUOTE, DOUBLE_QUOTE, '=', '<', '>', '`'].contains(&c))
|
|| [SINGLE_QUOTE, DOUBLE_QUOTE, '=', '<', '>', '`'].contains(&c))
|
||||||
}))
|
}, &mut new_index))
|
||||||
})?;
|
})?;
|
||||||
if value.is_empty() {
|
*index += new_index;
|
||||||
None
|
Some((value, tail))
|
||||||
} else {
|
|
||||||
Some((value, tail))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns Option<((key, value), tail)>
|
/// Returns Option<((key, value), tail)>
|
||||||
fn parse_key_val(tail: &str) -> Option<((&str, Option<&str>), &str)> {
|
fn parse_key_val<'a>(
|
||||||
|
tail: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Option<((&'a str, Option<&'a str>), &'a str)> {
|
||||||
|
let mut new_index = 0;
|
||||||
// Require whitespace
|
// Require whitespace
|
||||||
let (ws, tail) = parse_whitespace(tail);
|
let (_, tail) = parse_whitespace_min(tail, 1, &mut new_index)?;
|
||||||
if ws.is_empty() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
// Fail when no key found
|
// Fail when no key found
|
||||||
let (key, tail) = parse_attribute_key(tail)?;
|
let (key, tail) = parse_attribute_key(tail, &mut new_index)?;
|
||||||
let (_, tail) = parse_whitespace(tail);
|
if let Some((_, tail)) = parse_char(
|
||||||
if let Some((_, tail)) = parse_char(tail, '=') {
|
parse_whitespace(tail, &mut new_index).1,
|
||||||
let (_, tail) = parse_whitespace(tail);
|
'=',
|
||||||
|
&mut new_index,
|
||||||
|
) {
|
||||||
|
let (_, tail) = parse_whitespace(tail, &mut new_index);
|
||||||
// Fail when = is not followed by value
|
// Fail when = is not followed by value
|
||||||
let (val, tail) = parse_attribute_val(tail)?;
|
let (val, tail) = parse_attribute_val(tail, &mut new_index)?;
|
||||||
Some(((key, Some(val)), tail))
|
let val = if val.is_empty() { None } else { Some(val) };
|
||||||
|
*index += new_index;
|
||||||
|
Some(((key, val), tail))
|
||||||
} else {
|
} else {
|
||||||
|
*index += new_index;
|
||||||
Some(((key, None), tail))
|
Some(((key, None), tail))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tags that are implicitly self closing, ending in /> is optional
|
||||||
const VOID_ELEMENTS: [&str; 16] = [
|
const VOID_ELEMENTS: [&str; 16] = [
|
||||||
"area", "base", "br", "col", "command", "embed", "hr", "img", "input",
|
"area", "base", "br", "col", "command", "embed", "hr", "img", "input",
|
||||||
"keygen", "link", "meta", "param", "source", "track", "wbr",
|
"keygen", "link", "meta", "param", "source", "track", "wbr",
|
||||||
];
|
];
|
||||||
|
|
||||||
fn parse_open_tag(tail: &str) -> Option<(Lexeme, &str)> {
|
fn parse_open_tag<'a>(
|
||||||
|
tail: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Result<(Lexeme<'a>, &'a str), ParseError> {
|
||||||
|
let mut new_index = 0;
|
||||||
// <
|
// <
|
||||||
let (_, tail) = parse_char(tail, '<')?;
|
let (_, tail) =
|
||||||
|
parse_char(tail, '<', &mut new_index).ok_or(ParseError::Unknown)?;
|
||||||
// tag name
|
// tag name
|
||||||
let (name, mut tail) = parse_tag_name(tail)?;
|
let (name, mut tail) =
|
||||||
|
parse_tag_name(tail, &mut new_index).ok_or(ParseError::InvalidTag)?;
|
||||||
// attributes
|
// attributes
|
||||||
let mut attributes: HashMap<&str, Option<&str>> = HashMap::new();
|
let mut attributes: HashMap<&str, Option<&str>> = HashMap::new();
|
||||||
while let Some((kv, new_tail)) = parse_key_val(tail) {
|
while let Some((kv, new_tail)) = parse_key_val(tail, &mut new_index) {
|
||||||
attributes.insert(kv.0, kv.1);
|
attributes.insert(kv.0, kv.1);
|
||||||
tail = new_tail;
|
tail = new_tail;
|
||||||
}
|
}
|
||||||
let (_, tail) = parse_whitespace(tail);
|
let (_, tail) = parse_whitespace(tail, &mut new_index);
|
||||||
let (is_void, tail) = parse_char(tail, '/').unwrap_or(("", tail));
|
let (is_void, tail) =
|
||||||
|
parse_char(tail, '/', &mut new_index).unwrap_or(("", tail));
|
||||||
let is_void = !is_void.is_empty() || VOID_ELEMENTS.contains(&name);
|
let is_void = !is_void.is_empty() || VOID_ELEMENTS.contains(&name);
|
||||||
let (_, tail) = parse_char(tail, '>')?;
|
let (_, tail) = match parse_char(tail, '>', &mut new_index) {
|
||||||
Some((
|
Some(v) => v,
|
||||||
|
None => {
|
||||||
|
return Err(ParseError::InvalidTag);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
*index += new_index;
|
||||||
|
Ok((
|
||||||
Lexeme::OpenTag {
|
Lexeme::OpenTag {
|
||||||
name,
|
name,
|
||||||
attributes,
|
attributes,
|
||||||
|
@ -163,37 +313,162 @@ fn parse_open_tag(tail: &str) -> Option<(Lexeme, &str)> {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_close_tag(tail: &str) -> Option<(Lexeme, &str)> {
|
fn parse_close_tag<'a>(
|
||||||
let (_, tail) = parse_char(tail, '<')?;
|
tail: &'a str,
|
||||||
let (_, tail) = parse_char(tail, '/')?;
|
index: &mut usize,
|
||||||
let (name, tail) = parse_tag_name(tail)?;
|
) -> Result<(Lexeme<'a>, &'a str), ParseError> {
|
||||||
let (_, tail) = parse_whitespace(tail);
|
let mut new_index = 0;
|
||||||
let (_, tail) = parse_char(tail, '>')?;
|
let (_, tail) =
|
||||||
Some((Lexeme::CloseTag { name }, tail))
|
parse_char(tail, '<', &mut new_index).ok_or(ParseError::Unknown)?;
|
||||||
|
let (_, tail) =
|
||||||
|
parse_char(tail, '/', &mut new_index).ok_or(ParseError::Unknown)?;
|
||||||
|
let (name, tail) =
|
||||||
|
parse_tag_name(tail, &mut new_index).ok_or(ParseError::InvalidTag)?;
|
||||||
|
let (_, tail) = parse_whitespace(tail, &mut new_index);
|
||||||
|
let (_, tail) =
|
||||||
|
parse_char(tail, '>', &mut new_index).ok_or(ParseError::InvalidTag)?;
|
||||||
|
*index += new_index;
|
||||||
|
Ok((Lexeme::CloseTag { name }, tail))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_text(tail: &str) -> Option<(Lexeme, &str)> {
|
fn parse_doctype<'a>(
|
||||||
let (txt, tail) = parse_while(tail, |c| c != '<');
|
tail: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Result<(Lexeme<'a>, &'a str), ParseError> {
|
||||||
|
let mut new_index = 0;
|
||||||
|
let mut closure = || -> Option<(&str, &str)> {
|
||||||
|
let (_, tail) = parse_str(tail, "<!doctype", &mut new_index)?;
|
||||||
|
let (_, tail) = parse_whitespace_min(tail, 1, &mut new_index)?;
|
||||||
|
let (_, tail) = parse_str(tail, "html", &mut new_index)?;
|
||||||
|
let (_, tail) = parse_whitespace(tail, &mut new_index);
|
||||||
|
parse_char(tail, '>', &mut new_index)
|
||||||
|
};
|
||||||
|
let (_, tail) = closure().ok_or(ParseError::Unknown)?;
|
||||||
|
*index += new_index;
|
||||||
|
Ok((Lexeme::Doctype, tail))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_comment<'a>(
|
||||||
|
tail: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Result<(Lexeme<'a>, &'a str), ParseError> {
|
||||||
|
let mut new_index = 0;
|
||||||
|
let (_, tail) =
|
||||||
|
parse_str(tail, "<!--", &mut new_index).ok_or(ParseError::Unknown)?;
|
||||||
|
let (_, tail) =
|
||||||
|
parse_until_str(tail, "-->", &mut new_index).ok_or(ParseError::Unknown)?;
|
||||||
|
*index += new_index;
|
||||||
|
Ok((Lexeme::Comment, tail))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_text<'a>(
|
||||||
|
tail: &'a str,
|
||||||
|
index: &mut usize,
|
||||||
|
) -> Result<(Lexeme<'a>, &'a str), ParseError> {
|
||||||
|
let mut new_index = 0;
|
||||||
|
let (txt, tail) = parse_while(tail, |c| c != '<', &mut new_index);
|
||||||
if txt.is_empty() {
|
if txt.is_empty() {
|
||||||
None
|
Err(ParseError::Unknown)
|
||||||
} else {
|
} else {
|
||||||
Some((Lexeme::Content(txt), tail))
|
*index += new_index;
|
||||||
|
Ok((Lexeme::Text(txt), tail))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_html(mut tail: &str) -> Vec<Lexeme> {
|
fn or_keep_error<'a>(
|
||||||
let mut stack = vec![];
|
r: Result<(Lexeme<'a>, &'a str), ParseError>,
|
||||||
|
op: impl FnOnce() -> Result<(Lexeme<'a>, &'a str), ParseError>,
|
||||||
|
) -> Result<(Lexeme<'a>, &'a str), ParseError> {
|
||||||
|
match r {
|
||||||
|
Ok(val) => Ok(val),
|
||||||
|
Err(e) => op().map_err(|new_e| match e {
|
||||||
|
ParseError::Unknown => new_e,
|
||||||
|
_ => e,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn index_to_rc(input: &str, index: usize) -> (usize, usize) {
|
||||||
|
let (mut row, mut col) = (1, 1);
|
||||||
|
for c in input[0..index].chars() {
|
||||||
|
if c == '\n' {
|
||||||
|
row += 1;
|
||||||
|
col = 1;
|
||||||
|
} else {
|
||||||
|
col += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(row, col)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_html(input: &str) -> trace::Result<Vec<Lexeme>> {
|
||||||
|
let mut tail = input;
|
||||||
|
let mut lexeme_stack = vec![];
|
||||||
|
let mut validation_stack = vec![];
|
||||||
|
let mut index = 0;
|
||||||
|
|
||||||
|
let err = |error: ParseError, index: usize| -> trace::Result<Vec<Lexeme>> {
|
||||||
|
let (row, col) = index_to_rc(input, index);
|
||||||
|
let e: trace::Error = error.into();
|
||||||
|
Err(e).ctx(format!("Starting at line {} character {}", row, col))
|
||||||
|
};
|
||||||
|
|
||||||
while !tail.is_empty() {
|
while !tail.is_empty() {
|
||||||
let (_, new_tail) = parse_whitespace(tail);
|
let (_, new_tail) = parse_whitespace(tail, &mut index);
|
||||||
if new_tail.is_empty() {
|
if new_tail.is_empty() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (lm, new_tail) = parse_open_tag(new_tail)
|
let result = or_keep_error(parse_open_tag(new_tail, &mut index), || {
|
||||||
.or_else(|| parse_close_tag(new_tail))
|
parse_close_tag(new_tail, &mut index)
|
||||||
.or_else(|| parse_text(new_tail))
|
});
|
||||||
.unwrap();
|
let result = or_keep_error(result, || parse_text(new_tail, &mut index));
|
||||||
stack.push(lm);
|
let result = or_keep_error(result, || parse_comment(new_tail, &mut index));
|
||||||
|
let (lm, new_tail) =
|
||||||
|
match or_keep_error(result, || parse_doctype(new_tail, &mut index)) {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(e) => {
|
||||||
|
return err(e, index);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
// Validate that open and close tags match
|
||||||
|
match lm {
|
||||||
|
Lexeme::OpenTag { name, is_void, .. } => {
|
||||||
|
if !is_void {
|
||||||
|
validation_stack.push(name);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Lexeme::CloseTag { name } => {
|
||||||
|
if VOID_ELEMENTS.contains(&name) {
|
||||||
|
return err(ParseError::VoidClosingTag(name.into()).into(), index);
|
||||||
|
}
|
||||||
|
if let Some(top) = validation_stack.pop() {
|
||||||
|
if name != top {
|
||||||
|
return err(
|
||||||
|
ParseError::MismatchedClosing {
|
||||||
|
expected: top.into(),
|
||||||
|
found: name.into(),
|
||||||
|
},
|
||||||
|
index,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return err(ParseError::UnmatchedClose(name.into()), index);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Lexeme::Comment => {
|
||||||
|
tail = new_tail;
|
||||||
|
continue;
|
||||||
|
},
|
||||||
|
_ => {},
|
||||||
|
};
|
||||||
|
|
||||||
|
lexeme_stack.push(lm);
|
||||||
|
|
||||||
tail = new_tail;
|
tail = new_tail;
|
||||||
}
|
}
|
||||||
stack
|
if let Some(top) = validation_stack.pop() {
|
||||||
|
let e: trace::Error = ParseError::UnmatchedOpen(top.into()).into();
|
||||||
|
return Err(e).ctx("At end of file");
|
||||||
|
}
|
||||||
|
Ok(lexeme_stack)
|
||||||
}
|
}
|
||||||
|
|
30
src/trace.rs
30
src/trace.rs
|
@ -44,15 +44,26 @@ pub enum ErrorKind {
|
||||||
Unknown,
|
Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for Error {
|
impl Display for ErrorKind {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{:?} error\nReason:\n\t{}\nBacktrace:\n",
|
"{}",
|
||||||
self.kind, self.reason
|
match self {
|
||||||
)?;
|
ErrorKind::IO => "IO",
|
||||||
|
ErrorKind::Parsing => "PARSING",
|
||||||
|
ErrorKind::Compilation => "COMPILATION",
|
||||||
|
ErrorKind::Unknown => "UNKNOWN",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for Error {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "\n[{} ERROR] {}\nBacktrace:\n", self.kind, self.reason)?;
|
||||||
for s in self.backtrace.iter().rev() {
|
for s in self.backtrace.iter().rev() {
|
||||||
write!(f, "\t{}\n", s)?;
|
write!(f, "{}\n", s)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -129,9 +140,10 @@ where
|
||||||
S: Into<String>,
|
S: Into<String>,
|
||||||
{
|
{
|
||||||
fn ctx(self, s: S) -> Result<T> {
|
fn ctx(self, s: S) -> Result<T> {
|
||||||
match self {
|
self.ok_or_else(|| Error {
|
||||||
Some(v) => Ok(v),
|
kind: ErrorKind::Unknown,
|
||||||
None => Err(Error::new(ErrorKind::Unknown, "Missing expected value")),
|
reason: "Missing expected value".into(),
|
||||||
}
|
backtrace: vec![s.into()],
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,8 +7,7 @@
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
<link rel="stylesheet" href="test_files/main-ad0a5132b4027392.css">
|
<link rel="stylesheet" href="test_files/main-ad0a5132b4027392.css">
|
||||||
|
|
||||||
<link rel="preload" href="https://logan-gatlin.com/web-2abb0afbb41a1a01_bg.wasm" as="fetch" type="application/wasm"
|
<link rel="preload" href="https://logan-gatlin.com/web-2abb0afbb41a1a01_bg.wasm" as="fetch" type="application/wasm" crossorigin="">
|
||||||
crossorigin="">
|
|
||||||
<link rel="modulepreload" href="https://logan-gatlin.com/web-2abb0afbb41a1a01.js">
|
<link rel="modulepreload" href="https://logan-gatlin.com/web-2abb0afbb41a1a01.js">
|
||||||
<link
|
<link
|
||||||
href="data:text/css,%3Ais(%5Bid*%3D'google_ads_iframe'%5D%2C%5Bid*%3D'taboola-'%5D%2C.taboolaHeight%2C.taboola-placeholder%2C%23credential_picker_container%2C%23credentials-picker-container%2C%23credential_picker_iframe%2C%5Bid*%3D'google-one-tap-iframe'%5D%2C%23google-one-tap-popup-container%2C.google-one-tap-modal-div%2C%23amp_floatingAdDiv%2C%23ez-content-blocker-container)%20%7Bdisplay%3Anone!important%3Bmin-height%3A0!important%3Bheight%3A0!important%3B%7D"
|
href="data:text/css,%3Ais(%5Bid*%3D'google_ads_iframe'%5D%2C%5Bid*%3D'taboola-'%5D%2C.taboolaHeight%2C.taboola-placeholder%2C%23credential_picker_container%2C%23credentials-picker-container%2C%23credential_picker_iframe%2C%5Bid*%3D'google-one-tap-iframe'%5D%2C%23google-one-tap-popup-container%2C.google-one-tap-modal-div%2C%23amp_floatingAdDiv%2C%23ez-content-blocker-container)%20%7Bdisplay%3Anone!important%3Bmin-height%3A0!important%3Bheight%3A0!important%3B%7D"
|
||||||
|
|
Loading…
Reference in a new issue