From dd7995ac31eb41333a7ecd910f01e8b156866b00 Mon Sep 17 00:00:00 2001 From: voidNUL <50534996+Xterminate1818@users.noreply.github.com> Date: Sat, 16 Mar 2024 18:27:14 -0500 Subject: [PATCH] spring break work --- Cargo.lock | 1073 +------------------------------------------------ Cargo.toml | 2 - simple.html | 5 +- src/main.rs | 2 +- src/parser.rs | 435 ++++++++++++++++---- src/trace.rs | 30 +- test.html | 3 +- 7 files changed, 385 insertions(+), 1165 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ae69e7..6598cec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,126 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "ahash" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" -dependencies = [ - "getrandom", - "once_cell", - "version_check", -] - -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "getrandom", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "0.7.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" -dependencies = [ - "memchr", -] - -[[package]] -name = "anstream" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" - -[[package]] -name = "anstyle-parse" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" -dependencies = [ - "anstyle", - "windows-sys", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "base64-simd" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "781dd20c3aff0bd194fe7d2a977dd92f21c173891f3a03b677359e5fa457e5d5" -dependencies = [ - "simd-abstraction", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" - -[[package]] -name = "bitvec" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -131,111 +11,12 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bumpalo" -version = "3.15.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" - -[[package]] -name = "bytecheck" -version = "0.6.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" -dependencies = [ - "bytecheck_derive", - "ptr_meta", - "simdutf8", -] - -[[package]] -name = "bytecheck_derive" -version = "0.6.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" - [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "clap" -version = "4.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_lex" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" - -[[package]] -name = "codemap" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e769b5c8c8283982a987c6e948e540254f1058d5a74b8794914d4ef5fc2a24" - -[[package]] -name = "colorchoice" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" - -[[package]] -name = "const-str" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21077772762a1002bb421c3af42ac1725fa56066bfc53d9a55bb79905df2aaf3" -dependencies = [ - "const-str-proc-macro", -] - -[[package]] -name = "const-str-proc-macro" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e1e0fdd2e5d3041e530e1b21158aeeef8b5d0e306bc5c1e3d6cf0930d10e25a" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "cpufeatures" version = "0.2.12" @@ -245,31 +26,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" - [[package]] name = "crypto-common" version = "0.1.6" @@ -280,66 +36,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "cssparser" -version = "0.33.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9be934d936a0fbed5bcdc01042b770de1398bf79d0e192f49fa7faea0e99281e" -dependencies = [ - "cssparser-macros", - "dtoa-short", - "itoa", - "phf 0.11.2", - "smallvec", -] - -[[package]] -name = "cssparser-color" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556c099a61d85989d7af52b692e35a8d68a57e7df8c6d07563dc0778b3960c9f" -dependencies = [ - "cssparser", -] - -[[package]] -name = "cssparser-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" -dependencies = [ - "quote", - "syn 2.0.51", -] - -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if", - "hashbrown 0.14.3", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "data-encoding" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" - -[[package]] -name = "data-url" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30bfce702bcfa94e906ef82421f2c0e61c076ad76030c16ee5d2e9a32fe193" -dependencies = [ - "matches", -] - [[package]] name = "digest" version = "0.10.7" @@ -356,48 +52,12 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "dtoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" - -[[package]] -name = "dtoa-short" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74" -dependencies = [ - "dtoa", -] - -[[package]] -name = "either" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" - [[package]] name = "equivalent" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -408,60 +68,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "getrandom" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "grass" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b89786a806d5b192cf4e573f9831c847a455a142d000c922bdfc1e5edad14303" -dependencies = [ - "clap", - "grass_compiler", -] - -[[package]] -name = "grass_compiler" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cf7d155dd7cef20195016d01005033a5521aad307033f0f8e8bf0a02f5f7554" -dependencies = [ - "codemap", - "indexmap", - "lasso", - "once_cell", - "phf 0.11.2", - "rand", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.8", -] - -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash 0.8.11", - "bumpalo", -] - [[package]] name = "hashbrown" version = "0.14.3" @@ -472,9 +78,7 @@ checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" name = "html" version = "0.1.0" dependencies = [ - "grass", "html_parser", - "minify-html", "toml", "walkdir", ] @@ -501,25 +105,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ "equivalent", - "hashbrown 0.14.3", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", + "hashbrown", ] [[package]] @@ -528,201 +114,24 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" -[[package]] -name = "lasso" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2" -dependencies = [ - "hashbrown 0.13.2", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" -[[package]] -name = "lightningcss" -version = "1.0.0-alpha.54" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07d306844e5af1753490c420c0d6ae3d814b00725092d106332762827ca8f0fe" -dependencies = [ - "ahash 0.8.11", - "bitflags 2.4.2", - "const-str", - "cssparser", - "cssparser-color", - "dashmap", - "data-encoding", - "getrandom", - "itertools 0.10.5", - "lazy_static", - "parcel_selectors", - "parcel_sourcemap", - "paste", - "pathdiff", - "rayon", - "serde", - "smallvec", -] - -[[package]] -name = "lock_api" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" - -[[package]] -name = "matches" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" - [[package]] name = "memchr" version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "minify-html" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd4517942a8e7425c990b14977f86a63e4996eed7b15cfcca1540126ac5ff25" -dependencies = [ - "aho-corasick", - "lazy_static", - "lightningcss", - "memchr", - "minify-html-common", - "minify-js", - "once_cell", - "rustc-hash", -] - -[[package]] -name = "minify-html-common" -version = "0.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697a6b40dffdc5de10c0cbd709dc2bc2039cea9dab8aaa636eb9a49d6b411780" -dependencies = [ - "aho-corasick", - "itertools 0.12.1", - "lazy_static", - "memchr", - "rustc-hash", - "serde", - "serde_json", -] - -[[package]] -name = "minify-js" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22d6c512a82abddbbc13b70609cb2beff01be2c7afff534d6e5e1c85e438fc8b" -dependencies = [ - "lazy_static", - "parse-js", -] - [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "outref" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f222829ae9293e33a9f5e9f440c6760a3d450a64affe1846486b140db81c1f4" - -[[package]] -name = "parcel_selectors" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d74befe2d076330d9a58bf9ca2da424568724ab278adf15fb5718253133887" -dependencies = [ - "bitflags 2.4.2", - "cssparser", - "fxhash", - "log", - "phf 0.10.1", - "phf_codegen", - "precomputed-hash", - "smallvec", -] - -[[package]] -name = "parcel_sourcemap" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "485b74d7218068b2b7c0e3ff12fbc61ae11d57cb5d8224f525bd304c6be05bbb" -dependencies = [ - "base64-simd", - "data-url", - "rkyv", - "serde", - "serde_json", - "vlq", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.48.5", -] - -[[package]] -name = "parse-js" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ec3b11d443640ec35165ee8f6f0559f1c6f41878d70330fe9187012b5935f02" -dependencies = [ - "aho-corasick", - "bumpalo", - "hashbrown 0.13.2", - "lazy_static", - "memchr", -] - -[[package]] -name = "paste" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" - -[[package]] -name = "pathdiff" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" - [[package]] name = "pest" version = "2.7.7" @@ -754,7 +163,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.51", + "syn", ] [[package]] @@ -768,98 +177,6 @@ dependencies = [ "sha2", ] -[[package]] -name = "phf" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" -dependencies = [ - "phf_shared 0.10.0", -] - -[[package]] -name = "phf" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" -dependencies = [ - "phf_macros", - "phf_shared 0.11.2", -] - -[[package]] -name = "phf_codegen" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" -dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", -] - -[[package]] -name = "phf_generator" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" -dependencies = [ - "phf_shared 0.10.0", - "rand", -] - -[[package]] -name = "phf_generator" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" -dependencies = [ - "phf_shared 0.11.2", - "rand", -] - -[[package]] -name = "phf_macros" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" -dependencies = [ - "phf_generator 0.11.2", - "phf_shared 0.11.2", - "proc-macro2", - "quote", - "syn 2.0.51", -] - -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher", -] - -[[package]] -name = "phf_shared" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" -dependencies = [ - "siphasher", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - [[package]] name = "proc-macro2" version = "1.0.78" @@ -869,26 +186,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "ptr_meta" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" -dependencies = [ - "ptr_meta_derive", -] - -[[package]] -name = "ptr_meta_derive" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "quote" version = "1.0.35" @@ -898,115 +195,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rayon" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "rend" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" -dependencies = [ - "bytecheck", -] - -[[package]] -name = "rkyv" -version = "0.7.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" -dependencies = [ - "bitvec", - "bytecheck", - "bytes", - "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", - "seahash", - "tinyvec", - "uuid", -] - -[[package]] -name = "rkyv_derive" -version = "0.7.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "ryu" version = "1.0.17" @@ -1022,18 +210,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "seahash" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" - [[package]] name = "serde" version = "1.0.197" @@ -1051,7 +227,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn", ] [[package]] @@ -1085,50 +261,6 @@ dependencies = [ "digest", ] -[[package]] -name = "simd-abstraction" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cadb29c57caadc51ff8346233b5cec1d240b68ce55cf1afc764818791876987" -dependencies = [ - "outref", -] - -[[package]] -name = "simdutf8" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" - -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - -[[package]] -name = "smallvec" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" - -[[package]] -name = "strsim" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.51" @@ -1140,12 +272,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "thiserror" version = "1.0.57" @@ -1163,24 +289,9 @@ checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn", ] -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "toml" version = "0.8.10" @@ -1233,30 +344,12 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "utf8parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - -[[package]] -name = "uuid" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" - [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "vlq" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65dd7eed29412da847b0f78bcec0ac98588165988a8cfe41d4ea1d429f8ccfff" - [[package]] name = "walkdir" version = "2.5.0" @@ -1267,12 +360,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - [[package]] name = "winapi" version = "0.3.9" @@ -1304,129 +391,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.4", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" -dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" - [[package]] name = "winnow" version = "0.6.5" @@ -1435,32 +399,3 @@ checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" dependencies = [ "memchr", ] - -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] - -[[package]] -name = "zerocopy" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.51", -] diff --git a/Cargo.toml b/Cargo.toml index 848da67..9b06940 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,5 @@ edition = "2021" [dependencies] html_parser = "0.7" -minify-html = "0.15" -grass = "0.13" toml = "0.8" walkdir = "2.5" diff --git a/simple.html b/simple.html index dd0b7d8..19ddcad 100644 --- a/simple.html +++ b/simple.html @@ -1,12 +1,13 @@ + - + asdf - + diff --git a/src/main.rs b/src/main.rs index f4ffdb0..4c1a85f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -67,7 +67,7 @@ fn run_compiler() -> Result<()> { fn main() { let test = include_str!("../simple.html").to_string(); // let test = " ".to_string(); - let r = parser::parse_html(&test); + let r = parser::parse_html(&test).unwrap(); for l in r { println!("{l:?}"); } diff --git a/src/parser.rs b/src/parser.rs index 018ce26..8798374 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,8 +1,48 @@ use std::collections::HashMap; -use crate::trace::*; - #[derive(Clone, Debug)] +pub enum ParseError { + InvalidTag, + MismatchedClosing { expected: String, found: String }, + UnmatchedOpen(String), + UnmatchedClose(String), + VoidClosingTag(String), + Unknown, +} + +use crate::trace::{self, WithContext}; + +impl From for trace::Error { + fn from(value: ParseError) -> Self { + let msg = match value { + ParseError::InvalidTag => "Failed to parse a tag".into(), + ParseError::MismatchedClosing { expected, found } => { + format!( + "Found closing tag '{}' where '{}' was expected", + found, expected + ) + }, + ParseError::UnmatchedOpen(s) => { + format!("The tag '{}' is opened, but never closed", s) + }, + ParseError::UnmatchedClose(s) => { + format!("The tag '{}' is closed, but never opened", s) + }, + ParseError::VoidClosingTag(s) => { + format!("The tag '{}' should not have a closing tag", s) + }, + ParseError::Unknown => { + return trace::Error::new( + trace::ErrorKind::Unknown, + "Unknown error while parsing", + ) + }, + }; + trace::Error::new(trace::ErrorKind::Parsing, msg) + } +} + +#[derive(Clone, Debug, PartialEq)] pub enum Lexeme<'a> { OpenTag { name: &'a str, @@ -12,33 +52,82 @@ pub enum Lexeme<'a> { CloseTag { name: &'a str, }, - Content(&'a str), + Text(&'a str), + Doctype, + Comment, } -fn normalize_whitespace(s: &str) { +fn normalize_whitespace(mut tail: &str) -> String { // https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace - todo!() -} - -fn error(message: impl Into) -> Error { - Error { - kind: ErrorKind::Parsing, - reason: message.into(), - backtrace: vec![], + let mut _index = 0; + let mut buffer = String::with_capacity(tail.len()); + while !tail.is_empty() { + match parse_whitespace_min(tail, 1, &mut _index) { + Some((_, new_tail)) => { + buffer.push(' '); + tail = new_tail; + }, + None => {}, + } + let (chars, new_tail) = + parse_while(tail, |c| !c.is_whitespace(), &mut _index); + buffer.push_str(chars); + tail = new_tail } + buffer } -/// Try parsing single specific character -fn parse_char(i: &str, c: char) -> Option<(&str, &str)> { - if i.starts_with(c) { - Some((&i[0..1], &i[1..])) +/// Try parsing single specific character ignoring case +fn parse_char<'a>( + tail: &'a str, + c: char, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { + if !tail.is_empty() && tail[0..1].eq_ignore_ascii_case(&c.to_string()) { + *index += 1; + Some((&tail[0..1], &tail[1..])) } else { None } } -// Parse until condition is not true for next character -fn parse_while(tail: &str, condition: impl Fn(char) -> bool) -> (&str, &str) { +fn parse_str<'a>( + tail: &'a str, + to_match: &'a str, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { + if tail.len() < to_match.len() { + return None; + } + if tail[0..to_match.len()].eq_ignore_ascii_case(to_match) { + *index += to_match.len(); + Some((&tail[0..to_match.len()], &tail[to_match.len()..])) + } else { + None + } +} + +fn parse_until_str<'a>( + tail: &'a str, + to_match: &'a str, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { + for i in 0..tail.len() { + let substr = &tail[0..i]; + if substr.ends_with(to_match) { + *index += i; + return Some((&tail[0..i], &tail[i..])); + } + } + None +} + +/// Parse until condition is not true for next character +fn parse_while<'a>( + tail: &'a str, + condition: impl Fn(char) -> bool, + index: &mut usize, +) -> (&'a str, &'a str) { let mut end; let mut it = tail.char_indices(); 'outer: loop { @@ -55,105 +144,166 @@ fn parse_while(tail: &str, condition: impl Fn(char) -> bool) -> (&str, &str) { }, }; } + *index += end; (&tail[0..end], &tail[end..]) } -fn parse_whitespace(i: &str) -> (&str, &str) { - parse_while(i, |c| c.is_whitespace()) +fn parse_whitespace<'a>(i: &'a str, index: &mut usize) -> (&'a str, &'a str) { + parse_while(i, |c| c.is_whitespace(), index) } -fn parse_doctype(tail: &str) -> Option<(&str, &str)> { - const doctype_str = "" +fn parse_whitespace_min<'a>( + tail: &'a str, + min: usize, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { + let mut new_index = 0; + let (ws, tail) = parse_whitespace(tail, &mut new_index); + if ws.len() < min { + None + } else { + *index += new_index; + Some((ws, tail)) + } } /// Try parsing all characters between two delimiter /// characters -fn parse_delimited(i: &str, delimiter: char) -> Option<(&str, &str)> { - let (_, tail) = parse_char(i, delimiter)?; - let (value, tail) = parse_while(tail, |c| c != delimiter); - let (_, tail) = parse_char(tail, delimiter)?; +fn parse_delimited<'a>( + i: &'a str, + delimiter: char, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { + let mut new_index = 0; + let (_, tail) = parse_char(i, delimiter, &mut new_index)?; + let (value, tail) = parse_while(tail, |c| c != delimiter, &mut new_index); + let (_, tail) = parse_char(tail, delimiter, &mut new_index)?; + *index += new_index; Some((value, tail)) } -fn parse_tag_name(i: &str) -> Option<(&str, &str)> { - let (value, tail) = parse_while(i, |c| c.is_ascii_alphanumeric() || c == ':'); +fn parse_tag_name<'a>( + i: &'a str, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { + let mut new_index = 0; + let (value, tail) = parse_while( + i, + |c| c.is_ascii_alphanumeric() || [':', '_', '-'].contains(&c), + &mut new_index, + ); if value.is_empty() { None } else { + *index += new_index; Some((value, tail)) } } -fn parse_attribute_key(i: &str) -> Option<(&str, &str)> { - let (value, tail) = parse_while(i, |c| { - !(['"', '\'', '>', '/', '='].contains(&c) || c.is_control()) - }); +fn parse_attribute_key<'a>( + i: &'a str, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { + let mut new_index = 0; + let (value, tail) = parse_while( + i, + |c| { + !(['"', '\'', '>', '/', '='].contains(&c) + || c.is_control() + || c.is_whitespace()) + }, + &mut new_index, + ); if value.is_empty() { None } else { + *index += new_index; Some((value, tail)) } } -fn parse_attribute_val(i: &str) -> Option<(&str, &str)> { +fn parse_attribute_val<'a>( + i: &'a str, + index: &mut usize, +) -> Option<(&'a str, &'a str)> { const SINGLE_QUOTE: char = '\''; const DOUBLE_QUOTE: char = '"'; - let (value, tail) = parse_delimited(i, '\'') // Single quote delimit - .or_else(|| parse_delimited(i, '"')) // Double quote delimit + let mut new_index = 0; + let (value, tail) = + parse_delimited(i, SINGLE_QUOTE, &mut new_index) // Single quote delimit + .or_else(|| parse_delimited(i, DOUBLE_QUOTE, &mut new_index)) // Double quote delimit .or_else(|| { // Unquoted Some(parse_while(i, |c| { !(c.is_whitespace() || [SINGLE_QUOTE, DOUBLE_QUOTE, '=', '<', '>', '`'].contains(&c)) - })) + }, &mut new_index)) })?; - if value.is_empty() { - None - } else { - Some((value, tail)) - } + *index += new_index; + Some((value, tail)) } /// Returns Option<((key, value), tail)> -fn parse_key_val(tail: &str) -> Option<((&str, Option<&str>), &str)> { +fn parse_key_val<'a>( + tail: &'a str, + index: &mut usize, +) -> Option<((&'a str, Option<&'a str>), &'a str)> { + let mut new_index = 0; // Require whitespace - let (ws, tail) = parse_whitespace(tail); - if ws.is_empty() { - return None; - } + let (_, tail) = parse_whitespace_min(tail, 1, &mut new_index)?; // Fail when no key found - let (key, tail) = parse_attribute_key(tail)?; - let (_, tail) = parse_whitespace(tail); - if let Some((_, tail)) = parse_char(tail, '=') { - let (_, tail) = parse_whitespace(tail); + let (key, tail) = parse_attribute_key(tail, &mut new_index)?; + if let Some((_, tail)) = parse_char( + parse_whitespace(tail, &mut new_index).1, + '=', + &mut new_index, + ) { + let (_, tail) = parse_whitespace(tail, &mut new_index); // Fail when = is not followed by value - let (val, tail) = parse_attribute_val(tail)?; - Some(((key, Some(val)), tail)) + let (val, tail) = parse_attribute_val(tail, &mut new_index)?; + let val = if val.is_empty() { None } else { Some(val) }; + *index += new_index; + Some(((key, val), tail)) } else { + *index += new_index; Some(((key, None), tail)) } } +// Tags that are implicitly self closing, ending in /> is optional const VOID_ELEMENTS: [&str; 16] = [ "area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr", ]; -fn parse_open_tag(tail: &str) -> Option<(Lexeme, &str)> { +fn parse_open_tag<'a>( + tail: &'a str, + index: &mut usize, +) -> Result<(Lexeme<'a>, &'a str), ParseError> { + let mut new_index = 0; // < - let (_, tail) = parse_char(tail, '<')?; + let (_, tail) = + parse_char(tail, '<', &mut new_index).ok_or(ParseError::Unknown)?; // tag name - let (name, mut tail) = parse_tag_name(tail)?; + let (name, mut tail) = + parse_tag_name(tail, &mut new_index).ok_or(ParseError::InvalidTag)?; // attributes let mut attributes: HashMap<&str, Option<&str>> = HashMap::new(); - while let Some((kv, new_tail)) = parse_key_val(tail) { + while let Some((kv, new_tail)) = parse_key_val(tail, &mut new_index) { attributes.insert(kv.0, kv.1); tail = new_tail; } - let (_, tail) = parse_whitespace(tail); - let (is_void, tail) = parse_char(tail, '/').unwrap_or(("", tail)); + let (_, tail) = parse_whitespace(tail, &mut new_index); + let (is_void, tail) = + parse_char(tail, '/', &mut new_index).unwrap_or(("", tail)); let is_void = !is_void.is_empty() || VOID_ELEMENTS.contains(&name); - let (_, tail) = parse_char(tail, '>')?; - Some(( + let (_, tail) = match parse_char(tail, '>', &mut new_index) { + Some(v) => v, + None => { + return Err(ParseError::InvalidTag); + }, + }; + *index += new_index; + Ok(( Lexeme::OpenTag { name, attributes, @@ -163,37 +313,162 @@ fn parse_open_tag(tail: &str) -> Option<(Lexeme, &str)> { )) } -fn parse_close_tag(tail: &str) -> Option<(Lexeme, &str)> { - let (_, tail) = parse_char(tail, '<')?; - let (_, tail) = parse_char(tail, '/')?; - let (name, tail) = parse_tag_name(tail)?; - let (_, tail) = parse_whitespace(tail); - let (_, tail) = parse_char(tail, '>')?; - Some((Lexeme::CloseTag { name }, tail)) +fn parse_close_tag<'a>( + tail: &'a str, + index: &mut usize, +) -> Result<(Lexeme<'a>, &'a str), ParseError> { + let mut new_index = 0; + let (_, tail) = + parse_char(tail, '<', &mut new_index).ok_or(ParseError::Unknown)?; + let (_, tail) = + parse_char(tail, '/', &mut new_index).ok_or(ParseError::Unknown)?; + let (name, tail) = + parse_tag_name(tail, &mut new_index).ok_or(ParseError::InvalidTag)?; + let (_, tail) = parse_whitespace(tail, &mut new_index); + let (_, tail) = + parse_char(tail, '>', &mut new_index).ok_or(ParseError::InvalidTag)?; + *index += new_index; + Ok((Lexeme::CloseTag { name }, tail)) } -fn parse_text(tail: &str) -> Option<(Lexeme, &str)> { - let (txt, tail) = parse_while(tail, |c| c != '<'); +fn parse_doctype<'a>( + tail: &'a str, + index: &mut usize, +) -> Result<(Lexeme<'a>, &'a str), ParseError> { + let mut new_index = 0; + let mut closure = || -> Option<(&str, &str)> { + let (_, tail) = parse_str(tail, "', &mut new_index) + }; + let (_, tail) = closure().ok_or(ParseError::Unknown)?; + *index += new_index; + Ok((Lexeme::Doctype, tail)) +} + +fn parse_comment<'a>( + tail: &'a str, + index: &mut usize, +) -> Result<(Lexeme<'a>, &'a str), ParseError> { + let mut new_index = 0; + let (_, tail) = + parse_str(tail, "", &mut new_index).ok_or(ParseError::Unknown)?; + *index += new_index; + Ok((Lexeme::Comment, tail)) +} + +fn parse_text<'a>( + tail: &'a str, + index: &mut usize, +) -> Result<(Lexeme<'a>, &'a str), ParseError> { + let mut new_index = 0; + let (txt, tail) = parse_while(tail, |c| c != '<', &mut new_index); if txt.is_empty() { - None + Err(ParseError::Unknown) } else { - Some((Lexeme::Content(txt), tail)) + *index += new_index; + Ok((Lexeme::Text(txt), tail)) } } -pub fn parse_html(mut tail: &str) -> Vec { - let mut stack = vec![]; +fn or_keep_error<'a>( + r: Result<(Lexeme<'a>, &'a str), ParseError>, + op: impl FnOnce() -> Result<(Lexeme<'a>, &'a str), ParseError>, +) -> Result<(Lexeme<'a>, &'a str), ParseError> { + match r { + Ok(val) => Ok(val), + Err(e) => op().map_err(|new_e| match e { + ParseError::Unknown => new_e, + _ => e, + }), + } +} + +fn index_to_rc(input: &str, index: usize) -> (usize, usize) { + let (mut row, mut col) = (1, 1); + for c in input[0..index].chars() { + if c == '\n' { + row += 1; + col = 1; + } else { + col += 1; + } + } + (row, col) +} + +pub fn parse_html(input: &str) -> trace::Result> { + let mut tail = input; + let mut lexeme_stack = vec![]; + let mut validation_stack = vec![]; + let mut index = 0; + + let err = |error: ParseError, index: usize| -> trace::Result> { + let (row, col) = index_to_rc(input, index); + let e: trace::Error = error.into(); + Err(e).ctx(format!("Starting at line {} character {}", row, col)) + }; + while !tail.is_empty() { - let (_, new_tail) = parse_whitespace(tail); + let (_, new_tail) = parse_whitespace(tail, &mut index); if new_tail.is_empty() { break; } - let (lm, new_tail) = parse_open_tag(new_tail) - .or_else(|| parse_close_tag(new_tail)) - .or_else(|| parse_text(new_tail)) - .unwrap(); - stack.push(lm); + let result = or_keep_error(parse_open_tag(new_tail, &mut index), || { + parse_close_tag(new_tail, &mut index) + }); + let result = or_keep_error(result, || parse_text(new_tail, &mut index)); + let result = or_keep_error(result, || parse_comment(new_tail, &mut index)); + let (lm, new_tail) = + match or_keep_error(result, || parse_doctype(new_tail, &mut index)) { + Ok(v) => v, + Err(e) => { + return err(e, index); + }, + }; + // Validate that open and close tags match + match lm { + Lexeme::OpenTag { name, is_void, .. } => { + if !is_void { + validation_stack.push(name); + } + }, + Lexeme::CloseTag { name } => { + if VOID_ELEMENTS.contains(&name) { + return err(ParseError::VoidClosingTag(name.into()).into(), index); + } + if let Some(top) = validation_stack.pop() { + if name != top { + return err( + ParseError::MismatchedClosing { + expected: top.into(), + found: name.into(), + }, + index, + ); + } + } else { + return err(ParseError::UnmatchedClose(name.into()), index); + } + }, + Lexeme::Comment => { + tail = new_tail; + continue; + }, + _ => {}, + }; + + lexeme_stack.push(lm); + tail = new_tail; } - stack + if let Some(top) = validation_stack.pop() { + let e: trace::Error = ParseError::UnmatchedOpen(top.into()).into(); + return Err(e).ctx("At end of file"); + } + Ok(lexeme_stack) } diff --git a/src/trace.rs b/src/trace.rs index 62a8294..f12566f 100644 --- a/src/trace.rs +++ b/src/trace.rs @@ -44,15 +44,26 @@ pub enum ErrorKind { Unknown, } -impl Display for Error { +impl Display for ErrorKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "{:?} error\nReason:\n\t{}\nBacktrace:\n", - self.kind, self.reason - )?; + "{}", + match self { + ErrorKind::IO => "IO", + ErrorKind::Parsing => "PARSING", + ErrorKind::Compilation => "COMPILATION", + ErrorKind::Unknown => "UNKNOWN", + } + ) + } +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "\n[{} ERROR] {}\nBacktrace:\n", self.kind, self.reason)?; for s in self.backtrace.iter().rev() { - write!(f, "\t{}\n", s)?; + write!(f, "{}\n", s)?; } Ok(()) } @@ -129,9 +140,10 @@ where S: Into, { fn ctx(self, s: S) -> Result { - match self { - Some(v) => Ok(v), - None => Err(Error::new(ErrorKind::Unknown, "Missing expected value")), - } + self.ok_or_else(|| Error { + kind: ErrorKind::Unknown, + reason: "Missing expected value".into(), + backtrace: vec![s.into()], + }) } } diff --git a/test.html b/test.html index 2958270..5348d7d 100644 --- a/test.html +++ b/test.html @@ -7,8 +7,7 @@ - +