diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 0000000..24123b5 --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,27 @@ +name: Rust +on: + push: + branches: [main] + pull_request: + branches: [main] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/cache@v3 + id: cache + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - name: Run bench + run: cargo bench diff --git a/Cargo.lock b/Cargo.lock index 7610aa5..39816b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,33 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -11,6 +38,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "ansi_term" version = "0.12.1" @@ -20,6 +53,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + [[package]] name = "auto_encoder" version = "0.1.7" @@ -29,8 +68,8 @@ dependencies = [ "chardetng", "encoding_rs", "percent-encoding", - "phf", - "phf_codegen", + "phf 0.11.2", + "phf_codegen 0.11.2", ] [[package]] @@ -39,18 +78,59 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "benches" +version = "0.0.0" +dependencies = [ + "criterion", + "fast_html2md", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.0" @@ -68,6 +148,160 @@ dependencies = [ "memchr", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "futures", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa 0.4.8", + "matches", + "phf 0.8.0", + "proc-macro2", + "quote", + "smallvec", + "syn 1.0.109", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.87", +] + [[package]] name = "ctor" version = "0.1.26" @@ -78,12 +312,57 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_more" +version = "0.99.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.87", +] + [[package]] name = "diff" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -95,12 +374,13 @@ dependencies = [ [[package]] name = "fast_html2md" -version = "0.0.25" +version = "0.0.26" dependencies = [ "auto_encoder", "html5ever", "indoc", "lazy_static", + "lol_html", "markup5ever_rcdom", "percent-encoding", "pretty_assertions", @@ -134,6 +414,87 @@ dependencies = [ "new_debug_unreachable", ] +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-sink", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -142,9 +503,40 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", ] +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "html5ever" version = "0.27.0" @@ -156,17 +548,146 @@ dependencies = [ "markup5ever", "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.87", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", ] [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -175,17 +696,70 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" -version = "0.2.161" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" + +[[package]] +name = "litemap" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "lock_api" @@ -203,6 +777,25 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lol_html" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "964b47c14635e111f7efddcd8f1f8794195f66225fef19822fa942b217a859cf" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cssparser", + "encoding_rs", + "hashbrown", + "lazy_static", + "lazycell", + "memchr", + "mime", + "selectors", + "thiserror", +] + [[package]] name = "mac" version = "0.1.1" @@ -216,8 +809,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" dependencies = [ "log", - "phf", - "phf_codegen", + "phf 0.11.2", + "phf_codegen 0.11.2", "string_cache", "string_cache_codegen", "tendril", @@ -235,18 +828,45 @@ dependencies = [ "xml5ever", ] +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "new_debug_unreachable" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + [[package]] name = "num" version = "0.1.42" @@ -324,12 +944,27 @@ dependencies = [ "autocfg", ] +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "output_vt100" version = "0.1.3" @@ -368,16 +1003,37 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", +] + [[package]] name = "phf" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - "phf_macros", + "phf_macros 0.11.2", "phf_shared 0.11.2", ] +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + [[package]] name = "phf_codegen" version = "0.11.2" @@ -388,6 +1044,16 @@ dependencies = [ "phf_shared 0.11.2", ] +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + [[package]] name = "phf_generator" version = "0.10.0" @@ -410,33 +1076,96 @@ dependencies = [ [[package]] name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ - "phf_generator 0.11.2", - "phf_shared 0.11.2", - "proc-macro2", - "quote", - "syn 2.0.85", + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", ] [[package]] -name = "phf_shared" -version = "0.10.0" +name = "plotters-backend" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher", -] +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] -name = "phf_shared" -version = "0.11.2" +name = "plotters-svg" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ - "siphasher", + "plotters-backend", ] [[package]] @@ -466,6 +1195,12 @@ dependencies = [ "output_vt100", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + [[package]] name = "proc-macro2" version = "1.0.89" @@ -497,6 +1232,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", + "rand_pcg", +] + [[package]] name = "rand" version = "0.8.5" @@ -504,10 +1253,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", + "rand_chacha 0.3.1", "rand_core 0.6.4", ] +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -533,13 +1292,60 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", ] [[package]] @@ -557,7 +1363,7 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags", + "bitflags 2.6.0", ] [[package]] @@ -574,9 +1380,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -589,36 +1395,114 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustc-serialize" version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe834bc780604f4674073badbad26d7219cadfb4a2275802db12cbae17498401" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags 1.3.2", + "cssparser", + "derive_more", + "fxhash", + "log", + "matches", + "phf 0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.87", +] + +[[package]] +name = "serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa 1.0.11", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", ] [[package]] @@ -642,6 +1526,12 @@ dependencies = [ "num", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "string_cache" version = "0.8.7" @@ -681,15 +1571,26 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.85" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "tendril" version = "0.4.3" @@ -702,46 +1603,72 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.8.0" +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + +[[package]] +name = "thiserror" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "tinyvec_macros", + "thiserror-impl", ] [[package]] -name = "tinyvec_macros" -version = "0.1.1" +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] [[package]] -name = "unicode-bidi" -version = "0.3.17" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] [[package]] -name = "unicode-ident" -version = "1.0.13" +name = "tinytemplate" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] [[package]] -name = "unicode-normalization" -version = "0.1.24" +name = "tokio" +version = "1.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" dependencies = [ - "tinyvec", + "backtrace", + "pin-project-lite", ] +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", "idna", @@ -754,12 +1681,111 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "web-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -776,12 +1802,39 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -846,6 +1899,18 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xml5ever" version = "0.18.1" @@ -857,6 +1922,30 @@ dependencies = [ "markup5ever", ] +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -875,5 +1964,48 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", ] diff --git a/Cargo.toml b/Cargo.toml index 14547d5..03ad016 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,27 +1,6 @@ -[package] -name = "fast_html2md" -version = "0.0.25" -edition = "2021" -description = "A fast html2md crate for rust" -categories = ["development-tools", "parsing", "parser-implementations"] -keywords = ["html", "markdown", "converter"] -license = "MIT" -documentation = "https://docs.rs/fast_html2md" -repository = "https://github.com/spider-rs/html2md" - -[lib] -name = "html2md" - -[dependencies] -regex = "1" -markup5ever_rcdom = "0.3.0" -html5ever = "0.27" -lazy_static = "1" -percent-encoding = "2" -auto_encoder = "0" -url = "2" - -[dev-dependencies] -spectral = "0.6.0" -pretty_assertions = "0.7.2" -indoc = "1.0.3" +[workspace] +members = [ + "fast_html2md", + "benches" +] +resolver = "2" diff --git a/benches/Cargo.toml b/benches/Cargo.toml new file mode 100644 index 0000000..dae08e3 --- /dev/null +++ b/benches/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "benches" +version = "0.0.0" +publish = false +edition = "2021" + +[dependencies] +criterion = { version = "0.5", features = ["html_reports", "async_tokio"] } +fast_html2md = { path = "../fast_html2md", version = "0" } + +[[bench]] +name = "parse" +path = "parse.rs" +harness = false + +[features] diff --git a/benches/parse.rs b/benches/parse.rs new file mode 100644 index 0000000..6eaff27 --- /dev/null +++ b/benches/parse.rs @@ -0,0 +1,30 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use html2md::{parse_html, rewrite_html}; +use std::fs::File; +use std::io::Read; + +/// bench crawling between different libs +pub fn bench_speed(c: &mut Criterion) { + let mut group = c.benchmark_group("crawl-speed/libraries"); + let sample_count = 100; + let sample_title = format!("crawl {} samples", sample_count); + + let path = std::path::Path::new("../test-samples/real-world-1.html"); + let mut html = String::new(); + let mut html_file = File::open(path).unwrap(); + + html_file.read_to_string(&mut html).unwrap(); + + group.bench_function(format!("Scraper real-world-1: {}", sample_title), |b| { + b.iter(|| black_box(parse_html(&html, false))) + }); + + group.bench_function(format!("Rewriter real-world-1: {}", sample_title), |b| { + b.iter(|| black_box(rewrite_html(&html))) + }); + + group.finish(); +} + +criterion_group!(benches, bench_speed); +criterion_main!(benches); diff --git a/fast_html2md/Cargo.lock b/fast_html2md/Cargo.lock new file mode 100644 index 0000000..a9769bf --- /dev/null +++ b/fast_html2md/Cargo.lock @@ -0,0 +1,1237 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "auto_encoder" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e492d0e069edd5c31f88780c685bb589939bbf898cac872b2ffc7eedad80220" +dependencies = [ + "chardetng", + "encoding_rs", + "percent-encoding", + "phf 0.11.2", + "phf_codegen 0.11.2", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chardetng" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea" +dependencies = [ + "cfg-if", + "encoding_rs", + "memchr", +] + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "matches", + "phf 0.8.0", + "proc-macro2", + "quote", + "smallvec", + "syn 1.0.109", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.85", +] + +[[package]] +name = "ctor" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "derive_more" +version = "0.99.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.85", +] + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "fast_html2md" +version = "0.0.25" +dependencies = [ + "auto_encoder", + "html5ever", + "indoc", + "lazy_static", + "lol_html", + "markup5ever_rcdom", + "percent-encoding", + "pretty_assertions", + "regex", + "spectral", + "url", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indoc" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.161" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lol_html" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "964b47c14635e111f7efddcd8f1f8794195f66225fef19822fa942b217a859cf" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cssparser", + "encoding_rs", + "hashbrown", + "lazy_static", + "lazycell", + "memchr", + "mime", + "selectors", + "thiserror", +] + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf 0.11.2", + "phf_codegen 0.11.2", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + +[[package]] +name = "num" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4703ad64153382334aa8db57c637364c322d3372e097840c72000dabdcf6156e" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e63899ad0da84ce718c14936262a41cee2c79c981fc0a0e7c7beb47d5a07e8c1" +dependencies = [ + "num-integer", + "num-traits", + "rand 0.4.6", + "rustc-serialize", +] + +[[package]] +name = "num-complex" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b288631d7878aaf59442cffd36910ea604ecd7745c36054328595114001c9656" +dependencies = [ + "num-traits", + "rustc-serialize", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee314c74bd753fc86b4780aa9475da469155f3848473a261d2d18e35245a784e" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", + "rustc-serialize", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "output_vt100" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +dependencies = [ + "winapi", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand 0.8.5", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "pretty_assertions" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cab0e7c02cf376875e9335e0ba1da535775beb5450d21e1dffca068818ed98b" +dependencies = [ + "ansi_term", + "ctor", + "diff", + "output_vt100", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", + "rand_pcg", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc-serialize" +version = "0.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe834bc780604f4674073badbad26d7219cadfb4a2275802db12cbae17498401" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags 1.3.2", + "cssparser", + "derive_more", + "fxhash", + "log", + "matches", + "phf 0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "serde" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spectral" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae3c15181f4b14e52eeaac3efaeec4d2764716ce9c86da0c934c3e318649c5ba" +dependencies = [ + "num", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + +[[package]] +name = "thiserror" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "tinyvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "unicode-bidi" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xml5ever" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69" +dependencies = [ + "log", + "mac", + "markup5ever", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] diff --git a/fast_html2md/Cargo.toml b/fast_html2md/Cargo.toml new file mode 100644 index 0000000..a70fd33 --- /dev/null +++ b/fast_html2md/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "fast_html2md" +version = "0.0.26" +edition = "2021" +description = "A fast html2md crate for rust" +categories = ["development-tools", "parsing", "parser-implementations"] +keywords = ["html", "markdown", "converter"] +license = "MIT" +documentation = "https://docs.rs/fast_html2md" +repository = "https://github.com/spider-rs/html2md" + +[lib] +name = "html2md" + +[dependencies] +regex = "1" +markup5ever_rcdom = "0.3.0" +html5ever = "0.27" +lazy_static = "1" +percent-encoding = "2" +auto_encoder = "0" +url = "2" +lol_html = "2" + +[dev-dependencies] +spectral = "0.6.0" +pretty_assertions = "0.7.2" +indoc = "1.0.3" diff --git a/src/lib.rs b/fast_html2md/src/lib.rs similarity index 90% rename from src/lib.rs rename to fast_html2md/src/lib.rs index d6d9cbb..12f79db 100644 --- a/src/lib.rs +++ b/fast_html2md/src/lib.rs @@ -8,21 +8,25 @@ use std::boxed::Box; use std::collections::HashMap; use std::sync::Arc; use url::Url; -pub mod anchors; -pub mod codes; -pub mod common; -pub mod containers; -pub mod dummy; -pub mod headers; -pub mod iframes; -pub mod ignore; -pub mod images; -pub mod lists; -pub mod paragraphs; -pub mod quotes; -pub mod styles; -pub mod tables; -pub mod utils; + +// we want to just use the rewriter instead for v0.1. +pub mod rewriter; +pub mod scraper; + +pub(crate) use scraper::anchors; +pub(crate) use scraper::codes; +// pub(crate) use scraper::common; +pub(crate) use scraper::containers; +pub(crate) use scraper::dummy; +pub(crate) use scraper::headers; +pub(crate) use scraper::iframes; +pub(crate) use scraper::images; +pub(crate) use scraper::lists; +pub(crate) use scraper::paragraphs; +pub(crate) use scraper::quotes; +pub(crate) use scraper::styles; +pub(crate) use scraper::tables; +pub(crate) use scraper::utils; use anchors::AnchorHandler; use codes::CodeHandler; @@ -52,14 +56,12 @@ lazy_static! { static ref MARKDOWN_MIDDLE_KEYCHARS: Regex = Regex::new(r"[<>*\\_~]").expect("valid regex pattern"); // for Markdown escaping static ref CLEANUP_PATTERN: Regex = Regex::new( r"(?x) - (?P^\s*$\n)| - (?P\n{3,})| - (?P(\S) )| - (?P^\n+)| - (?P\s+$)| - (?P!\[\]\(\))| - (?P^\s+)" - ).expect("valid regex pattern"); + (?m) + (^\s*$\n|\n{3,})| # Empty lines or excessive newlines + (\s+$|^\n+|\s{2,})| # Trailing, leading, or excessive spaces + (!\[\]\(\)) # Empty image syntax + " + ).expect("Valid regex pattern"); } /// Custom variant of main function. Allows to pass custom tag<->tag factory pairs @@ -145,6 +147,14 @@ pub fn parse_html(html: &str, commonmark: bool) -> String { parse_html_custom(html, &HashMap::default(), commonmark) } +/// Main function of this library to come. Rewrites incoming HTML, converts it into Markdown +/// and returns converted string. Incomplete work in progress for major performance increases. +/// # Arguments +/// `html` is source HTML as `String` +pub fn rewrite_html(html: &str) -> String { + rewriter::writer::convert_html_to_markdown(html).unwrap_or_default() +} + /// Same as `parse_html` but retains all "span" html elements intact /// Markdown parsers usually strip them down when rendering but they /// may be useful for later processing. @@ -381,21 +391,17 @@ fn escape_markdown(result: &StructuredPrinter, text: &str) -> String { fn clean_markdown(text: &str) -> String { CLEANUP_PATTERN .replace_all(text, |caps: ®ex::Captures| { - if caps.name("empty_line").is_some() - || caps.name("leading_newlines").is_some() - || caps.name("last_whitespace").is_some() - || caps.name("empty_image").is_some() - || caps.name("leading_whitespace").is_some() - { - "".to_string() - } else if caps.name("excessive_newlines").is_some() { - "\n\n".to_string() - } else if let Some(trailing_match) = caps.name("trailing_space") { - trailing_match.as_str().trim_end().to_string() + if caps.get(1).is_some() || caps.get(4).is_some() { + "\n\n".to_string() // Consolidate newlines + } else if caps.get(3).is_some() { + "".to_string() // Remove spaces or empty image syntax + } else if caps.get(2).is_some() { + " ".to_string() // Remove spaces or empty image syntax } else { - caps[0].to_string() + caps[0].trim().to_string() } }) + .trim() .to_string() } diff --git a/fast_html2md/src/rewriter/mod.rs b/fast_html2md/src/rewriter/mod.rs new file mode 100644 index 0000000..d3baa81 --- /dev/null +++ b/fast_html2md/src/rewriter/mod.rs @@ -0,0 +1 @@ +pub mod writer; diff --git a/fast_html2md/src/rewriter/writer.rs b/fast_html2md/src/rewriter/writer.rs new file mode 100644 index 0000000..05f986f --- /dev/null +++ b/fast_html2md/src/rewriter/writer.rs @@ -0,0 +1,109 @@ +use crate::clean_markdown; +use lol_html::doc_comments; +use lol_html::html_content::ContentType::Text; +use lol_html::html_content::Element; +use lol_html::{element, rewrite_str, RewriteStrSettings}; + +/// Insert a new line +#[inline] +pub fn insert_newline(element: &mut Element) { + element.after("\n", Text); +} + +/// Handle the lol_html tag. +#[inline] +fn handle_tag(element: &mut Element) -> Result<(), Box> { + element.remove_and_keep_content(); + + // Add the markdown equivalents before the element. + match element.tag_name().as_str() { + "h1" => { + element.before("# ", Text); + insert_newline(element); + } + "h2" => { + element.before("## ", Text); + insert_newline(element); + } + "h3" => { + element.before("### ", Text); + insert_newline(element); + } + "h4" => { + element.before("#### ", Text); + insert_newline(element); + } + "h5" => { + element.before("##### ", Text); + insert_newline(element); + } + "h6" => { + element.before("###### ", Text); + insert_newline(element); + } + "p" => element.before("\n", Text), + "li" => element.before("* ", Text), + "a" => { + if let Some(href) = element.get_attribute("href") { + element.before("[", lol_html::html_content::ContentType::Text); + element.after( + &format!("]({})", href), + lol_html::html_content::ContentType::Text, + ); + element.set_inner_content("", lol_html::html_content::ContentType::Text); + // Remove content tags. + } + } + "img" => { + let alt = element.get_attribute("alt").unwrap_or_default(); + let src = element.get_attribute("src").unwrap_or_default(); + element.replace(&format!("![{}]({})", alt, src), Text); + } + + "tr" => { + element.before("| ", Text); + element.after(" |\n", Text); + } + "th" => { + element.before("**", Text); + element.after("** | ", Text); + } + "td" => { + element.after(" | ", Text); + } + _ => (), + } + + Ok(()) +} + +/// Get the HTML rewriter settings to convert ot markdown. +pub fn get_rewriter_settings() -> RewriteStrSettings<'static, 'static> { + RewriteStrSettings { + document_content_handlers: vec![doc_comments!(|c| { + c.remove(); + Ok(()) + })], + element_content_handlers: vec![ + element!("head, nav, svg", |el| { + el.remove(); + Ok(()) + }), + element!("*:not(script):not(head):not(style):not(svg)", |el| { + let _ = handle_tag(el); + Ok(()) + }), + ], + ..RewriteStrSettings::default() + } +} + +/// Convert to markdown streaming re-writer +pub(crate) fn convert_html_to_markdown(html: &str) -> Result> { + let settings = get_rewriter_settings(); + + match rewrite_str(&Box::new(html), settings) { + Ok(markdown) => Ok(clean_markdown(&markdown)), + Err(e) => Err(e.into()), + } +} diff --git a/src/anchors.rs b/fast_html2md/src/scraper/anchors.rs similarity index 100% rename from src/anchors.rs rename to fast_html2md/src/scraper/anchors.rs diff --git a/src/codes.rs b/fast_html2md/src/scraper/codes.rs similarity index 100% rename from src/codes.rs rename to fast_html2md/src/scraper/codes.rs diff --git a/src/common.rs b/fast_html2md/src/scraper/common.rs similarity index 100% rename from src/common.rs rename to fast_html2md/src/scraper/common.rs diff --git a/src/containers.rs b/fast_html2md/src/scraper/containers.rs similarity index 100% rename from src/containers.rs rename to fast_html2md/src/scraper/containers.rs diff --git a/src/dummy.rs b/fast_html2md/src/scraper/dummy.rs similarity index 98% rename from src/dummy.rs rename to fast_html2md/src/scraper/dummy.rs index 1253e5e..d3b0182 100644 --- a/src/dummy.rs +++ b/fast_html2md/src/scraper/dummy.rs @@ -16,7 +16,7 @@ impl TagHandler for DummyHandler { /// Handler that completely copies tag to printer as HTML with all descendants #[derive(Default)] -pub(super) struct IdentityHandler { +pub struct IdentityHandler { /// Commonmark spec pub commonmark: bool, } diff --git a/src/headers.rs b/fast_html2md/src/scraper/headers.rs similarity index 100% rename from src/headers.rs rename to fast_html2md/src/scraper/headers.rs diff --git a/src/iframes.rs b/fast_html2md/src/scraper/iframes.rs similarity index 100% rename from src/iframes.rs rename to fast_html2md/src/scraper/iframes.rs diff --git a/src/ignore.rs b/fast_html2md/src/scraper/ignore.rs similarity index 100% rename from src/ignore.rs rename to fast_html2md/src/scraper/ignore.rs diff --git a/src/images.rs b/fast_html2md/src/scraper/images.rs similarity index 100% rename from src/images.rs rename to fast_html2md/src/scraper/images.rs diff --git a/src/lists.rs b/fast_html2md/src/scraper/lists.rs similarity index 100% rename from src/lists.rs rename to fast_html2md/src/scraper/lists.rs diff --git a/fast_html2md/src/scraper/mod.rs b/fast_html2md/src/scraper/mod.rs new file mode 100644 index 0000000..4e1380b --- /dev/null +++ b/fast_html2md/src/scraper/mod.rs @@ -0,0 +1,21 @@ +pub mod anchors; +pub mod codes; +pub mod common; +pub mod containers; +pub mod dummy; +pub mod headers; +pub mod iframes; +pub mod ignore; +pub mod images; +pub mod lists; +pub mod paragraphs; +pub mod quotes; +pub mod styles; +pub mod tables; +pub mod utils; + +use super::Handle; +use super::StructuredPrinter; +use super::TagHandler; +use super::TagHandlerFactory; +use super::{clean_markdown, walk}; diff --git a/src/paragraphs.rs b/fast_html2md/src/scraper/paragraphs.rs similarity index 100% rename from src/paragraphs.rs rename to fast_html2md/src/scraper/paragraphs.rs diff --git a/src/quotes.rs b/fast_html2md/src/scraper/quotes.rs similarity index 100% rename from src/quotes.rs rename to fast_html2md/src/scraper/quotes.rs diff --git a/src/styles.rs b/fast_html2md/src/scraper/styles.rs similarity index 100% rename from src/styles.rs rename to fast_html2md/src/scraper/styles.rs diff --git a/src/tables.rs b/fast_html2md/src/scraper/tables.rs similarity index 99% rename from src/tables.rs rename to fast_html2md/src/scraper/tables.rs index b440414..b954c0e 100644 --- a/src/tables.rs +++ b/fast_html2md/src/scraper/tables.rs @@ -1,7 +1,6 @@ use super::StructuredPrinter; use super::TagHandler; use super::{clean_markdown, walk}; -use std::borrow::BorrowMut; use std::sync::Arc; use std::{cmp, collections::HashMap}; diff --git a/src/utils/inline_elements.rs b/fast_html2md/src/scraper/utils/inline_elements.rs similarity index 100% rename from src/utils/inline_elements.rs rename to fast_html2md/src/scraper/utils/inline_elements.rs diff --git a/src/utils/mod.rs b/fast_html2md/src/scraper/utils/mod.rs similarity index 100% rename from src/utils/mod.rs rename to fast_html2md/src/scraper/utils/mod.rs diff --git a/tests/iframes.rs b/fast_html2md/tests/iframes.rs similarity index 100% rename from tests/iframes.rs rename to fast_html2md/tests/iframes.rs diff --git a/tests/images.rs b/fast_html2md/tests/images.rs similarity index 100% rename from tests/images.rs rename to fast_html2md/tests/images.rs diff --git a/tests/integration.rs b/fast_html2md/tests/integration.rs similarity index 81% rename from tests/integration.rs rename to fast_html2md/tests/integration.rs index 21ec22a..e22257f 100644 --- a/tests/integration.rs +++ b/fast_html2md/tests/integration.rs @@ -14,7 +14,7 @@ use url::Url; #[ignore] fn test_marcfs() { let mut html = String::new(); - let mut html_file = File::open("test-samples/marcfs-readme.html").unwrap(); + let mut html_file = File::open("../test-samples/marcfs-readme.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); @@ -29,7 +29,7 @@ fn test_real_world_wiki() -> Result<(), Box> { use std::io::{self, Read}; use std::path::Path; - let paths = fs::read_dir("test-samples/wiki")?; + let paths = fs::read_dir("../test-samples/wiki")?; fn run_parse(path: &Path) -> Result<(), Box> { let mut html = String::new(); @@ -66,7 +66,7 @@ fn test_real_world_wiki() -> Result<(), Box> { #[ignore] fn test_real_world_ja() { let mut html = String::new(); - let mut html_file = File::open("test-samples/real-world-ja-1.html").unwrap(); + let mut html_file = File::open("../test-samples/real-world-ja-1.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); @@ -79,8 +79,8 @@ fn test_real_world_ja() { fn test_cheatsheet() { let mut html = String::new(); let mut md = String::new(); - let mut html_file = File::open("test-samples/markdown-cheatsheet.html").unwrap(); - let mut md_file = File::open("test-samples/markdown-cheatsheet.md").unwrap(); + let mut html_file = File::open("../test-samples/markdown-cheatsheet.html").unwrap(); + let mut md_file = File::open("../test-samples/markdown-cheatsheet.md").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); @@ -95,7 +95,7 @@ fn test_cheatsheet() { #[test] fn test_list_newlines() { let mut html = String::new(); - let mut html_file = File::open("test-samples/dybr-bug-with-list-newlines.html").unwrap(); + let mut html_file = File::open("../test-samples/dybr-bug-with-list-newlines.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); @@ -107,7 +107,7 @@ fn test_list_newlines() { #[test] fn test_lists_from_text() { let mut html = String::new(); - let mut html_file = File::open("test-samples/dybr-bug-with-lists-from-text.html").unwrap(); + let mut html_file = File::open("../test-samples/dybr-bug-with-lists-from-text.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); @@ -120,7 +120,8 @@ fn test_lists_from_text() { #[test] fn test_strong_inside_link() { let mut html = String::new(); - let mut html_file = File::open("test-samples/dybr-bug-with-strong-inside-link.html").unwrap(); + let mut html_file = + File::open("../test-samples/dybr-bug-with-strong-inside-link.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); @@ -131,7 +132,7 @@ fn test_strong_inside_link() { #[test] fn test_tables_with_newlines() { let mut html = String::new(); - let mut html_file = File::open("test-samples/dybr-bug-with-tables-masked.html").unwrap(); + let mut html_file = File::open("../test-samples/dybr-bug-with-tables-masked.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); @@ -144,26 +145,26 @@ fn test_tables_with_newlines() { #[test] fn test_tables_crash2() { let mut html = String::new(); - let mut html_file = File::open("test-samples/dybr-bug-with-tables-2-masked.html").unwrap(); + let mut html_file = File::open("../test-samples/dybr-bug-with-tables-2-masked.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); let table_with_vertical_header = parse_html(&html, false); - assert_that!(table_with_vertical_header).contains(indoc! {"xxxxx xxxxxxxxxx xxxxxxx x xxxxx))~~xxxxxxxx xxxxxxxx~~\n\n## At a Glance\n\n|Current Conditions:|Open all year. No reservations. No services.|\n|||\n| Reservations: | No reservations. |\n| Fees | No fee. |\n| Water: | No water. |\n\n" + assert_that!(table_with_vertical_header).contains(indoc! {"xxxxx xxxxxxxxxx xxxxxxx x xxxxx))~~xxxxxxxx xxxxxxxx~~\n\n## At a Glance\n\n|Current Conditions:|Open all year. No reservations. No services.|\n|||\n| Reservations: | No reservations. |\n| Fees | No fee. |\n| Water: | No water. |" }); } #[test] fn test_html_from_text() { let mut html = String::new(); - let mut html_file = File::open("test-samples/real-world-1.html").unwrap(); + let mut html_file = File::open("../test-samples/real-world-1.html").unwrap(); html_file .read_to_string(&mut html) .expect("File must be readable"); let mut tag_factory: HashMap> = HashMap::new(); - let tag = Box::new(html2md::ignore::IgnoreTagFactory {}); + let tag = Box::new(html2md::scraper::ignore::IgnoreTagFactory {}); tag_factory.insert(String::from("script"), tag.clone()); tag_factory.insert(String::from("style"), tag.clone()); @@ -179,3 +180,22 @@ fn test_html_from_text() { ); assert!(!result.is_empty()); } + +#[test] +fn test_html_from_text_rewrite() { + let mut html = Box::new(String::new()); + let mut html_file = File::open("../test-samples/real-world-1.html").unwrap(); + + html_file + .read_to_string(&mut html) + .expect("File must be readable"); + + let result = html2md::rewrite_html( + &html, + // &tag_factory, + // false, + // &Some(Url::parse("https://spider.cloud").unwrap()), + ); + + assert!(!result.is_empty()); +} diff --git a/tests/lists.rs b/fast_html2md/tests/lists.rs similarity index 100% rename from tests/lists.rs rename to fast_html2md/tests/lists.rs diff --git a/tests/quotes.rs b/fast_html2md/tests/quotes.rs similarity index 100% rename from tests/quotes.rs rename to fast_html2md/tests/quotes.rs diff --git a/tests/styles.rs b/fast_html2md/tests/styles.rs similarity index 100% rename from tests/styles.rs rename to fast_html2md/tests/styles.rs diff --git a/tests/tables.rs b/fast_html2md/tests/tables.rs similarity index 100% rename from tests/tables.rs rename to fast_html2md/tests/tables.rs diff --git a/tests/unit.rs b/fast_html2md/tests/unit.rs similarity index 100% rename from tests/unit.rs rename to fast_html2md/tests/unit.rs diff --git a/test-samples/real-world-1.html b/test-samples/real-world-1.html index 82bfcd6..e974378 100644 --- a/test-samples/real-world-1.html +++ b/test-samples/real-world-1.html @@ -1,73 +1,980 @@ -Spider: The Web Crawler for AI - - - -
+
+
+

+ To help you get started with Spider, we’ll give you $200 in credits + when you spend $100. + + Terms apply + +

+
+
+
+
+
+

+ The Web Crawler for AI Agents and LLMs +

+

+ Spider offers the finest data collecting solution. Engineered for + speed and scalability, it allows you to elevate your AI projects. +

+
+
+
+ Get Started +
+
+
+
+
+
+
+
+
    +
  • + +
  • +
  • + +
  • +
+
+
+
+ Example request +
+ +
+
+ +
+ +
+
+
+
import requests, os, json
 
 headers = {
     'Authorization': os.getenv("SPIDER_API_KEY"),
@@ -87,123 +994,2446 @@
         decode_unicode=True
     ):
         data = json.loads(chunk)
-        print(data)
Free Trial

-Built with the need for Speed

-Experience the power of Spider, built fully in Rust for - next-generation scalability. -

-2.4secs

To crawl over 20,000 pages

-500-1000x

Faster than alternatives

-500x

Cheaper than traditional scraping services

Benchmarks displaying performance between Spider API request modes.
-Spider API Request Modes · Benchmarked tailwindcss.com ·

Seamless Integrations

-Seamlessly integrate Spider with a wide range of platforms, ensuring data curation - perfectly aligned with your requirements. Compatible with all major AI tools. -

Concurrent Streaming

-Save time and money without having to worry about bandwidth concerns by effectively - streaming all the results concurrently. The latency cost that is saved becomes drastic as - you crawl more websites. -

Warp Speed

-Powered by the cutting-edge Spider open-source project, our robust Rust engine scales effortlessly to handle extreme - workloads. We ensure continuous maintenance and improvement for top-tier performance. -

-Kickstart Your Data Collecting Projects Today -

-Jumpstart web crawling with full elastic scaling concurrency, optimal formats, and AI scraping. -

-Performance Tuned -

-Spider is written in Rust and runs in full concurrency to achieve crawling thousands of - pages in secs. -

-Multiple response formats -

-Get clean and formatted markdown, HTML, or text content for fine-tuning or training AI - models. -

-Caching -

-Further boost speed by caching repeated web page crawls to minimize expenses while - building. -

-Smart Mode -

-Spider dynamically switches to Headless Chrome when it needs to quick. -

Beta

-Scrape with AI -

-Do custom browser scripting and data extraction using the latest AI models with no cost - step caching. -

-The crawler for LLMs -

-Don't let crawling and scraping be the highest latency in your LLM & AI agent stack. -

-Scrape with no headaches

  • Auto Proxy rotations
  • Agent headers
  • Anti-bot detections
  • Headless chrome
  • Markdown responses

-The Fastest Web Crawler -

  • -Powered by spider-rs
  • 20,000 pages/seconds
  • Unlimited concurrency
  • Simple API
  • 50,000 RPM

-Do more with AI

  • Browser scripting
  • Advanced extraction
  • Data pipelines
  • Ideal for LLMs and AI Agents
  • Accurate labeling

-Achieve more with these new API features -

-Our API is set to stream so you can act in realtime. -

A user interface with a search bar containing the text "Latest sports news," a green "Submit" button, and two icon buttons to display searching and extracting with the service.

Search

-Get access to search engine results from anywhere and easily crawl and transform pages to - LLM-ready markdown. -

A user interface segment showing three icons representing different stages of data transformation.

Transform

-Convert raw HTML into markdown easily by using this API. Transform thousands of html pages - in seconds. -

-Join the community -

-Backed by a network of early advocates, contributors, and supporters. -

+ + + +
+
+ Free Trial +
+ +
+
+
+ +
+
+

+ Built with the need for Speed +

+

+ Experience the power of Spider, built fully in + Rust for next-generation scalability. +

+
+
+
+

+ 2.4secs +

+

+ To crawl over 20,000 pages +

+
+
+
+
+

+ 500-1000x +

+

+ Faster than alternatives +

+
+
+
+
+

+ 500x +

+

+ Cheaper than traditional scraping services +

+
+
+
+
+
+ + + Benchmarks displaying performance between Spider API request + modes. + + + + + + + + + + + + + + + + + + + +
+
+ Spider API Request Modes · Benchmarked tailwindcss.com · + +
+ +
+
+ +
+
+

+ Concurrent Streaming +

+

+ Save time and money without having to worry about bandwidth + concerns by effectively streaming all the results concurrently. + The latency cost that is saved becomes drastic as you crawl more + websites. +

+
+
+
+
+

+ Warp Speed +

+

+ Powered by the cutting-edge + Spider + open-source project, our robust Rust engine scales effortlessly + to handle extreme workloads. We ensure continuous maintenance + and improvement for top-tier performance. +

+
+
+
+
+
+

+ Kickstart Your Data Collecting Projects Today +

+

+ Jumpstart web crawling with full elastic scaling concurrency, optimal + formats, and AI scraping. +

+
+
+
+
+ + + + + + + + + + + + + + + + +
+

+ Performance Tuned +

+

+ Spider is written in Rust and runs in full concurrency to + achieve crawling thousands of pages in secs. +

+
+
+
+
+
+ + + + + + +
+

+ Multiple response formats +

+

+ Get clean and formatted markdown, HTML, or text content for + fine-tuning or training AI models. +

+
+
+
+
+
+ + + + + +
+

+ Caching +

+

+ Further boost speed by caching repeated web page crawls to + minimize expenses while building. +

+
+
+
+
+
+ + + + + + + + + + + + +
+

+ Smart Mode +

+

+ Spider dynamically switches to Headless Chrome when it needs to + quick. +

+
+
+
+
+
+ Beta +
+
+
+
+ + + + + + + + + + + + + + + +
+

+ Scrape with AI +

+

+ Do custom browser scripting and data extraction using the latest + AI models with no cost step caching. +

+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ The crawler for LLMs +

+

+ Don't let crawling and scraping be the highest latency in your + LLM & AI agent stack. +

+
+
+
+
+
+
+
+

+ Scrape with no headaches +

+
    +
  • Auto Proxy rotations
  • +
  • Agent headers
  • +
  • Anti-bot detections
  • +
  • Headless chrome
  • +
  • Markdown responses
  • +
+
+
+

+ The Fastest Web Crawler +

+
    +
  • + Powered by + spider-rs +
  • +
  • 20,000 pages/seconds
  • +
  • Unlimited concurrency
  • +
  • Simple API
  • +
  • 50,000 RPM
  • +
+
+
+

+ Do more with AI +

+
    +
  • Browser scripting
  • +
  • Advanced extraction
  • +
  • Data pipelines
  • +
  • Ideal for LLMs and AI Agents
  • +
  • Accurate labeling
  • +
+
+
+
+
+

+ Achieve more with these new API features +

+

+ Our API is set to stream so you can act in realtime. +

+
+
+ A user interface with a search bar containing the text "Latest sports news," a green "Submit" button, and two icon buttons to display searching and extracting with the service. +
+

+ Search +

+

+ Get access to search engine results from anywhere and easily + crawl and transform pages to LLM-ready markdown. +

+ +
+
+
+ A user interface segment showing three icons representing different stages of data transformation. +
+

+ Transform +

+

+ Convert raw HTML into markdown easily by using this API. + Transform thousands of html pages in seconds. +

+ +
+
+
+
+ +
+

+ Join the community +

+

+ Backed by a network of early advocates, contributors, and supporters. +

+ + +
+
+

+ FAQ +

+

+ Frequently asked questions about Spider. +

+
+
+ +

+ What is Spider? +

+
+
+

+ Spider is a leading web crawling tool designed for speed and + cost-effectiveness, supporting various data formats including + LLM-ready markdown. +

+
+
+
+ +

+ Why is my website not crawling? +

+
+
+

+ Your crawl may fail if it requires JavaScript rendering. Try + setting your request to 'chrome' to solve this issue. +

+
+
+
+ +

+ Can you crawl all pages? +

+
+
+

+ Yes, Spider accurately crawls all necessary content without + needing a sitemap. +

+
+
+
+ +

+ What formats can Spider convert web data into? +

+
+
+

+ Spider outputs HTML, raw, text, and various markdown formats. + It supports JSON, + JSONL, CSV, and XML for + API responses. +

+
+
+
+ +

+ Is Spider suitable for large scraping projects? +

+
+
+

+ Absolutely, Spider is ideal for large-scale data collection + and offers a cost-effective dashboard for data management. +

+
+
+
+ +

+ How can I try Spider? +

+
+
+

+ Purchase credits for our cloud system or test the Open Source + Spider engine to explore its capabilities. +

+
+
+
+ +

+ Does it respect robots.txt? +

+
+
+

+ Yes, compliance with robots.txt is default, but you can + disable this if necessary. +

+
+
+
+ +

+ Unable to get dynamic content? +

+
+
+

+ If you are having trouble getting dynamic pages, try setting + the request parameter to "chrome" or "smart." You may also + need to set `disable_intercept` to allow third-party or + external scripts to run. +

+
+
+
+ +

+ Why is my crawl going slow? +

+
+
+

+ If you are experiencing a slow crawl, it is most likely due to + the robots.txt file for the website. The robots.txt file may + have a crawl delay set, and we respect the delay up to 60 + seconds. +

+
+
+
+ +

+ Do you offer a Free Trial? +

+
+
+

+ Yes, you can try out the service before being charged for free + at + checkout. +

+
+
+
+
+
+

+ Comprehensive Data Curation for Everyone +

+

+ Trusted by leading tech businesses worldwide to deliver accurate and + insightful data solutions. +

+
+
+ + Outer Labs + + + + + + + + + + + +
+ +
+ + Elementus Logo + + + + + + + + + + + + +
+
+ + Super AI Logo + + + + + + + + + +
+
+ + LayerX Logo + + + + + + + + + + + + + + + + + + + + +
+
+ + Swiss Re + + + +
+
+ + Write Sonic Logo + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + Alioth Logo + + + + + + + + +
+
+
+
+

+ Next generation data for AI, scale to millions +

+ +
+
+
+
+ + \ No newline at end of file + elements.forEach((element) => { + observer.observe(element); + }); + + + +