diff --git a/Cargo.lock b/Cargo.lock index 16aed3dc49ca0..1e7ef0cca9bc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4741,6 +4741,8 @@ version = "0.0.0" dependencies = [ "arrayvec", "askama", + "base64", + "byteorder", "expect-test", "indexmap", "itertools 0.12.1", diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index bd0fbef998b2b..44ba2a8c0153e 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -9,6 +9,8 @@ path = "lib.rs" [dependencies] arrayvec = { version = "0.7", default-features = false } askama = { version = "0.12", default-features = false, features = ["config"] } +base64 = "0.21.7" +byteorder = "1.5" itertools = "0.12" indexmap = "2" minifier = "0.3.0" diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 34a4a89aa7bfa..2ec22df0b434f 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -1,6 +1,7 @@ use std::collections::hash_map::Entry; use std::collections::{BTreeMap, VecDeque}; +use base64::prelude::*; use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_middle::ty::TyCtxt; use rustc_span::def_id::DefId; @@ -21,14 +22,14 @@ use crate::html::render::{self, IndexItem, IndexItemFunctionType, RenderType, Re /// /// The `index` is a JSON-encoded list of names and other information. /// -/// The desc has newlined descriptions, split up by size into 1MiB shards. +/// The desc has newlined descriptions, split up by size into 128KiB shards. /// For example, `(4, "foo\nbar\nbaz\nquux")`. pub(crate) struct SerializedSearchIndex { pub(crate) index: String, pub(crate) desc: Vec<(usize, String)>, } -const DESC_INDEX_SHARD_LEN: usize = 1024 * 1024; +const DESC_INDEX_SHARD_LEN: usize = 128 * 1024; /// Builds the search index from the collected metadata pub(crate) fn build_index<'tcx>( @@ -342,6 +343,8 @@ pub(crate) fn build_index<'tcx>( // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string // for information on the format. descindex: String, + // A list of items with no description. This is eventually turned into a bitmap. + emptydesc: Vec, } struct Paths { @@ -456,7 +459,8 @@ pub(crate) fn build_index<'tcx>( } if item.deprecation.is_some() { - deprecated.push(index); + // bitmasks always use 1-indexing for items, with 0 as the crate itself + deprecated.push(u32::try_from(index + 1).unwrap()); } } @@ -473,9 +477,18 @@ pub(crate) fn build_index<'tcx>( crate_data.serialize_field("i", &parents)?; crate_data.serialize_field("f", &functions)?; crate_data.serialize_field("D", &self.descindex)?; - crate_data.serialize_field("c", &deprecated)?; crate_data.serialize_field("p", &paths)?; crate_data.serialize_field("b", &self.associated_item_disambiguators)?; + let mut buf = Vec::new(); + let mut strbuf = String::new(); + write_bitmap_to_bytes(&deprecated, &mut buf).unwrap(); + BASE64_STANDARD.encode_string(&buf, &mut strbuf); + crate_data.serialize_field("c", &strbuf)?; + strbuf.clear(); + buf.clear(); + write_bitmap_to_bytes(&self.emptydesc, &mut buf).unwrap(); + BASE64_STANDARD.encode_string(&buf, &mut strbuf); + crate_data.serialize_field("e", &strbuf)?; if has_aliases { crate_data.serialize_field("a", &self.aliases)?; } @@ -483,11 +496,18 @@ pub(crate) fn build_index<'tcx>( } } - let desc = { + let (emptydesc, desc) = { + let mut emptydesc = Vec::new(); let mut result = Vec::new(); let mut set = String::new(); let mut len: usize = 0; + let mut itemindex: u32 = 0; for desc in std::iter::once(&crate_doc).chain(crate_items.iter().map(|item| &item.desc)) { + if desc == "" { + emptydesc.push(itemindex); + itemindex += 1; + continue; + } if set.len() >= DESC_INDEX_SHARD_LEN { result.push((len, std::mem::replace(&mut set, String::new()))); len = 0; @@ -496,9 +516,10 @@ pub(crate) fn build_index<'tcx>( } set.push_str(&desc); len += 1; + itemindex += 1; } result.push((len, std::mem::replace(&mut set, String::new()))); - result + (emptydesc, result) }; let descindex = { @@ -509,7 +530,10 @@ pub(crate) fn build_index<'tcx>( descindex }; - assert_eq!(crate_items.len() + 1, desc.iter().map(|(len, _)| *len).sum::()); + assert_eq!( + crate_items.len() + 1, + desc.iter().map(|(len, _)| *len).sum::() + emptydesc.len() + ); // The index, which is actually used to search, is JSON // It uses `JSON.parse(..)` to actually load, since JSON @@ -523,6 +547,7 @@ pub(crate) fn build_index<'tcx>( aliases: &aliases, associated_item_disambiguators: &associated_item_disambiguators, descindex, + emptydesc, }) .expect("failed serde conversion") // All these `replace` calls are because we have to go through JS string for JSON content. @@ -571,6 +596,200 @@ pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) { } } +// checked against roaring-rs in +// https://gitlab.com/notriddle/roaring-test +pub fn write_bitmap_to_bytes(domain: &[u32], mut out: impl std::io::Write) -> std::io::Result<()> { + // https://arxiv.org/pdf/1603.06549.pdf + let mut keys = Vec::::new(); + let mut containers = Vec::::new(); + enum Container { + /// number of ones, bits + Bits(Box<[u64; 1024]>), + /// list of entries + Array(Vec), + /// list of (start, len-1) + Run(Vec<(u16, u16)>), + } + impl Container { + fn popcount(&self) -> u32 { + match self { + Container::Bits(bits) => bits.iter().copied().map(|x| x.count_ones()).sum(), + Container::Array(array) => { + array.len().try_into().expect("array can't be bigger than 2**32") + } + Container::Run(runs) => { + runs.iter().copied().map(|(_, lenm1)| u32::from(lenm1) + 1).sum() + } + } + } + fn push(&mut self, value: u16) { + match self { + Container::Bits(bits) => bits[value as usize >> 6] |= 1 << (value & 0x3F), + Container::Array(array) => { + array.push(value); + if array.len() >= 4096 { + let array = std::mem::replace(array, Vec::new()); + *self = Container::Bits(Box::new([0; 1024])); + for value in array { + self.push(value); + } + } + } + Container::Run(runs) => { + if let Some(r) = runs.last_mut() + && r.0 + r.1 + 1 == value + { + r.1 += 1; + } else { + runs.push((value, 0)); + } + } + } + } + fn try_make_run(&mut self) -> bool { + match self { + Container::Bits(bits) => { + let mut r: u64 = 0; + for (i, chunk) in bits.iter().copied().enumerate() { + let next_chunk = + i.checked_add(1).and_then(|i| bits.get(i)).copied().unwrap_or(0); + r += !chunk & u64::from((chunk << 1).count_ones()); + r += !next_chunk & u64::from((chunk >> 63).count_ones()); + } + if (2 + 4 * r) < 8192 { + let bits = std::mem::replace(bits, Box::new([0; 1024])); + *self = Container::Run(Vec::new()); + for (i, bits) in bits.iter().copied().enumerate() { + if bits == 0 { + continue; + } + for j in 0..64 { + let value = (u16::try_from(i).unwrap() << 6) | j; + if bits & (1 << j) != 0 { + self.push(value); + } + } + } + true + } else { + false + } + } + Container::Array(array) if array.len() <= 5 => false, + Container::Array(array) => { + let mut r = 0; + let mut prev = None; + for value in array.iter().copied() { + if value.checked_sub(1) != prev { + r += 1; + } + prev = Some(value); + } + if 2 + 4 * r < 2 * array.len() + 2 { + let array = std::mem::replace(array, Vec::new()); + *self = Container::Run(Vec::new()); + for value in array { + self.push(value); + } + true + } else { + false + } + } + Container::Run(_) => true, + } + } + } + let mut key: u16; + let mut domain_iter = domain.into_iter().copied().peekable(); + let mut has_run = false; + while let Some(entry) = domain_iter.next() { + key = (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); + let value: u16 = (entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit"); + let mut container = Container::Array(vec![value]); + while let Some(entry) = domain_iter.peek().copied() { + let entry_key: u16 = + (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); + if entry_key != key { + break; + } + domain_iter.next().expect("peeking just succeeded"); + container + .push((entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit")); + } + keys.push(key); + has_run = container.try_make_run() || has_run; + containers.push(container); + } + // https://github.com/RoaringBitmap/RoaringFormatSpec + use byteorder::{WriteBytesExt, LE}; + const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; + const SERIAL_COOKIE: u32 = 12347; + const NO_OFFSET_THRESHOLD: u32 = 4; + let size: u32 = containers.len().try_into().unwrap(); + let start_offset = if has_run { + out.write_u32::(SERIAL_COOKIE | ((size - 1) << 16))?; + for set in containers.chunks(8) { + let mut b = 0; + for (i, container) in set.iter().enumerate() { + if matches!(container, &Container::Run(..)) { + b |= 1 << i; + } + } + out.write_u8(b)?; + } + if size < NO_OFFSET_THRESHOLD { + 4 + 4 * size + ((size + 7) / 8) + } else { + 4 + 8 * size + ((size + 7) / 8) + } + } else { + out.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; + out.write_u32::(containers.len().try_into().unwrap())?; + 4 + 4 + 4 * size + 4 * size + }; + for (&key, container) in keys.iter().zip(&containers) { + // descriptive header + let key: u32 = key.into(); + let count: u32 = container.popcount() - 1; + out.write_u32::((count << 16) | key)?; + } + if !has_run || size >= NO_OFFSET_THRESHOLD { + // offset header + let mut starting_offset = start_offset; + for container in &containers { + out.write_u32::(starting_offset)?; + starting_offset += match container { + Container::Bits(_) => 8192u32, + Container::Array(array) => u32::try_from(array.len()).unwrap() * 2, + Container::Run(runs) => 2 + u32::try_from(runs.len()).unwrap() * 4, + }; + } + } + for container in &containers { + match container { + Container::Bits(bits) => { + for chunk in bits.iter() { + out.write_u64::(*chunk)?; + } + } + Container::Array(array) => { + for value in array.iter() { + out.write_u16::(*value)?; + } + } + Container::Run(runs) => { + out.write_u16::((runs.len()).try_into().unwrap())?; + for (start, lenm1) in runs.iter().copied() { + out.write_u16::(start)?; + out.write_u16::(lenm1)?; + } + } + } + } + Ok(()) +} + pub(crate) fn get_function_type_for_search<'tcx>( item: &clean::Item, tcx: TyCtxt<'tcx>, diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 15da5bf96b2ca..e70c5bfd734bf 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -242,6 +242,14 @@ function initSearch(rawSearchIndex) { * @type {Array} */ let searchIndex; + /** + * @type {Map} + */ + let searchIndexDeprecated; + /** + * @type {Map} + */ + let searchIndexEmptyDesc; /** * @type {Uint32Array} */ @@ -1326,7 +1334,6 @@ function initSearch(rawSearchIndex) { duplicates.add(obj.fullPath); obj.href = res[1]; - obj.desc = result.desc; out.push(obj); if (out.length >= MAX_RESULTS) { break; @@ -1353,12 +1360,6 @@ function initSearch(rawSearchIndex) { result.word = searchIndex[result.id].word; result_list.push(result); } - for (const result of result_list) { - result.desc = searchState.loadDesc(result.item); - } - for (const result of result_list) { - result.desc = await result.desc; - } result_list.sort((aaa, bbb) => { let a, b; @@ -1401,8 +1402,8 @@ function initSearch(rawSearchIndex) { } // sort deprecated items later - a = aaa.item.deprecated; - b = bbb.item.deprecated; + a = searchIndexDeprecated.get(aaa.item.crate).contains(aaa.item.bitIndex); + b = searchIndexDeprecated.get(bbb.item.crate).contains(bbb.item.bitIndex); if (a !== b) { return a - b; } @@ -1429,8 +1430,8 @@ function initSearch(rawSearchIndex) { } // sort by description (no description goes later) - a = (aaa.desc === ""); - b = (bbb.desc === ""); + a = searchIndexEmptyDesc.get(aaa.item.crate).contains(aaa.item.bitIndex); + b = searchIndexEmptyDesc.get(bbb.item.crate).contains(bbb.item.bitIndex); if (a !== b) { return a - b; } @@ -1453,7 +1454,16 @@ function initSearch(rawSearchIndex) { return 0; }); - return transformResults(result_list); + const transformed = transformResults(result_list); + for (const result of transformed) { + result.desc = searchIndexEmptyDesc.get(result.crate).contains(result.bitIndex) ? + "" : + searchState.loadDesc(result); + } + for (const result of transformed) { + result.desc = await result.desc; + } + return transformed; } /** @@ -2079,7 +2089,7 @@ function initSearch(rawSearchIndex) { parent: item.parent, type: item.type, is_alias: true, - deprecated: item.deprecated, + bitIndex: item.bitIndex, implDisambiguator: item.implDisambiguator, }; } @@ -2712,9 +2722,11 @@ ${item.displayPath}${name}\ currentResults = results.query.userQuery; - const ret_others = await addTab(results.others, results.query, true); - const ret_in_args = await addTab(results.in_args, results.query, false); - const ret_returned = await addTab(results.returned, results.query, false); + const [ret_others, ret_in_args, ret_returned] = await Promise.all([ + addTab(results.others, results.query, true), + addTab(results.in_args, results.query, false), + addTab(results.returned, results.query, false), + ]); // Navigate to the relevant tab if the current tab is empty, like in case users search // for "-> String". If they had selected another tab previously, they have to click on @@ -3267,6 +3279,123 @@ ${item.displayPath}${name}\ return result; } } + class RoaringBitmap { + constructor(str) { + const strdecoded = atob(str); + const u8array = new Uint8Array(strdecoded.length); + for (let j = 0; j < strdecoded.length; ++j) { + u8array[j] = strdecoded.charCodeAt(j); + } + const has_runs = u8array[0] === 0x3b; + const size = has_runs ? + ((u8array[2] | (u8array[3] << 8)) + 1) : + ((u8array[4] | (u8array[5] << 8) | (u8array[6] << 16) | (u8array[7] << 24))); + let i = has_runs ? 4 : 8; + let is_run; + if (has_runs) { + const is_run_len = Math.floor((size + 7) / 8); + is_run = u8array.slice(i, i + is_run_len); + i += is_run_len; + } else { + is_run = new Uint8Array(); + } + this.keys = []; + this.cardinalities = []; + for (let j = 0; j < size; ++j) { + this.keys.push(u8array[i] | (u8array[i + 1] << 8)); + i += 2; + this.cardinalities.push((u8array[i] | (u8array[i + 1] << 8)) + 1); + i += 2; + } + this.containers = []; + let offsets = null; + if (!has_runs || this.keys.length >= 4) { + offsets = []; + for (let j = 0; j < size; ++j) { + offsets.push(u8array[i] | (u8array[i + 1] << 8) | (u8array[i + 2] << 16) | + (u8array[i + 3] << 24)); + i += 4; + } + } + for (let j = 0; j < size; ++j) { + if (offsets && offsets[j] !== i) { + console.log(this.containers); + throw new Error(`corrupt bitmap ${j}: ${i} / ${offsets[j]}`); + } + if (is_run[j >> 3] & (1 << (j & 0x7))) { + const runcount = (u8array[i] | (u8array[i + 1] << 8)); + i += 2; + this.containers.push(new RoaringBitmapRun( + runcount, + u8array.slice(i, i + (runcount * 4)), + )); + i += runcount * 4; + } else if (this.cardinalities[j] >= 4096) { + this.containers.push(new RoaringBitmapBits(u8array.slice(i, i + 8192))); + i += 8192; + } else { + const end = this.cardinalities[j] * 2; + this.containers.push(new RoaringBitmapArray( + this.cardinalities[j], + u8array.slice(i, i + end), + )); + i += end; + } + } + } + contains(keyvalue) { + const key = keyvalue >> 16; + const value = keyvalue & 0xFFFF; + for (let i = 0; i < this.keys.length; ++i) { + if (this.keys[i] === key) { + return this.containers[i].contains(value); + } + } + return false; + } + } + + class RoaringBitmapRun { + constructor(runcount, array) { + this.runcount = runcount; + this.array = array; + } + contains(value) { + const l = this.runcount * 4; + for (let i = 0; i < l; i += 4) { + const start = this.array[i] | (this.array[i + 1] << 8); + const lenm1 = this.array[i + 2] | (this.array[i + 3] << 8); + if (value >= start && value <= (start + lenm1)) { + return true; + } + } + return false; + } + } + class RoaringBitmapArray { + constructor(cardinality, array) { + this.cardinality = cardinality; + this.array = array; + } + contains(value) { + const l = this.cardinality * 2; + for (let i = 0; i < l; i += 2) { + const start = this.array[i] | (this.array[i + 1] << 8); + if (value === start) { + return true; + } + } + return false; + } + } + class RoaringBitmapBits { + constructor(array) { + this.array = array; + } + contains(value) { + return !!(this.array[value >> 3] & (1 << (value & 7))); + } + } /** * Convert raw search index into in-memory search index. @@ -3275,6 +3404,8 @@ ${item.displayPath}${name}\ */ function buildIndex(rawSearchIndex) { searchIndex = []; + searchIndexDeprecated = new Map(); + searchIndexEmptyDesc = new Map(); const charA = "A".charCodeAt(0); let currentIndex = 0; let id = 0; @@ -3307,6 +3438,11 @@ ${item.displayPath}${name}\ }; const descShardList = [ descShard ]; + // Deprecated items and items with no description + searchIndexDeprecated.set(crate, new RoaringBitmap(crateCorpus.c)); + searchIndexEmptyDesc.set(crate, new RoaringBitmap(crateCorpus.e)); + let descIndex = 0; + // This object should have exactly the same set of fields as the "row" // object defined below. Your JavaScript runtime will thank you. // https://mathiasbynens.be/notes/shapes-ics @@ -3316,18 +3452,21 @@ ${item.displayPath}${name}\ name: crate, path: "", descShard, - descIndex: 0, + descIndex, parent: undefined, type: null, id, word: crate, normalizedName: crate.indexOf("_") === -1 ? crate : crate.replace(/_/g, ""), - deprecated: null, + bitIndex: 0, implDisambiguator: null, }; id += 1; searchIndex.push(crateRow); currentIndex += 1; + if (!searchIndexEmptyDesc.get(crate).contains(0)) { + descIndex += 1; + } // a String of one character item type codes const itemTypes = crateCorpus.t; @@ -3341,9 +3480,7 @@ ${item.displayPath}${name}\ const itemPaths = new Map(crateCorpus.q); // an array of (Number) the parent path index + 1 to `paths`, or 0 if none const itemParentIdxs = crateCorpus.i; - // an array of (Number) indices for the deprecated items - const deprecatedItems = new Set(crateCorpus.c); - // an array of (Number) indices for the deprecated items + // a map Number, string for impl disambiguators const implDisambiguator = new Map(crateCorpus.b); // an array of [(Number) item type, // (String) name] @@ -3388,9 +3525,10 @@ ${item.displayPath}${name}\ // faster analysis operations lastPath = ""; len = itemTypes.length; - let descIndex = 1; for (let i = 0; i < len; ++i) { - if (descIndex >= descShard.len) { + const bitIndex = i + 1; + if (descIndex >= descShard.len && + !searchIndexEmptyDesc.get(crate).contains(bitIndex)) { descShard = { crate, shard: descShard.shard + 1, @@ -3439,13 +3577,15 @@ ${item.displayPath}${name}\ id, word, normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""), - deprecated: deprecatedItems.has(i), + bitIndex, implDisambiguator: implDisambiguator.has(i) ? implDisambiguator.get(i) : null, }; id += 1; searchIndex.push(row); lastPath = row.path; - descIndex += 1; + if (!searchIndexEmptyDesc.get(crate).contains(bitIndex)) { + descIndex += 1; + } } if (aliases) { diff --git a/src/tools/rustdoc-js/tester.js b/src/tools/rustdoc-js/tester.js index 1af2f44c230d6..43a22f358c31f 100644 --- a/src/tools/rustdoc-js/tester.js +++ b/src/tools/rustdoc-js/tester.js @@ -559,8 +559,3 @@ process.on("beforeExit", () => { console.log("process did not complete"); process.exit(1); }); - -/*process.on("uncaughtException", (err) => { - console.log(`Uncaught Exception: ${err.message}`); - process.exit(1); -});*/