diff --git a/Cargo.toml b/Cargo.toml index 8c8cd83..c932ecb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ edition = "2018" [dependencies] erasable = "1.2.1" -rustc-hash = "1.0.1" +hashbrown = "0.8.0" serde = { version = "1.0.89", optional = true, default-features = false } slice-dst = "1.4.1" smol_str = "0.1.10" diff --git a/src/green/builder.rs b/src/green/builder.rs index 35ecbc3..1381abd 100644 --- a/src/green/builder.rs +++ b/src/green/builder.rs @@ -1,4 +1,4 @@ -use rustc_hash::FxHashSet; +use hashbrown::HashMap; use crate::{ green::{GreenElement, GreenNode, GreenToken, SyntaxKind}, @@ -7,8 +7,8 @@ use crate::{ #[derive(Default, Debug)] pub struct NodeCache { - nodes: FxHashSet, - tokens: FxHashSet, + nodes: HashMap, + tokens: HashMap, } impl NodeCache { @@ -17,7 +17,7 @@ impl NodeCache { I: IntoIterator, I::IntoIter: ExactSizeIterator, { - let mut node = GreenNode::new(kind, children); + let node = GreenNode::new(kind, children); // Green nodes are fully immutable, so it's ok to deduplicate them. // This is the same optimization that Roslyn does // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees @@ -27,21 +27,45 @@ impl NodeCache { // 17% of the memory for green nodes! // Future work: make hashing faster by avoiding rehashing of subtrees. if node.children().len() <= 3 { - match self.nodes.get(&node) { - Some(existing) => node = existing.clone(), - None => assert!(self.nodes.insert(node.clone())), - } + self.nodes.raw_entry_mut().from_key(&node).or_insert(node, ()).0.clone() + } else { + node } - node } fn token(&mut self, kind: SyntaxKind, text: SmolStr) -> GreenToken { - let mut token = GreenToken::new(kind, text); - match self.tokens.get(&token) { - Some(existing) => token = existing.clone(), - None => assert!(self.tokens.insert(token.clone())), + let token = GreenToken::new(kind, text); + self.tokens.raw_entry_mut().from_key(&token).or_insert(token, ()).0.clone() + } + + fn collect_root_nodes(&mut self) -> Vec { + // NB: `drain_filter` is `retain` but with an iterator of the removed elements. + // i.e.: elements where the predicate is FALSE are removed and iterated over. + self.nodes.drain_filter(|node, ()| node.strong_count() > 1).map(|(node, _)| node).collect() + } + + fn collect_tokens(&mut self) { + self.tokens.retain(|token, ()| token.strong_count() > 1) + } + + /// Garbage collect any elements in this cache that are only held by this cache. + pub fn gc(&mut self) { + let mut to_drop = self.collect_root_nodes(); + + while let Some(node) = to_drop.pop() { + if node.strong_count() <= 2 { + self.nodes.remove(&node); + + // queue children for (potential) removal from the cache + for child in node.children() { + if let Some(node) = child.into_node() { + to_drop.push(node.clone()); + } + } + } } - token + + self.collect_tokens(); } } diff --git a/src/green/node.rs b/src/green/node.rs index 5e2687a..7e4aa8c 100644 --- a/src/green/node.rs +++ b/src/green/node.rs @@ -68,6 +68,10 @@ impl GreenNode { let r: &SliceWithHeader<_, _> = &*self.data; r as *const _ as _ } + + pub(super) fn strong_count(&self) -> usize { + Thin::with(&self.data, |node| Arc::strong_count(node)) + } } #[derive(Debug, Clone)] diff --git a/src/green/token.rs b/src/green/token.rs index be0fefd..a4b31f5 100644 --- a/src/green/token.rs +++ b/src/green/token.rs @@ -62,6 +62,12 @@ impl GreenToken { pub fn text_len(&self) -> TextSize { TextSize::try_from(self.text().len()).unwrap() } + + pub(super) fn strong_count(&self) -> usize { + let ptr = Self::remove_tag(self.ptr); + let arc = unsafe { ManuallyDrop::new(Arc::from_raw(ptr.as_ptr())) }; + Arc::strong_count(&arc) + } } impl fmt::Debug for GreenToken { diff --git a/src/lib.rs b/src/lib.rs index 3f03fb9..d1d0f7b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,13 +23,13 @@ mod serde_impls; // these, as a custom interner might work better, but `SmolStr` is a pretty good // default. pub use smol_str::SmolStr; -pub use text_size::{TextRange, TextSize, TextLen}; +pub use text_size::{TextLen, TextRange, TextSize}; pub use crate::{ api::{ Language, SyntaxElement, SyntaxElementChildren, SyntaxNode, SyntaxNodeChildren, SyntaxToken, }, - green::{Checkpoint, Children, GreenNode, GreenNodeBuilder, GreenToken, SyntaxKind}, + green::{Checkpoint, Children, GreenNode, GreenNodeBuilder, GreenToken, NodeCache, SyntaxKind}, syntax_text::SyntaxText, utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent}, };