diff --git a/src/lib.rs b/src/lib.rs index 5305182..f7a7e8a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -570,31 +570,49 @@ impl Jieba { route.clear(); } - #[allow(non_snake_case)] fn cut_internal<'a>(&self, sentence: &'a str, cut_all: bool, hmm: bool) -> Vec<&'a str> { + // This is the output buffer. let heuristic_capacity = sentence.len() / 2; let mut words = Vec::with_capacity(heuristic_capacity); + + if cut_all { + self.cut_internal2(sentence, cut_all, &mut words, |j, s, w| j.cut_all_internal(s, w)); + } else { + let mut route: Vec<(f64, usize)> = Vec::with_capacity(heuristic_capacity); + let mut dag = StaticSparseDAG::with_size_hint(heuristic_capacity); + if hmm { + // TODO: Why is this sentence.chars().count() and not sentence.len()? + let mut hmm_context = hmm::HmmContext::new(sentence.chars().count()); + self.cut_internal2(sentence, cut_all, &mut words, |j, s, w| { + j.cut_dag_hmm(s, w, &mut route, &mut dag, &mut hmm_context) + }); + } else { + self.cut_internal2(sentence, cut_all, &mut words, |j, s, w| { + j.cut_dag_no_hmm(s, w, &mut route, &mut dag) + }); + } + } + words + } + + fn cut_internal2<'a>( + &self, + sentence: &'a str, + cut_all: bool, + words: &mut Vec<&'a str>, + mut cut_strategy: impl FnMut(&Self, &'a str, &mut Vec<&'a str>), + ) { + // This is for this algorithm's selector. let re_han: &Regex = if cut_all { &RE_HAN_CUT_ALL } else { &RE_HAN_DEFAULT }; let re_skip: &Regex = if cut_all { &RE_SKIP_CUT_ALL } else { &RE_SKIP_DEFAULT }; let splitter = SplitMatches::new(re_han, sentence); - let mut route = Vec::with_capacity(heuristic_capacity); - let mut dag = StaticSparseDAG::with_size_hint(heuristic_capacity); - - let mut hmm_context = hmm::HmmContext::new(sentence.chars().count()); for state in splitter { match state { SplitState::Matched(_) => { let block = state.into_str(); assert!(!block.is_empty()); - - if cut_all { - self.cut_all_internal(block, &mut words); - } else if hmm { - self.cut_dag_hmm(block, &mut words, &mut route, &mut dag, &mut hmm_context); - } else { - self.cut_dag_no_hmm(block, &mut words, &mut route, &mut dag); - } + cut_strategy(self, block, words) } SplitState::Unmatched(_) => { let block = state.into_str(); @@ -622,7 +640,6 @@ impl Jieba { } } } - words } /// Cut the input text