diff --git a/cli/src/identify.rs b/cli/src/identify.rs index 26f2fe7..6f15a39 100644 --- a/cli/src/identify.rs +++ b/cli/src/identify.rs @@ -95,7 +95,7 @@ pub fn identify_data( want_diff: bool, ) -> Result { let inst = Instant::now(); - let matched = store.analyze(&text_data)?; + let matched = store.analyze(&text_data); info!( "{:?} in {} ms", @@ -126,7 +126,7 @@ pub fn identify_data( // try again, optimizing for the current best match if optimize { let inst = Instant::now(); - let (opt, score) = text_data.optimize_bounds(matched.data)?; + let (opt, score) = text_data.optimize_bounds(matched.data); let (lower, upper) = opt.lines_view(); info!( diff --git a/src/license.rs b/src/license.rs index c30a174..d71c2fe 100644 --- a/src/license.rs +++ b/src/license.rs @@ -3,7 +3,6 @@ use std::{collections::HashMap, fmt}; -use failure::{format_err, Error}; use serde_derive::{Deserialize, Serialize}; use crate::{ @@ -69,7 +68,7 @@ impl fmt::Display for LicenseType { /// # fn main() -> Result<(), Box> { /// # let license = TextData::from("My First License"); /// let sample = TextData::from("copyright 20xx me irl\n// My First License\nfn hello() {\n ..."); -/// let (optimized, score) = sample.optimize_bounds(&license)?; +/// let (optimized, score) = sample.optimize_bounds(&license); /// assert_eq!((1, 2), optimized.lines_view()); /// assert!(score > 0.99f32, "license within text matches"); /// # Ok(()) @@ -107,11 +106,6 @@ impl TextData { } } - // impl specialization might be nice to indicate that this type - // is lacking stored text; perhaps there's another way to indicate that? - // maybe an impl on an enum variant if/when that's available: - // https://github.com/rust-lang/rfcs/pull/1450 - /// Consume this `TextData`, returning one without normalized/processed /// text stored. /// @@ -148,19 +142,16 @@ impl TextData { /// /// Other methods on `TextView` respect this boundary, so it's not needed /// outside this struct. - pub fn with_view(&self, start: usize, end: usize) -> Result { - let view = match self.lines_normalized { - Some(ref lines) => &lines[start..end], - None => return Err(format_err!("TextData does not have original text")), - }; + pub fn with_view(&self, start: usize, end: usize) -> Self { + let view = &self.lines_normalized.as_ref().expect("TextData does not have original text")[start..end]; let view_joined = view.join("\n"); let processed = apply_aggressive(&view_joined); - Ok(TextData { + TextData { match_data: NgramSet::from_str(&processed, 2), lines_view: (start, end), lines_normalized: self.lines_normalized.clone(), text_processed: Some(processed), - }) + } } /// "Erase" the current lines in view and restore the view to its original @@ -170,12 +161,12 @@ impl TextData { /// (and located) with `optimize_bounds`. Now you want to find the other: /// white-out the matched lines, and re-run the overall search to find a /// new high score. - pub fn white_out(&self) -> Result { + pub fn white_out(&self) -> Self { // note that we're not using the view here... let lines = self .lines_normalized .as_ref() - .ok_or_else(|| format_err!("TextData does not have original text"))?; + .expect("TextData does not have original text"); // ...because it's used here to exclude lines let new_normalized: Vec = lines @@ -191,22 +182,17 @@ impl TextData { .collect(); let processed = apply_aggressive(&new_normalized.join("\n")); - Ok(TextData { + TextData { match_data: NgramSet::from_str(&processed, 2), lines_view: (0, new_normalized.len()), lines_normalized: Some(new_normalized), text_processed: Some(processed), - }) + } } /// Get a slice of the normalized lines in this `TextData`. - /// - /// If the text was discarded with `without_text`, this returns `None`. - pub fn lines(&self) -> Option<&[String]> { - match self.lines_normalized { - Some(ref lines) => Some(&lines[self.lines_view.0..self.lines_view.1]), - None => None, - } + pub fn lines(&self) -> &[String] { + &self.lines_normalized.as_ref().expect("TextData does not have original text")[self.lines_view.0..self.lines_view.1] } #[doc(hidden)] @@ -239,17 +225,17 @@ impl TextData { /// /// You should check the value of `lines_view` on the returned struct to /// find the line ranges. - pub fn optimize_bounds(&self, other: &TextData) -> Result<(Self, f32), Error> { + pub fn optimize_bounds(&self, other: &TextData) -> (Self, f32) { if self.lines_normalized.is_none() { - return Err(format_err!("TextData does not have original text")); + panic!("TextData does not have original text"); } let view = self.lines_view; // optimize the ending bounds of the text match let (end_optimized, _) = self.search_optimize( - &|end| self.with_view(view.0, end).unwrap().match_score(other), - &|end| self.with_view(view.0, end).unwrap(), + &|end| self.with_view(view.0, end).match_score(other), + &|end| self.with_view(view.0, end), ); let new_end = end_optimized.lines_view.1; @@ -258,12 +244,11 @@ impl TextData { &|start| { end_optimized .with_view(start, new_end) - .unwrap() .match_score(other) }, - &|start| end_optimized.with_view(start, new_end).unwrap(), + &|start| end_optimized.with_view(start, new_end), ); - Ok((optimized, score)) + (optimized, score) } fn search_optimize( @@ -327,25 +312,25 @@ mod tests { let license = TextData::from(license_text).without_text(); let sample = TextData::from(sample_text); - let (optimized, _) = sample.optimize_bounds(&license).unwrap(); + let (optimized, _) = sample.optimize_bounds(&license); println!("{:?}", optimized.lines_view); - println!("{:?}", optimized.lines_normalized.clone().unwrap()); + println!("{:?}", optimized.lines_normalized.clone()); assert_eq!((0, 3), optimized.lines_view); // add more to the string, try again (avoid int trunc screwups) let sample_text = format!("{}\none more line", sample_text); let sample = TextData::from(sample_text.as_str()); - let (optimized, _) = sample.optimize_bounds(&license).unwrap(); + let (optimized, _) = sample.optimize_bounds(&license); println!("{:?}", optimized.lines_view); - println!("{:?}", optimized.lines_normalized.clone().unwrap()); + println!("{:?}", optimized.lines_normalized.clone()); assert_eq!((0, 3), optimized.lines_view); // add to the beginning too let sample_text = format!("some content\nat\n\nthe beginning\n{}", sample_text); let sample = TextData::from(sample_text.as_str()); - let (optimized, _) = sample.optimize_bounds(&license).unwrap(); + let (optimized, _) = sample.optimize_bounds(&license); println!("{:?}", optimized.lines_view); - println!("{:?}", optimized.lines_normalized.clone().unwrap()); + println!("{:?}", optimized.lines_normalized.clone()); // end bounds at 7 and 8 have the same score, since they're empty lines (not // counted). askalono is not smart enough to trim this as close as it // can. @@ -365,22 +350,22 @@ mod tests { let license = TextData::from(license_text).without_text(); // sanity: the optimized bounds should be at (3, 7) - let (optimized, _) = sample.optimize_bounds(&license).unwrap(); + let (optimized, _) = sample.optimize_bounds(&license); assert_eq!((3, 7), optimized.lines_view); // this should still work - let sample = sample.with_view(3, 7).unwrap(); - let (optimized, _) = sample.optimize_bounds(&license).unwrap(); + let sample = sample.with_view(3, 7); + let (optimized, _) = sample.optimize_bounds(&license); assert_eq!((3, 7), optimized.lines_view); // but if we shrink the view further, it shouldn't be outside that range - let sample = sample.with_view(4, 6).unwrap(); - let (optimized, _) = sample.optimize_bounds(&license).unwrap(); + let sample = sample.with_view(4, 6); + let (optimized, _) = sample.optimize_bounds(&license); assert_eq!((4, 6), optimized.lines_view); // restoring the view should still be OK too - let sample = sample.with_view(0, 9).unwrap(); - let (optimized, _) = sample.optimize_bounds(&license).unwrap(); + let sample = sample.with_view(0, 9); + let (optimized, _) = sample.optimize_bounds(&license); assert_eq!((3, 7), optimized.lines_view); } @@ -413,11 +398,11 @@ mod tests { let a = TextData::from("aaa\nbbb\nccc\nddd"); assert_eq!(Some("aaa bbb ccc ddd"), a.text_processed()); - let b = a.with_view(1, 3).expect("with_view must be ok"); - assert_eq!(2, b.lines().unwrap().len()); + let b = a.with_view(1, 3); + assert_eq!(2, b.lines().len()); assert_eq!(Some("bbb ccc"), b.text_processed()); - let c = b.white_out().expect("white_out must be ok"); + let c = b.white_out(); assert_eq!(Some("aaa ddd"), c.text_processed()); } } diff --git a/src/store/analyze.rs b/src/store/analyze.rs index 8d7e641..d2362ab 100644 --- a/src/store/analyze.rs +++ b/src/store/analyze.rs @@ -3,8 +3,6 @@ use std::{cmp::Ordering, fmt}; -use failure::Error; - use crate::{license::LicenseType, license::TextData, store::base::Store}; /// Information about text that was compared against licenses in the store. @@ -99,7 +97,7 @@ impl Store { /// This parallelizes the search as much as it can to find the best match. /// Once a match is obtained, it can be optimized further; see methods on /// `TextData` for more information. - pub fn analyze(&self, text: &TextData) -> Result, Error> { + pub fn analyze(&self, text: &TextData) -> Match<'_> { let mut res: Vec>; // parallel analysis @@ -135,11 +133,11 @@ impl Store { } let m = &res[0]; - Ok(Match { + Match { score: m.score, name: m.name.to_string(), license_type: m.license_type, data: m.data, - }) + } } } diff --git a/src/store/base.rs b/src/store/base.rs index 5a706d0..8b21ef9 100644 --- a/src/store/base.rs +++ b/src/store/base.rs @@ -32,7 +32,7 @@ pub(crate) struct LicenseEntry { /// /// # fn main() -> Result<(), Box> { /// let store = Store::from_cache(File::open("askalono-cache.bin.gz")?)?; -/// let result = store.analyze(&TextData::from("what's this"))?; +/// let result = store.analyze(&TextData::from("what's this")); /// # Ok(()) /// # } /// ``` diff --git a/src/strategy.rs b/src/strategy.rs index bb9bfc7..c3aa19c 100644 --- a/src/strategy.rs +++ b/src/strategy.rs @@ -194,7 +194,7 @@ impl<'a> ScanStrategy<'a> { } fn scan_elimination(&self, text: &TextData) -> Result { - let mut analysis = self.store.analyze(text)?; + let mut analysis = self.store.analyze(text); let score = analysis.score; let mut license = None; let mut containing = Vec::new(); @@ -222,8 +222,7 @@ impl<'a> ScanStrategy<'a> { let mut current_text: Cow<'_, TextData> = Cow::Borrowed(text); for _n in 0..self.max_passes { let (optimized, optimized_score) = current_text - .optimize_bounds(analysis.data) - .expect("optimize_bounds failed in elimination loop"); + .optimize_bounds(analysis.data); // stop if we didn't find anything acceptable if optimized_score < self.confidence_threshold { @@ -241,8 +240,8 @@ impl<'a> ScanStrategy<'a> { }); // and white-out + reanalyze for next iteration - current_text = Cow::Owned(optimized.white_out().expect("optimized must have text")); - analysis = self.store.analyze(¤t_text)?; + current_text = Cow::Owned(optimized.white_out()); + analysis = self.store.analyze(¤t_text); } } @@ -304,8 +303,8 @@ impl<'a> ScanStrategy<'a> { 'start: for start in (starting_at..text_end).step_by(self.step_size) { // ...and also the end of window to find high scores. for end in (start..=text_end).step_by(self.step_size) { - let view = text.with_view(start, end).expect("view missing text"); - let analysis = self.store.analyze(&view)?; + let view = text.with_view(start, end); + let analysis = self.store.analyze(&view); // just getting a feel for the data at this point, not yet // optimizing the view. @@ -346,10 +345,9 @@ impl<'a> ScanStrategy<'a> { None => return Ok(None), }; let check = matched.data; - let view = text.with_view(found.0, found.1).expect("view missing text"); + let view = text.with_view(found.0, found.1); let (optimized, optimized_score) = view - .optimize_bounds(check) - .expect("optimize_bounds failed narrowing top-down result"); + .optimize_bounds(check); trace!( "optimized {} {} at ({:?})", diff --git a/tests/introspection.rs b/tests/introspection.rs index f28cf52..b7a17c8 100644 --- a/tests/introspection.rs +++ b/tests/introspection.rs @@ -31,14 +31,14 @@ fn self_apache_header() { let mut text = String::new(); f.read_to_string(&mut text).unwrap(); let text_data: TextData = text.into(); - let matched = store.analyze(&text_data).unwrap(); + let matched = store.analyze(&text_data); // check that it looked apache-2.0-ish assert_eq!("Apache-2.0", &matched.name); assert_eq!(askalono::LicenseType::Header, matched.license_type); // now try to find the bounds of the license header - let (optimized, _) = text_data.optimize_bounds(&matched.data).unwrap(); + let (optimized, _) = text_data.optimize_bounds(&matched.data); // the license is from (0-indexed) lines 3 thru 12 of this file, excluding // that copyright statement on line 1. diff --git a/tests/real_world.rs b/tests/real_world.rs index 3550ffb..84be8f3 100644 --- a/tests/real_world.rs +++ b/tests/real_world.rs @@ -48,7 +48,7 @@ fn assert_license( f.read_to_string(&mut text).unwrap(); let text_data: TextData = text.into(); - let matched = store.analyze(&text_data).unwrap(); + let matched = store.analyze(&text_data); assert_eq!( license_id, matched.name, "{} was identified as {} but should have been {}", diff --git a/tests/store_sanity.rs b/tests/store_sanity.rs index 7560a94..b2a8246 100644 --- a/tests/store_sanity.rs +++ b/tests/store_sanity.rs @@ -30,7 +30,7 @@ fn self_licenses() { let mut text = String::new(); f.read_to_string(&mut text).unwrap(); let text_data: TextData = text.into(); - let matched = store.analyze(&text_data).unwrap(); + let matched = store.analyze(&text_data); assert_eq!(license, &matched.name); assert_eq!( @@ -46,7 +46,7 @@ fn self_licenses() { fn empty_match() { let store = common::load_store(); let text = TextData::from(""); - let matched = store.analyze(&text).unwrap(); + let matched = store.analyze(&text); assert_eq!(0f32, matched.score); } diff --git a/wasm/pkg/src/lib.rs b/wasm/pkg/src/lib.rs index 23b89e0..30bf05b 100644 --- a/wasm/pkg/src/lib.rs +++ b/wasm/pkg/src/lib.rs @@ -38,7 +38,7 @@ impl MatchResult { #[wasm_bindgen] pub fn normalize_text(text: &str) -> String { let data = TextData::new(text); - data.lines().unwrap().join("\n") + data.lines().join("\n") } #[wasm_bindgen] @@ -50,7 +50,7 @@ impl AskalonoStore { } pub fn identify(&self, text: &str) -> MatchResult { - let matched = self.store.analyze(&text.into()).unwrap(); + let matched = self.store.analyze(&text.into()); MatchResult { name: matched.name, score: matched.score,