From d585d8195714ebc47fe8d3a6b09daf399e5a7188 Mon Sep 17 00:00:00 2001 From: Polochon_street Date: Wed, 3 Jul 2024 22:33:50 +0200 Subject: [PATCH] Refactor playlist / library playlist making --- CHANGELOG.md | 3 + examples/library.rs | 6 +- examples/library_extra_info.rs | 6 +- examples/playlist.rs | 22 +-- src/library.rs | 329 ++++++++++++++++++++------------- src/playlist.rs | 226 ++++++++++++++-------- 6 files changed, 373 insertions(+), 219 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d16ecd0..87c4c8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # Changelog ## bliss 0.8.0 +* Remove the `number_songs` option from `Library::playlist_from_custom`. + Since it now returns an Iterator, `number_songs` can just be replaced by `take`. +* `Library::playlist_from_custom` now returns an Iterator instead of a Vec. * `Library::playlist_from_custom` now also returns the song(s) the playlist was built from. diff --git a/examples/library.rs b/examples/library.rs index 06e7ea5..e82c2a2 100644 --- a/examples/library.rs +++ b/examples/library.rs @@ -191,9 +191,9 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::<()>(&[song_path], playlist_length)?; - let song_paths = songs - .into_iter() + let song_paths = library + .playlist_from::<()>(&[song_path])? + .take(playlist_length) .map(|s| s.bliss_song.path.to_string_lossy().to_string()) .collect::>(); for song in song_paths { diff --git a/examples/library_extra_info.rs b/examples/library_extra_info.rs index 39facc7..65f855a 100644 --- a/examples/library_extra_info.rs +++ b/examples/library_extra_info.rs @@ -209,9 +209,9 @@ fn main() -> Result<()> { .unwrap_or("20") .parse::()?; let library: Library = Library::from_config_path(config_path)?; - let songs = library.playlist_from::(&[song_path], playlist_length)?; - let playlist = songs - .into_iter() + let playlist = library + .playlist_from::(&[song_path])? + .take(playlist_length) .map(|s| { ( s.bliss_song.path.to_string_lossy().to_string(), diff --git a/examples/playlist.rs b/examples/playlist.rs index e141783..7eaad64 100644 --- a/examples/playlist.rs +++ b/examples/playlist.rs @@ -73,21 +73,21 @@ fn main() -> Result<()> { Err(e) => println!("error analyzing {}: {}", path.display(), e), }; } - analyzed_songs.extend_from_slice(&songs); let serialized = serde_json::to_string(&analyzed_songs).unwrap(); - let mut songs_to_chose_from: Vec<_> = analyzed_songs + fs::write(analysis_path, serialized)?; + + analyzed_songs.extend_from_slice(&songs); + let songs_to_chose_from: Vec<_> = analyzed_songs .into_iter() .filter(|x| x == &first_song || paths.contains(&x.path.to_string_lossy().to_string())) .collect(); - closest_to_songs(&[first_song], &mut songs_to_chose_from, &euclidean_distance); - dedup_playlist(&mut songs_to_chose_from, None); - - fs::write(analysis_path, serialized)?; - let playlist = songs_to_chose_from - .iter() - .map(|s| s.path.to_string_lossy().to_string()) - .collect::>() - .join("\n"); + let playlist = dedup_playlist( + closest_to_songs(&[first_song], &songs_to_chose_from, &euclidean_distance), + None, + ) + .map(|s| s.path.to_string_lossy().to_string()) + .collect::>() + .join("\n"); if let Some(m) = matches.value_of("output-playlist") { fs::write(m, playlist)?; } else { diff --git a/src/library.rs b/src/library.rs index 0b8eece..1499b9b 100644 --- a/src/library.rs +++ b/src/library.rs @@ -91,7 +91,8 @@ //! utilies such as [Library::analyze_paths], which analyzes all songs //! in given paths and stores it in the databases, as well as //! [Library::playlist_from], which allows you to generate a playlist -//! from any given analyzed song. +//! from any given analyzed song(s), and [Library::playlist_from_custom], +//! which allows you to customize the way you generate playlists. //! //! The [Library] structure also comes with a [LibrarySong] song struct, //! which represents a song stored in the database. @@ -338,7 +339,7 @@ pub struct Library { /// ``` /// is totally possible. #[derive(Debug, PartialEq, Clone)] -pub struct LibrarySong { +pub struct LibrarySong { /// Actual bliss song, containing the song's metadata, as well /// as the bliss analysis. pub bliss_song: Song, @@ -346,7 +347,7 @@ pub struct LibrarySong { pub extra_info: T, } -impl AsRef for LibrarySong { +impl AsRef for LibrarySong { fn as_ref(&self) -> &Song { &self.bliss_song } @@ -502,52 +503,57 @@ impl Library { /// /// Generating a playlist from a single song is also possible, and is just the special case /// where song_paths is a slice of length 1. - pub fn playlist_from( + pub fn playlist_from( &self, song_paths: &[&str], - playlist_length: usize, - ) -> Result>> { - self.playlist_from_custom( - song_paths, - playlist_length, - &euclidean_distance, - &mut closest_to_songs, - true, - ) + ) -> Result>> { + self.playlist_from_custom(song_paths, &euclidean_distance, closest_to_songs, true) } /// Build a playlist of `playlist_length` items from a set of already analyzed - /// song in the library at `initial_songs`, using distance metric `distance`, - /// the sorting function `sort_by` and deduplicating if `dedup` is set to - /// `true`. - /// Note: The playlist includes the songs specified in `initial_songs`, so `playlist_length` - /// has to be higher or equal than the length of `initial_songs`. + /// song(s) in the library `initial_song_paths`, using distance metric `distance`, + /// the sorting function `sort_by`. + /// Note: The resulting playlist includes the songs specified in `initial_song_paths`. /// /// You can use ready-to-use distance metrics such as - /// [ExtendedIsolationForest](extended_isolation_forest::Forest), and ready-to-use - /// sorting functions like [closest_to_songs]. + /// [ExtendedIsolationForest](extended_isolation_forest::Forest) or [euclidean_distance], + /// and ready-to-use sorting functions like [closest_to_songs]. + /// + /// If you want to use the sorting functions in a uniform manner, you can do something like + /// this: + /// ``` + /// use bliss_audio::library::LibrarySong; + /// use bliss_audio::playlist::{closest_to_songs, song_to_song}; + /// + /// // The user would be choosing this + /// let use_closest_to_songs = true; + /// let sort = |x: &[LibrarySong<()>], + /// y: &[LibrarySong<()>], + /// z| + /// -> Box>> { + /// match use_closest_to_songs { + /// false => Box::new(closest_to_songs(x, y, z)), + /// true => Box::new(song_to_song(x, y, z)), + /// } + /// }; + /// ``` + /// and use `playlist_from_custom` with that sort as `sort_by`. /// /// Generating a playlist from a single song is also possible, and is just the special case - /// where song_paths is a slice of length 1. - // TODO: making a playlist with the entire list of songs and then truncating is not - // really the best approach - maybe sort_by should take a number of songs an Option? - // Or maybe make `sort_by` return an iterator over songs? Something something a struct - // with an internal state etc - pub fn playlist_from_custom< - T: Serialize + DeserializeOwned, - F: FnMut(&[LibrarySong], &mut [LibrarySong], &dyn DistanceMetricBuilder), - >( + /// where song_paths is a slice with a single song. + pub fn playlist_from_custom<'a, T, F, I>( &self, - initial_songs: &[&str], - playlist_length: usize, - distance: &dyn DistanceMetricBuilder, - sort_by: &mut F, + initial_song_paths: &[&str], + distance: &'a dyn DistanceMetricBuilder, + sort_by: F, dedup: bool, - ) -> Result>> { - if playlist_length <= initial_songs.len() { - bail!("A playlist length less than the initial playlist size was provided.") - } - let mut playlist: Vec> = initial_songs + ) -> Result> + 'a> + where + T: Serialize + DeserializeOwned + Clone + 'a, + F: Fn(&[LibrarySong], &[LibrarySong], &'a dyn DistanceMetricBuilder) -> I, + I: Iterator> + 'a, + { + let initial_songs: Vec> = initial_song_paths .iter() .map(|s| { self.song_from_path(s).map_err(|_| { @@ -555,22 +561,23 @@ impl Library { }) }) .collect::, BlissError>>()?; - // Remove the songs that are already in the playlist, so they don't get + // Remove the initial songs, so they don't get // sorted in the mess. - let mut songs = self + let songs = self .songs_from_library()? .into_iter() - .filter(|s| !initial_songs.contains(&&*s.bliss_song.path.to_string_lossy().to_string())) + .filter(|s| { + !initial_song_paths.contains(&&*s.bliss_song.path.to_string_lossy().to_string()) + }) .collect::>(); - sort_by(&playlist, &mut songs, distance); + + let iterator = sort_by(&initial_songs, &songs, distance); + let mut iterator: Box>> = + Box::new(initial_songs.into_iter().chain(iterator)); if dedup { - dedup_playlist_custom_distance(&mut songs, None, distance); + iterator = Box::new(dedup_playlist_custom_distance(iterator, None, distance)); } - songs.truncate(playlist_length - playlist.len()); - // We're reallocating a whole vector here, there must better ways to do what we want to - // do. - playlist.append(&mut songs); - Ok(playlist) + Ok(iterator) } /// Make a playlist of `number_albums` albums closest to the album @@ -644,7 +651,7 @@ impl Library { /// use it, because you only pass the new paths that need to be analyzed to /// this function, make sure to delete yourself from the database the songs /// that have been deleted from storage. - pub fn update_library_extra_info>( + pub fn update_library_extra_info>( &mut self, paths_extra_info: Vec<(P, T)>, delete_everything_else: bool, @@ -683,7 +690,7 @@ impl Library { /// `convert_extra_info` is a function that you should specify how /// to convert that extra info to something serializable. pub fn update_library_convert_extra_info< - T: Serialize + DeserializeOwned, + T: Serialize + DeserializeOwned + Clone, U, P: Into, >( @@ -771,7 +778,7 @@ impl Library { /// CUE track names: passing `vec![file.cue]` will add /// individual tracks with the `cue_info` field set in the database. pub fn analyze_paths_extra_info< - T: Serialize + DeserializeOwned + std::fmt::Debug, + T: Serialize + DeserializeOwned + std::fmt::Debug + Clone, P: Into, >( &mut self, @@ -805,7 +812,7 @@ impl Library { /// `convert_extra_info` is a function that you should specify /// to convert that extra info to something serializable. pub fn analyze_paths_convert_extra_info< - T: Serialize + DeserializeOwned, + T: Serialize + DeserializeOwned + Clone, U, P: Into, >( @@ -923,7 +930,7 @@ impl Library { // Get songs from a songs / features statement. // BEWARE that the two songs and features query MUST be the same - fn _songs_from_statement( + fn _songs_from_statement( &self, songs_statement: &str, features_statement: &str, @@ -983,7 +990,7 @@ impl Library { /// // TODO maybe the error should make the song id / song path // accessible easily? - pub fn songs_from_library( + pub fn songs_from_library( &self, ) -> Result>> { let songs_statement = " @@ -1006,7 +1013,7 @@ impl Library { /// /// This will return all songs with corresponding bliss "album" tag, /// and will order them by track number. - pub fn songs_from_album( + pub fn songs_from_album( &self, album_title: &str, ) -> Result>> { @@ -1039,7 +1046,7 @@ impl Library { /// Get a LibrarySong from a given file path. /// TODO pathbuf here too - pub fn song_from_path( + pub fn song_from_path( &self, song_path: &str, ) -> Result> { @@ -1085,7 +1092,7 @@ impl Library { Ok(song) } - fn _song_from_row_closure( + fn _song_from_row_closure( row: &Row, ) -> Result, RusqliteError> { let path: String = row.get(0)?; @@ -1158,7 +1165,7 @@ impl Library { /// Store a [Song] in the database, overidding any existing /// song with the same path by that one. // TODO to_str() returns an option; return early and avoid panicking - pub fn store_song( + pub fn store_song( &mut self, library_song: &LibrarySong, ) -> Result<(), BlissError> { @@ -1393,8 +1400,11 @@ mod test { // Returning the TempDir here, so it doesn't go out of scope, removing // the directory. // - // Setup a test library made of 3 analyzed songs, with every field being different, + // Setup a test library made of analyzed songs, with every field being different, // as well as an unanalyzed song and a song analyzed with a previous version. + // + // TODO the SQL database should be populated with the actual songs created here using + // format strings #[cfg(feature = "ffmpeg")] fn setup_test_library() -> ( Library, @@ -1407,6 +1417,7 @@ mod test { LibrarySong, LibrarySong, LibrarySong, + LibrarySong, ), ) { let config_dir = TempDir::new("coucou").unwrap(); @@ -1446,12 +1457,12 @@ mod test { metadata_bliss_does_not_have: String::from("/path/to/charlie1001"), }, }; + let analysis_vector: [f32; NUMBER_FEATURES] = (0..NUMBER_FEATURES) .map(|x| x as f32 + 10.) .collect::>() .try_into() .unwrap(); - let song = Song { path: "/path/to/song2001".into(), artist: Some("Artist2001".into()), @@ -1475,6 +1486,34 @@ mod test { }, }; + let analysis_vector: [f32; NUMBER_FEATURES] = (0..NUMBER_FEATURES) + .map(|x| x as f32 + 10.) + .collect::>() + .try_into() + .unwrap(); + let song = Song { + path: "/path/to/song2201".into(), + artist: Some("Artist2001".into()), + title: Some("Title2001".into()), + album: Some("Remixes of Album2001".into()), + album_artist: Some("An Album Artist2001".into()), + track_number: Some("02".into()), + genre: Some("Electronica2001".into()), + analysis: Analysis { + internal_analysis: analysis_vector, + }, + duration: Duration::from_secs(410), + features_version: 1, + cue_info: None, + }; + let second_song_dupe = LibrarySong { + bliss_song: song, + extra_info: ExtraInfo { + ignore: false, + metadata_bliss_does_not_have: String::from("/path/to/charlie2201"), + }, + }; + let analysis_vector: [f32; NUMBER_FEATURES] = (0..NUMBER_FEATURES) .map(|x| x as f32 / 2.) .collect::>() @@ -1644,6 +1683,12 @@ mod test { 1, '{\"ignore\": false, \"metadata_bliss_does_not_have\": \"/path/to/charlie2001\"}', null, null ), + ( + 2201, '/path/to/song2201', 'Artist2001', 'Title2001', 'Remixes of Album2001', + 'An Album Artist2001', '02', 'Electronica2001', 410, true, + 1, '{\"ignore\": false, \"metadata_bliss_does_not_have\": + \"/path/to/charlie2201\"}', null, null + ), ( 3001, '/path/to/song3001', null, null, null, null, null, null, null, false, 1, '{}', null, null @@ -1717,7 +1762,8 @@ mod test { (6001, ?6, ?1), (7001, ?7, ?1), (7002, ?8, ?1), - (7003, ?9, ?1); + (7003, ?9, ?1), + (2201, ?10, ?1); ", params![ index, @@ -1729,6 +1775,7 @@ mod test { index as f32 * 50., index as f32 * 100., index as f32 * 101., + index as f32 + 10., ], ) .unwrap(); @@ -1754,6 +1801,7 @@ mod test { ( first_song, second_song, + second_song_dupe, third_song, fourth_song, fifth_song, @@ -1926,10 +1974,6 @@ mod test { } } - fn first_factor_divided_by_30_distance(a: &Array1, b: &Array1) -> f32 { - ((a[1] - b[1]).abs() / 30.).floor() - } - fn first_factor_distance(a: &Array1, b: &Array1) -> f32 { (a[1] - b[1]).abs() } @@ -1939,25 +1983,18 @@ mod test { fn test_library_playlist_song_not_existing() { let (library, _temp_dir, _) = setup_test_library(); assert!(library - .playlist_from::(&["not-existing"], 2) + .playlist_from::(&["not-existing"]) .is_err()); } - #[test] - #[cfg(feature = "ffmpeg")] - fn test_library_playlist_crop() { - let (library, _temp_dir, _) = setup_test_library(); - let songs: Vec> = - library.playlist_from(&["/path/to/song2001"], 2).unwrap(); - assert_eq!(2, songs.len()); - } - #[test] #[cfg(feature = "ffmpeg")] fn test_library_simple_playlist() { let (library, _temp_dir, _) = setup_test_library(); - let songs: Vec> = - library.playlist_from(&["/path/to/song2001"], 20).unwrap(); + let songs: Vec> = library + .playlist_from(&["/path/to/song2001"]) + .unwrap() + .collect(); assert_eq!( vec![ "/path/to/song2001", @@ -1977,15 +2014,27 @@ mod test { #[test] #[cfg(feature = "ffmpeg")] - fn test_library_playlist_length() { + fn test_library_playlist_dupe_order_preserved() { let (library, _temp_dir, _) = setup_test_library(); - let songs: Vec> = - library.playlist_from(&["/path/to/song2001"], 3).unwrap(); + let songs: Vec> = library + .playlist_from_custom( + &["/path/to/song2201"], + &euclidean_distance, + closest_to_songs, + false, + ) + .unwrap() + .collect(); assert_eq!( vec![ + "/path/to/song2201", "/path/to/song2001", "/path/to/song6001", "/path/to/song5001", + "/path/to/song1001", + "/path/to/song7001", + "/path/to/cuetrack.cue/CUE_TRACK001", + "/path/to/cuetrack.cue/CUE_TRACK002", ], songs .into_iter() @@ -1994,19 +2043,44 @@ mod test { ) } + fn first_factor_divided_by_30_distance(a: &Array1, b: &Array1) -> f32 { + ((a[1] - b[1]).abs() / 30.).floor() + } + #[test] #[cfg(feature = "ffmpeg")] - fn test_library_custom_playlist_distance() { + fn test_library_playlist_deduplication() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = library .playlist_from_custom( &["/path/to/song2001"], - 20, + &first_factor_divided_by_30_distance, + closest_to_songs, + true, + ) + .unwrap() + .collect(); + assert_eq!( + vec![ + "/path/to/song2001", + "/path/to/song7001", + "/path/to/cuetrack.cue/CUE_TRACK001", + ], + songs + .into_iter() + .map(|s| s.bliss_song.path.to_string_lossy().to_string()) + .collect::>(), + ); + + let songs: Vec> = library + .playlist_from_custom( + &["/path/to/song2001"], &first_factor_distance, - &mut closest_to_songs, + &closest_to_songs, true, ) - .unwrap(); + .unwrap() + .collect(); assert_eq!( vec![ "/path/to/song2001", @@ -2024,36 +2098,21 @@ mod test { ) } - fn custom_sort( - _: &[LibrarySong], - songs: &mut [LibrarySong], - _distance: &dyn DistanceMetricBuilder, - ) { - songs.sort_by(|s1, s2| s1.bliss_song.path.cmp(&s2.bliss_song.path)); - } - #[test] #[cfg(feature = "ffmpeg")] - fn test_library_custom_playlist_sort() { + fn test_library_playlist_take() { let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = library - .playlist_from_custom( - &["/path/to/song2001"], - 20, - &euclidean_distance, - &mut custom_sort, - true, - ) - .unwrap(); + .playlist_from(&["/path/to/song2001"]) + .unwrap() + .take(4) + .collect(); assert_eq!( vec![ "/path/to/song2001", - "/path/to/cuetrack.cue/CUE_TRACK001", - "/path/to/cuetrack.cue/CUE_TRACK002", - "/path/to/song1001", - "/path/to/song5001", "/path/to/song6001", - "/path/to/song7001", + "/path/to/song5001", + "/path/to/song1001", ], songs .into_iter() @@ -2064,49 +2123,68 @@ mod test { #[test] #[cfg(feature = "ffmpeg")] - fn test_library_custom_playlist_dedup() { + fn test_library_custom_playlist_distance() { let (library, _temp_dir, _) = setup_test_library(); - let songs: Vec> = library .playlist_from_custom( &["/path/to/song2001"], - 20, - &first_factor_divided_by_30_distance, - &mut closest_to_songs, - true, + &first_factor_distance, + closest_to_songs, + false, ) - .unwrap(); + .unwrap() + .collect(); assert_eq!( vec![ "/path/to/song2001", + "/path/to/song2201", + "/path/to/song6001", + "/path/to/song5001", "/path/to/song1001", "/path/to/song7001", - "/path/to/cuetrack.cue/CUE_TRACK001" + "/path/to/cuetrack.cue/CUE_TRACK001", + "/path/to/cuetrack.cue/CUE_TRACK002", ], songs .into_iter() .map(|s| s.bliss_song.path.to_string_lossy().to_string()) .collect::>(), - ); + ) + } + fn custom_sort( + _: &[LibrarySong], + songs: &[LibrarySong], + _distance: &dyn DistanceMetricBuilder, + ) -> impl Iterator> { + let mut songs = songs.to_vec(); + songs.sort_by(|s1, s2| s1.bliss_song.path.cmp(&s2.bliss_song.path)); + songs.to_vec().into_iter() + } + + #[test] + #[cfg(feature = "ffmpeg")] + fn test_library_custom_playlist_sort() { + let (library, _temp_dir, _) = setup_test_library(); let songs: Vec> = library .playlist_from_custom( &["/path/to/song2001"], - 20, - &first_factor_distance, - &mut closest_to_songs, + &euclidean_distance, + custom_sort, false, ) - .unwrap(); + .unwrap() + .collect(); assert_eq!( vec![ "/path/to/song2001", - "/path/to/song6001", - "/path/to/song5001", - "/path/to/song1001", - "/path/to/song7001", "/path/to/cuetrack.cue/CUE_TRACK001", "/path/to/cuetrack.cue/CUE_TRACK002", + "/path/to/song1001", + "/path/to/song2201", + "/path/to/song5001", + "/path/to/song6001", + "/path/to/song7001", ], songs .into_iter() @@ -2130,6 +2208,8 @@ mod test { // Second album, well ordered. "/path/to/song6001".to_string(), "/path/to/song2001".to_string(), + // Seecond album, remixes + "/path/to/song2201".to_string(), // Third album. "/path/to/song7001".to_string(), // Fourth album. @@ -3011,7 +3091,7 @@ mod test { let (library, _temp_dir, expected_library_songs) = setup_test_library(); let library_songs = library.songs_from_library::().unwrap(); - assert_eq!(library_songs.len(), 7); + assert_eq!(library_songs.len(), 8); assert_eq!( expected_library_songs, ( @@ -3022,6 +3102,7 @@ mod test { library_songs[4].to_owned(), library_songs[5].to_owned(), library_songs[6].to_owned(), + library_songs[7].to_owned(), ) ); } diff --git a/src/playlist.rs b/src/playlist.rs index b0f1c88..9ad1322 100644 --- a/src/playlist.rs +++ b/src/playlist.rs @@ -109,24 +109,52 @@ impl DistanceMetric for Forest { } } -/// Sort `candidate_songs` in place by putting songs close to `selected_songs` first -/// using the `distance` metric. -pub fn closest_to_songs>( - selected_songs: &[T], - candidate_songs: &mut [T], - metric_builder: &dyn DistanceMetricBuilder, -) { - let selected_songs = selected_songs +/// Return a playlist made of songs as close as possible as `selected_songs` from +/// the pool of songs in `candidate_songs`, using the `distance` metric to quantify +/// the distance between songs. +pub fn closest_to_songs<'a, T: AsRef + Clone + 'a>( + initial_songs: &[T], + candidate_songs: &[T], + metric_builder: &'a dyn DistanceMetricBuilder, +) -> impl Iterator + 'a { + let initial_songs = initial_songs .iter() .map(|c| c.as_ref().analysis.as_arr1()) .collect::>(); - let metric = metric_builder.build(&selected_songs); + let metric = metric_builder.build(&initial_songs); + let mut candidate_songs = candidate_songs.to_vec(); candidate_songs .sort_by_cached_key(|song| n32(metric.distance(&song.as_ref().analysis.as_arr1()))); + candidate_songs.into_iter() } -/// Sort `songs` in place using the `distance` metric and ordering by -/// the smallest distance between each song. +struct SongToSongIterator<'a, T: AsRef + Clone> { + pool: Vec, + vectors: Vec>, + metric_builder: &'a dyn DistanceMetricBuilder, +} + +impl<'a, T: AsRef + Clone> Iterator for SongToSongIterator<'a, T> { + type Item = T; + + fn next(&mut self) -> Option { + if self.pool.is_empty() { + return None; + } + let metric = self.metric_builder.build(&self.vectors); + let distances: Array1 = Array::from_shape_fn(self.pool.len(), |j| { + metric.distance(&self.pool[j].as_ref().analysis.as_arr1()) + }); + let idx = distances.argmin().unwrap(); + self.vectors.clear(); + let song = self.pool.remove(idx); + self.vectors.push(song.as_ref().analysis.as_arr1()); + Some(song) + } +} + +/// Return an iterator of sorted songs from `candidate_songs` using +/// the `distance` metric and ordering by the smallest distance between each song. /// /// If the generated playlist is `[song1, song2, song3, song4]`, it means /// song2 is closest to song1, song3 is closest to song2, and song4 is closest @@ -134,29 +162,22 @@ pub fn closest_to_songs>( /// /// Note that this has a tendency to go from one style to the other very fast, /// and it can be slow on big libraries. -pub fn song_to_song>( - from: &[T], - songs: &mut [T], - metric_builder: &dyn DistanceMetricBuilder, -) { - let mut vectors = from +pub fn song_to_song<'a, T: AsRef + Clone + 'a>( + initial_songs: &[T], + candidate_songs: &[T], + metric_builder: &'a dyn DistanceMetricBuilder, +) -> impl Iterator + 'a { + let vectors = initial_songs .iter() .map(|s| s.as_ref().analysis.as_arr1()) .collect::>(); - - for i in 0..songs.len() { - { - let metric = metric_builder.build(&vectors); - let remaining_songs = &songs[i..]; - let distances: Array1 = Array::from_shape_fn(remaining_songs.len(), |j| { - metric.distance(&remaining_songs[j].as_ref().analysis.as_arr1()) - }); - let idx = distances.argmin().unwrap(); - songs.swap(idx + i, i); - } - vectors.clear(); - vectors.push(songs[i].as_ref().analysis.as_arr1()); - } + let pool = candidate_songs.to_vec(); + let iterator = SongToSongIterator { + vectors, + metric_builder, + pool, + }; + iterator.into_iter() } /// Remove duplicate songs from a playlist, in place. @@ -170,8 +191,11 @@ pub fn song_to_song>( /// * `songs`: The playlist to remove duplicates from. /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. -pub fn dedup_playlist>(songs: &mut Vec, distance_threshold: Option) { - dedup_playlist_custom_distance(songs, distance_threshold, &euclidean_distance); +pub fn dedup_playlist<'a, T: AsRef>( + playlist: impl Iterator + 'a, + distance_threshold: Option, +) -> impl Iterator + 'a { + dedup_playlist_custom_distance(playlist, distance_threshold, &euclidean_distance) } /// Remove duplicate songs from a playlist, in place, using a custom distance @@ -187,24 +211,44 @@ pub fn dedup_playlist>(songs: &mut Vec, distance_threshold: Op /// * `distance_threshold`: The distance threshold under which two songs are /// considered identical. If `None`, a default value of 0.05 will be used. /// * `distance`: A custom distance metric. -pub fn dedup_playlist_custom_distance>( - songs: &mut Vec, +pub fn dedup_playlist_custom_distance<'a, T: AsRef>( + playlist: impl Iterator + 'a, distance_threshold: Option, - metric_builder: &dyn DistanceMetricBuilder, -) { - songs.dedup_by(|s1, s2| { - let s1 = s1.as_ref(); - let s2 = s2.as_ref(); - let vector = [s1.analysis.as_arr1()]; - let metric = metric_builder.build(&vector); - n32(metric.distance(&s2.analysis.as_arr1())) < distance_threshold.unwrap_or(0.05) - || (s1.title.is_some() - && s2.title.is_some() - && s1.artist.is_some() - && s2.artist.is_some() - && s1.title == s2.title - && s1.artist == s2.artist) + metric_builder: &'a dyn DistanceMetricBuilder, +) -> impl Iterator + 'a { + let mut peekable = playlist.peekable(); + let final_iterator = std::iter::from_fn(move || { + let maybe_s1 = peekable.next(); + if let Some(s1) = maybe_s1 { + loop { + let maybe_s2 = peekable.peek(); + if let Some(s2) = maybe_s2 { + let s1_ref = s1.as_ref(); + let s2_ref = s2.as_ref(); + let vector = [s1_ref.analysis.as_arr1()]; + let metric = metric_builder.build(&vector); + let is_same = n32(metric.distance(&s2_ref.analysis.as_arr1())) + < distance_threshold.unwrap_or(0.05) + || (s1_ref.title.is_some() + && s2_ref.title.is_some() + && s1_ref.artist.is_some() + && s2_ref.artist.is_some() + && s1_ref.title == s2_ref.title + && s1_ref.artist == s2_ref.artist); + if is_same { + peekable.next(); + continue; + } else { + return Some(s1); + } + } else { + return Some(s1); + } + } + } + None }); + final_iterator } /// Return a list of albums in a `pool` of songs that are similar to @@ -371,7 +415,7 @@ mod test { ..Default::default() }; - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -379,7 +423,9 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); + let playlist = + dedup_playlist_custom_distance(playlist.into_iter(), None, &euclidean_distance) + .collect::>(); assert_eq!( playlist, vec![ @@ -388,7 +434,7 @@ mod test { fourth_song.to_owned(), ], ); - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -396,9 +442,11 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, Some(20.), &euclidean_distance); + let playlist = + dedup_playlist_custom_distance(playlist.into_iter(), Some(20.), &euclidean_distance) + .collect::>(); assert_eq!(playlist, vec![first_song.to_owned()]); - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -406,9 +454,9 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist(&mut playlist, Some(20.)); + let playlist = dedup_playlist(playlist.into_iter(), Some(20.)).collect::>(); assert_eq!(playlist, vec![first_song.to_owned()]); - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -416,7 +464,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist(&mut playlist, None); + let playlist = dedup_playlist(playlist.into_iter(), None).collect::>(); assert_eq!( playlist, vec![ @@ -452,7 +500,7 @@ mod test { something: true, }; - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -460,7 +508,9 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, None, &euclidean_distance); + let playlist = + dedup_playlist_custom_distance(playlist.into_iter(), None, &euclidean_distance) + .collect::>(); assert_eq!( playlist, vec![ @@ -469,7 +519,7 @@ mod test { fourth_song.to_owned(), ], ); - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -477,9 +527,11 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist_custom_distance(&mut playlist, Some(20.), &cosine_distance); + let playlist = + dedup_playlist_custom_distance(playlist.into_iter(), Some(20.), &cosine_distance) + .collect::>(); assert_eq!(playlist, vec![first_song.to_owned()]); - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -487,9 +539,9 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist(&mut playlist, Some(20.)); + let playlist = dedup_playlist(playlist.into_iter(), Some(20.)).collect::>(); assert_eq!(playlist, vec![first_song.to_owned()]); - let mut playlist = vec![ + let playlist = vec![ first_song.to_owned(), first_song_dupe.to_owned(), second_song.to_owned(), @@ -497,7 +549,7 @@ mod test { fourth_song.to_owned(), fifth_song.to_owned(), ]; - dedup_playlist(&mut playlist, None); + let playlist = dedup_playlist(playlist.into_iter(), None).collect::>(); assert_eq!( playlist, vec![ @@ -553,7 +605,8 @@ mod test { &second_song, &fourth_song, ]; - song_to_song(&[&first_song], &mut songs, &euclidean_distance); + let songs = + song_to_song(&[&first_song], &mut songs, &euclidean_distance).collect::>(); assert_eq!( songs, vec![ @@ -594,7 +647,8 @@ mod test { &second_song, ]; - song_to_song(&[&first_song], &mut songs, &euclidean_distance); + let songs = + song_to_song(&[&first_song], &mut songs, &euclidean_distance).collect::>(); assert_eq!( songs, @@ -654,15 +708,29 @@ mod test { ..Default::default() }; - let mut songs = [ + let songs = [ + &fifth_song, + &fourth_song, &first_song, &first_song_dupe, &second_song, &third_song, - &fourth_song, - &fifth_song, ]; - closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); + let playlist: Vec<_> = + closest_to_songs(&[&first_song], &songs, &euclidean_distance).collect(); + // first_song_dupe is first because of the initial order of songs, and the fact + // that the sorting is desc, and then pop'd (same below) + assert_eq!( + playlist, + [ + &first_song, + &first_song_dupe, + &second_song, + &fifth_song, + &fourth_song, + &third_song + ], + ); let first_song = CustomSong { bliss_song: first_song, @@ -691,18 +759,19 @@ mod test { }; let mut songs = [ + &second_song, &first_song, + &fourth_song, &first_song_dupe, - &second_song, &third_song, - &fourth_song, &fifth_song, ]; - closest_to_songs(&[&first_song], &mut songs, &euclidean_distance); + let playlist: Vec<_> = + closest_to_songs(&[&first_song], &mut songs, &euclidean_distance).collect(); assert_eq!( - songs, + playlist, [ &first_song, &first_song_dupe, @@ -1164,13 +1233,14 @@ mod test { max_tree_depth: None, extension_level: 10, }; - closest_to_songs( + let playlist: Vec<_> = closest_to_songs( &mozart_piano_19.iter().collect::>(), &mut songs, &opts, - ); + ) + .collect(); for e in &kind_of_blue { - assert!(songs[songs.len() - 5..].contains(&e)); + assert!(playlist[playlist.len() - 5..].contains(&e)); } } }