Skip to content

Commit

Permalink
feat: trying msa with balanced partition
Browse files Browse the repository at this point in the history
  • Loading branch information
nishaq503 committed Nov 30, 2024
1 parent 36cd7ed commit 6b11ed8
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 20 deletions.
14 changes: 11 additions & 3 deletions crates/results/msa/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ struct Args {
#[arg(short('p'), long)]
pre_aligned: bool,

/// Whether to use a balanced partition.
#[arg(short('b'), long)]
balanced: bool,

/// The number of samples to use for the dataset.
#[arg(short('n'), long)]
num_samples: Option<usize>,
Expand Down Expand Up @@ -115,20 +119,24 @@ fn main() -> Result<(), String> {

let (off_ball, perm_data) = if msa_ball_path.exists() && msa_data_path.exists() {
// Read the Offset Ball and the dataset.
steps::read_offset_ball(&msa_ball_path, &msa_data_path)?
steps::read_permuted_ball(&msa_ball_path, &msa_data_path)?
} else {
let ball_path = path_manager.ball_path();
let ball = if ball_path.exists() {
// Read the Ball.
steps::read_ball(&ball_path)?
} else {
// Build the Ball.
steps::build_ball(&data, &metric, &ball_path, &path_manager.ball_csv_path())?
if args.balanced {
steps::build_balanced_ball(&data, &metric, &ball_path, &path_manager.ball_csv_path())?
} else {
steps::build_ball(&data, &metric, &ball_path, &path_manager.ball_csv_path())?
}
};
ftlog::info!("Ball has {} leaves.", ball.leaves().len());

// Build the Offset Ball and the dataset.
steps::build_offset_ball(ball, data, &metric, &msa_ball_path, &msa_data_path)?
steps::build_perm_ball(ball, data, &metric, &msa_ball_path, &msa_data_path)?
};
ftlog::info!("Offset Ball has {} leaves.", off_ball.leaves().len());

Expand Down
60 changes: 43 additions & 17 deletions crates/results/msa/src/steps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::path::Path;

use abd_clam::{
cakes::PermutedBall,
cluster::{adapter::ParBallAdapter, ClusterIO, Csv, ParPartition},
cluster::{adapter::ParBallAdapter, BalancedBall, ClusterIO, Csv, ParPartition},
dataset::{AssociatesMetadata, AssociatesMetadataMut, DatasetIO},
metric::ParMetric,
msa,
Expand Down Expand Up @@ -61,37 +61,35 @@ pub fn read_aligned<P: AsRef<Path>>(path: P) -> Result<Fv, String> {
.with_metadata(&metadata)
}

/// Build the Offset Ball and the dataset.
pub fn build_offset_ball<P: AsRef<Path>, M: ParMetric<Sequence, i32>>(
/// Build the `PermutedBall` and the permuted dataset.
pub fn build_perm_ball<P: AsRef<Path>, M: ParMetric<Sequence, i32>>(
ball: B<i32>,
data: Fv,
metric: &M,
ball_path: &P,
data_path: &P,
) -> Result<(Ob<i32>, Fv), String> {
ftlog::info!("Building Offset Ball and permuted dataset.");
let (off_ball, data) = PermutedBall::par_from_ball_tree(ball, data, metric);
ftlog::info!("Building PermutedBall and permuted dataset.");
let (ball, data) = PermutedBall::par_from_ball_tree(ball, data, metric);

ftlog::info!("Writing MSA ball to {:?}", ball_path.as_ref());
off_ball.write_to(ball_path)?;
ftlog::info!("Writing PermutedBall to {:?}", ball_path.as_ref());
ball.write_to(ball_path)?;

ftlog::info!("Writing MSA data to {:?}", data_path.as_ref());
ftlog::info!("Writing PermutedData to {:?}", data_path.as_ref());
data.write_to(data_path)?;

Ok((off_ball, data))
Ok((ball, data))
}

/// Read the Offset Ball and the dataset from disk.
pub fn read_offset_ball<P: AsRef<Path>>(ball_path: &P, data_path: &P) -> Result<(Ob<i32>, Fv), String> {
ftlog::info!("Reading MSA ball from {:?}", ball_path.as_ref());
let off_ball = Ob::read_from(ball_path)?;
/// Read the `PermutedBall` and the permuted dataset from disk.
pub fn read_permuted_ball<P: AsRef<Path>>(ball_path: &P, data_path: &P) -> Result<(Ob<i32>, Fv), String> {
ftlog::info!("Reading PermutedBall from {:?}", ball_path.as_ref());
let ball = Ob::read_from(ball_path)?;

ftlog::info!("Reading MSA data from {:?}", data_path.as_ref());
ftlog::info!("Reading PermutedData from {:?}", data_path.as_ref());
let data = FlatVec::read_from(data_path)?;

ftlog::info!("Finished reading MSA ball and data.");

Ok((off_ball, data))
Ok((ball, data))
}

/// Build the Ball and the dataset.
Expand Down Expand Up @@ -141,3 +139,31 @@ pub fn read_ball<P: AsRef<Path>>(path: &P) -> Result<B<i32>, String> {

Ok(ball)
}

/// Builds a tree with a balanced partition and returns it as a `Ball`.
///
/// The tree is created with `BalancedBall::par_new_tree`, using a fixed seed
/// of `42` and a criterion that holds only for clusters whose cardinality
/// exceeds one, and is then converted with `into_ball`. The converted tree is
/// written to `ball_path` and, as CSV, to `csv_path` before being returned.
///
/// # Errors
///
/// Returns the error from `write_to` or `write_to_csv` if either write fails.
pub fn build_balanced_ball<P: AsRef<Path>, M: ParMetric<Sequence, i32>>(
    data: &Fv,
    metric: &M,
    ball_path: &P,
    csv_path: &P,
) -> Result<B<i32>, String> {
    ftlog::info!("Building ball.");

    // NOTE(review): presumably a cluster is partitioned further only while this
    // predicate holds, i.e. the tree is grown down to singleton leaves — confirm
    // against the `abd_clam` partition documentation.
    let has_multiple_points = |cluster: &BalancedBall<_>| cluster.cardinality() > 1;
    let tree = BalancedBall::par_new_tree(data, metric, &has_multiple_points, Some(42)).into_ball();

    let num_leaves = tree.leaves().len();
    ftlog::info!("Built BalancedBall with {num_leaves} leaves.");

    // The log lines below still say "BalancedBall", but `tree` is already the
    // converted `Ball` at this point.
    let ball_file: &Path = ball_path.as_ref();
    ftlog::info!("Writing BalancedBall to {:?}", ball_file);
    tree.write_to(ball_path)?;

    let csv_file: &Path = csv_path.as_ref();
    ftlog::info!("Writing BalancedBall to CSV at {:?}", csv_file);
    tree.write_to_csv(&csv_path)?;

    Ok(tree)
}

0 comments on commit 6b11ed8

Please sign in to comment.