Skip to content

Commit

Permalink
indexed_genes has become a stand alone index replacement
Browse files Browse the repository at this point in the history
Used in the quantify_gtf package.
  • Loading branch information
stela2502 committed Dec 10, 2024
1 parent 0e53e2d commit c5f1bf0
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 3 deletions.
32 changes: 32 additions & 0 deletions src/singlecelldata/cell_data/cell_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,38 @@ impl CellData{
*mine += count - double_counts;
}
}

/// adds the other values into this object
pub fn merge_re_id_genes(&mut self, other: &CellData, other_genes: &Vec::<usize> ) {
self.total_umis += other.total_umis;
let mut too_much = BTreeMap::<usize, usize>::new();

for (gene_umi_combo, counts) in &other.genes {
// re-id the UMI count touple
let re_ided_umi_combo= GeneUmiHash(
other_genes[gene_umi_combo.0],
gene_umi_combo.1
);
match self.genes.entry(re_ided_umi_combo) {
std::collections::btree_map::Entry::Occupied(mut entry) => {
*entry.get_mut() += counts;
let counter = too_much.entry(re_ided_umi_combo.0).or_insert(0);
*counter += 1;
}
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(*counts);
}
}
}

//let other_total_reads = std::mem::take(&mut other.total_reads);
for (gene_id, count) in &other.total_reads {
let double_counts = too_much.get(&gene_id).unwrap_or(&0);
let mine = self.total_reads.entry(*gene_id).or_insert(0);
*mine += count - double_counts;
}
}

/* my old function
pub fn merge(&mut self, other:&CellData ){
Expand Down
22 changes: 19 additions & 3 deletions src/singlecelldata/indexed_genes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,25 @@ impl IndexedGenes{
Self{
names: BTreeMap::new(),
ids_to_name: Vec::with_capacity( 80_000 ),
offset : match( off ){
offset : match off {
Some(o) => o,
None => 0,
},
}

}


/// merges the other list into this list and returns a Vec of new ids for the other data
pub fn merge (&mut self, other: &IndexedGenes) -> Vec<usize>{
let mut rec = Vec::<usize>::with_capacity( other.len() );
for gene in &other.ids_to_name {
rec.push ( self.get_gene_id( gene ) )
}
rec
}


/// Returns the gene ID for the given gene name.
/// If the gene does not exist, assigns a new ID.
pub fn get_gene_id(&mut self, gene: &str) -> usize {
Expand All @@ -51,11 +63,15 @@ impl IndexedGenes{
new_id
}

pub fn len( &self ) -> usize{
self.names.len()
}

pub fn new ( data: &BTreeMap<String, usize>, offset: usize ) -> Self {
let mut ids_to_name = vec![String::new(); data.len() + offset ];
let mut names = BTreeMap::new();
let mut max_entry = 0_usize;
let _ =data.iter().for_each(| (_,val) | if *val > max_entry {max_entry = *val});
let _ = data.iter().for_each(| (_,val) | if *val > max_entry {max_entry = *val});
println!("I have gotten a max extry of {max_entry} and have a vec with {} available spaces",ids_to_name.len() );
if max_entry > (data.len() + offset) {
panic!("This is a library error - why is my largest id {max_entry} when it should be {}?!?", data.len() + offset);
Expand All @@ -73,7 +89,7 @@ impl IndexedGenes{
}

/// returns the external id - the ID used in the single_cell_data class.
pub fn ids_for_gene_names(&self, names:&Vec<String> ) -> Vec<usize>{
pub fn ids_for_gene_names(&self, names:&Vec::<String> ) -> Vec<usize>{
let mut ret = Vec::<usize>::with_capacity( names.len() );
for name in names{
if let Some(id) = self.names.get( name ){
Expand Down
31 changes: 31 additions & 0 deletions src/singlecelldata/singlecelldata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,37 @@ impl SingleCellData{
}
}
}

/// merge two SingleCellData objects - with a different gene list!!
pub fn merge_re_id_genes(&mut self, other: SingleCellData, other_ids: &Vec::<usize> ) {
if ! other.is_empty() {
// Reset all internal measurements
self.checked = false;
self.passing = 0;
self.genes_with_data.clear();

for other_cell in other.data.iter().flat_map(|map| map.values()) {
let index = self.to_key(&other_cell.name); // Extracting the first u8 of the u64 key
match self.data[index].entry(other_cell.name) {
std::collections::btree_map::Entry::Occupied(mut entry) => {
// If cell exists, merge with existing cell
//println!( "merge cell {} {}", other_cell.name, other_cell);
let cell = entry.get_mut();
cell.merge_re_id_genes(other_cell, &other_ids);
}
std::collections::btree_map::Entry::Vacant(entry) => {
// If cell doesn't exist, insert new cell
//println!( "steel cell {} {}", other_cell.name, other_cell);
let mut cell = CellData::new( other_cell.name.clone() );
cell.merge_re_id_genes( other_cell, &other_ids );
entry.insert( cell ); // Move ownership
}
}
}

}
}

/* // my old fucntion:
pub fn merge( &mut self, other:&SingleCellData ){
// reset all internal measurements
Expand Down

0 comments on commit c5f1bf0

Please sign in to comment.