Skip to content

Commit

Permalink
Merge pull request #6 from AndreaGuarracino/compressed_input
Browse files Browse the repository at this point in the history
Support gzipped PAF files
  • Loading branch information
AndreaGuarracino authored Mar 27, 2024
2 parents 732cedc + d2e77fb commit 7941d71
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 8 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ itertools = "0.10.0"
fnv = "1.0.7"
rgb = "0.8"
line_drawing = "0.8.0"
flate2 = "1.0.28"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ cargo install --force --path .

Generate alignments with the CIGAR string attached to the `cg:Z:` tag.
These can be made by several aligners, including `minimap2 -c`, `wfmash`, or `lastz --format=paf:wfmash`.
With alignments in `aln.paf`, we would convert it into a CHAIN format file using this call:
With alignments in `aln.paf` (or `aln.paf.gz` if gzip-compressed), we would convert them into a CHAIN format file using this call:

```
paf2chain -i aln.paf > aln.chain
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::paf::{PafFile, paf_query_end, paf_target, paf_target_length, paf_targ

fn main() {
let matches = App::new("paf2chain")
.version("0.1.0")
.version("0.1.1")
.author("Andrea Guarracino")
.about("Generate a CHAIN format file from a PAF format file")
.arg(
Expand Down
26 changes: 20 additions & 6 deletions src/paf.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use boomphf::Mphf;
use itertools::Itertools;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::io::{BufRead, BufReader, Read};
use std::path::Path;
use flate2::read::GzDecoder;

#[derive(Debug, Clone)]
struct AlignedSeq {
Expand Down Expand Up @@ -81,7 +83,19 @@ pub fn paf_target_end(line: &str) -> usize {

fn _for_each_line_in_file(paf_filename: &str, mut callback: impl FnMut(&str)) {
let file = File::open(paf_filename).unwrap();
let reader = BufReader::new(file);
// Determine if the file is gzipped based on its extension
let gzipped = Path::new(paf_filename)
.extension()
.map_or(false, |ext| ext == "gz");

// Create a dynamic reader based on the file type
let box_reader: Box<dyn Read> = if gzipped {
Box::new(GzDecoder::new(file))
} else {
Box::new(file)
};

let reader = BufReader::new(box_reader);
for line in reader.lines() {
callback(&line.unwrap());
}
Expand Down Expand Up @@ -109,15 +123,15 @@ impl PafFile {
let query_id = query_mphf.hash(&query_name) as usize;
if !seen_queries[query_id] {
seen_queries[query_id] = true;
let mut query = &mut queries[query_id];
let query = &mut queries[query_id];
query.name = query_name;
query.length = paf_query_length(l);
}
let target_name: String = paf_target(l);
let target_id = target_mphf.hash(&target_name) as usize;
if !seen_targets[target_id] {
seen_targets[target_id] = true;
let mut target = &mut targets[target_id];
let target = &mut targets[target_id];
target.name = target_name;
target.length = paf_target_length(l);
}
Expand All @@ -129,7 +143,7 @@ impl PafFile {
let mut target_offset: usize = 0;
targets_sort.iter().for_each(|t| {
let target_id = target_mphf.hash(&t.name) as usize;
let mut target = &mut targets[target_id];
let target = &mut targets[target_id];
target.rank = target_idx;
target_idx += 1;
target.offset = target_offset;
Expand All @@ -141,7 +155,7 @@ impl PafFile {
let mut query_offset: usize = 0;
queries_sort.iter().for_each(|q| {
let query_id = query_mphf.hash(&q.name) as usize;
let mut query = &mut queries[query_id];
let query = &mut queries[query_id];
query.rank = query_idx;
query_idx += 1;
query.offset = query_offset;
Expand Down

0 comments on commit 7941d71

Please sign in to comment.