diff --git a/data/unequal_lengths.csv b/data/unequal_lengths.csv new file mode 100644 index 0000000..17ed840 --- /dev/null +++ b/data/unequal_lengths.csv @@ -0,0 +1,5 @@ +ID,Lat,Lon,YYYYMMDDHHmm,TL,TLSTA,RRL1c,RRS1c,RR6,WWL6,WWS3,RRS3c,R650,RC,TS,TD +202207031100 +A006,54.88920,8.90870,202207031200,22.8,0.8,0.0,0.0,0.0,6.0,0.0,0.0,0.0,1,45.27,18.8 +A006,54.88920,8.90870,202207031300,23.3,2.1,0.0,0.0,0.0,4.0,0.0,0.0,0.0,1,44.33,18.0 +A006,54.88920,8.90870,202207031400,23.1,3.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,1,42.62,17.5 diff --git a/src/main.rs b/src/main.rs index 4dca843..59a5716 100644 --- a/src/main.rs +++ b/src/main.rs @@ -78,6 +78,18 @@ struct Cli { help = "Print all rows in file. May be piped to 'less -S'. Example `tidy-viewer data/diamonds.csv -f -a | less -R`" )] force_all_rows: bool, + #[structopt( + short = "j", + long = "jump-invalid-rows", + help = "Jump over (skip) invalid rows in the file. This includes rows with the incorrect number of columns." + )] + skip_invalid_rows: bool, + #[structopt( + short = "p", + long = "pedantic", + help = "Crashes when csv input is malformed. Useful to check for valid csv data." + )] + pedantic: bool, #[structopt( short = "t", long = "title", @@ -645,12 +657,18 @@ fn main() { return; }; - let rdr = r - .records() - .into_iter() - //.take(row_display_option + 1) - .map(|x| x.expect("a csv record")) - .collect::>(); + let rdr = r.records().collect::>(); + //.take(row_display_option + 1); + + let rdr = if opt.skip_invalid_rows { + rdr.into_iter() + .filter_map(|record| record.ok()) + .collect::>() + } else { + rdr.into_iter() + .map(|record| record.expect("valid csv data")) + .collect::>() + }; if debug_mode { println!("{:?}", "StringRecord"); @@ -693,7 +711,7 @@ fn main() { let column = rdr .iter() .take(rows) - .map(|row| row.get(col).unwrap()) + .map(|row| row.get(col).unwrap_or_default()) .collect(); v.push(column) } @@ -1181,6 +1199,7 @@ fn build_reader(opt: &Cli) -> Result>, std::io::Error> { } let reader = ReaderBuilder::new() + .flexible(!(opt.pedantic || opt.skip_invalid_rows)) .has_headers(false) .delimiter(delimiter) .from_reader(source);