Building a StringRecord from a raw line #326
-
It seems that currently there's no API to serde-deserialize raw records that are retrieved individually. My use-case is basically something like: #[derive(Deserialize)]
struct S<'a> {
a: &'a str,
b: &'a str,
}
let header: StringRecord = ["a", "b"].into_iter().collect(); // Constant header
let mut raw_record = csv::StringRecord::new(); // Reuse across iterations to avoid allocating
while let Some(record_str) = stream.await { // Note how this doesn't fit with Reader
raw_record.read_raw(record_str); // This doesn't exist (question is how to set settings of course...)
let deserialized: S = raw_record.deserialize(Some(&headers))?;
// And do stuff with this
} |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 9 replies
-
Possible workaround: #[tokio::main]
async fn main() {
tokio::task::spawn(async {
let mut csv_reader: csv::Reader<Cursor<Box<[u8]>>> = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(Cursor::new(Box::from([])));
let mut string_record = csv::StringRecord::new();
let headers = csv::StringRecord::from(vec!["a", "b", "c"]);
let mut lines = stream::iter(["a,b,c", "d,e,f", "g,h,i"]);
while let Some(l) = lines.next().await {
*csv_reader.get_mut() = Cursor::new(Box::from(l.as_bytes()));
#[derive(serde_derive::Deserialize, Debug)]
struct S<'a> {
a: &'a str,
b: &'a str,
c: &'a str,
}
assert!(csv_reader.read_record(&mut string_record)?);
let deserialized: S = string_record.deserialize(Some(&headers))?;
dbg!(deserialized);
}
Ok::<_, anyhow::Error>(())
}).unwrap();
} This seems pretty dirty: it is far from certain that this won't break in future versions of the csv crate. Also, it uses the internal reader's BufReader as an intermediate buffer, which creates unnecessary additional copies and therefore overhead.
Test to add to the test suite if this workaround is actually guaranteed behavior: #[test]
fn test_csv_line_by_line_feeding() {
let mut csv_reader: csv::Reader<Cursor<Box<[u8]>>> = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(Cursor::new(Box::from([])));
let mut string_record = csv::StringRecord::new();
let headers = csv::StringRecord::from(vec!["a", "b", "c"]);
#[derive(serde_derive::Deserialize, Debug, PartialEq, Eq)]
struct S<'a> {
a: &'a str,
b: &'a str,
c: &'a str,
}
let input = ["a,b,c", "d,e,f\n\naaaaa\n", "g,h,i\n"];
let expected_out = [
S { a: "a", b: "b", c: "c" },
S { a: "d", b: "e", c: "f" },
S { a: "g", b: "h", c: "i" },
];
assert_eq!(input.len(), expected_out.len());
for (line, expected_out) in input.iter().zip(expected_out) {
*csv_reader.get_mut() = Cursor::new(Box::from(line.as_bytes()));
csv_reader.seek(csv::Position::new()).unwrap(); // This will reset the reader state
assert!(csv_reader.read_record(&mut string_record).unwrap());
let deserialized: S = string_record.deserialize(Some(&headers)).unwrap();
assert_eq!(deserialized, expected_out);
}
} |
Beta Was this translation helpful? Give feedback.
-
I can't make heads or tails of what you're trying to do. But `Reader::read_record` is the way to read a single record. If you want to parse a single CSV record from a single line, then you need to create a `Reader` for that single line. But more generally, "parse a single CSV record from a single line" is incorrect because a CSV record can span multiple lines. |
Beta Was this translation helpful? Give feedback.
I can't make heads or tails of what you're trying to do. But
Reader::read_record
is the way to read a single record. If you want to parse a single CSV record from a single line, then you need to create a
Reader
for that single line. But more generally, "parse a single CSV record from a single line" is incorrect because a CSV record can span multiple lines.