dyn Array with arrow-rs
ratal committed Feb 24, 2024
1 parent 0367bb2 commit 3a25523
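
The commit swaps the builder-typed channel buffers for type-erased arrow-rs arrays (`Box<dyn ArrayBuilder>` becomes `Box<dyn Array>`). For orientation only, a minimal sketch of the arrow-rs `dyn Array` pattern the title refers to; this is not code from the repository:

    // Minimal sketch: box a concrete array behind the `Array` trait object,
    // then recover the concrete type with an `as_any()` downcast.
    use arrow::array::{Array, UInt8Array};

    fn main() {
        // Build a concrete array, then erase its type.
        let concrete = UInt8Array::from(vec![1u8, 2, 3]);
        let boxed: Box<dyn Array> = Box::new(concrete);

        // Downcast back when the native values are needed.
        let back = boxed
            .as_any()
            .downcast_ref::<UInt8Array>()
            .expect("could not downcast to UInt8Array");
        assert_eq!(back.value(1), 2);
    }
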
Showing 17 changed files with 2,035 additions and 3,079 deletions.
1,331 changes: 905 additions & 426 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/export/mod.rs
@@ -1,5 +1,5 @@
 //! Module to export mdf files to other file formats.
 pub mod numpy;
-pub mod parquet;
+//pub mod parquet;
 pub mod polars;
-pub mod tensor;
+// pub mod tensor;
6 changes: 3 additions & 3 deletions src/export/numpy.rs
@@ -9,7 +9,7 @@ use arrow::buffer::NullBuffer;
 use arrow::datatypes::DataType;

 use numpy::npyffi::types::NPY_ORDER;
-use numpy::{IntoPyArray, ToPyArray};
+use numpy::ToPyArray;
 use pyo3::prelude::*;
 use pyo3::PyObject;

@@ -26,7 +26,7 @@ impl From<Order> for NPY_ORDER {

 /// returns a numpy array from an arrow array
 #[allow(dead_code)]
-pub fn arrow_to_numpy(py: Python, array: Box<dyn ArrayBuilder>) -> PyObject {
+pub fn arrow_to_numpy(py: Python, array: Box<dyn Array>) -> PyObject {
     match array.data_type() {
         DataType::Null => Python::None(py),
         DataType::Boolean => {
@@ -41,7 +41,7 @@ pub fn arrow_to_numpy(py: Python, array: Box<dyn ArrayBuilder>) -> PyObject {
                 .as_any()
                 .downcast_ref::<PrimitiveBuilder<i8>>()
                 .expect("could not downcast to i8 array");
-            array.values().to_pyarray(py).into_py(py)
+            array.values_slice().to_pyarray(py).into_py(py)
         }
         DataType::Int16 => {
             let array = array
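
For context, the arm above hands the array's contiguous native buffer to rust-numpy. A minimal sketch of that pattern for a single concrete type, assuming the pyo3/rust-numpy releases in use around this commit; it is not the crate's full `arrow_to_numpy`:

    use arrow::array::{Array, Int8Array};
    use numpy::ToPyArray;
    use pyo3::prelude::*;
    use pyo3::PyObject;

    // Sketch only: downcast the type-erased array, then copy its native slice
    // into a NumPy array owned by the Python side.
    fn int8_to_numpy(py: Python, array: &dyn Array) -> PyObject {
        let array = array
            .as_any()
            .downcast_ref::<Int8Array>()
            .expect("could not downcast to Int8Array");
        array.values().to_pyarray(py).into_py(py)
    }
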
12 changes: 3 additions & 9 deletions src/export/parquet.rs
@@ -1,15 +1,9 @@
 //! Exporting mdf to Parquet files.
-use arrow2::{
+use arrow::{
     array::Array,
     datatypes::DataType,
-    datatypes::{Field, Metadata, Schema},
-    error::{Error, Result},
-    io::parquet::write::{
-        array_to_columns, compress, to_parquet_schema, CompressedPage, CompressionOptions, DynIter,
-        DynStreamingIterator, Encoding, FallibleStreamingIterator, FileWriter, Version,
-        WriteOptions,
-    },
-    io::parquet::{read::ParquetError, write::transverse},
+    datatypes::{Field, Schema},
+    error::Result,
 };
 use codepage::to_encoding;
 use encoding_rs::Encoding as EncodingRs;
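
The Parquet export itself stays disabled in this commit (the module is commented out in mod.rs above), so the arrow2 writer pipeline has no direct replacement here yet. For orientation only, writing arrow-rs data to Parquet normally goes through the separate parquet crate's ArrowWriter; a rough, hypothetical sketch with made-up field and file names:

    use std::fs::File;
    use std::sync::Arc;

    use arrow::array::UInt8Array;
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::record_batch::RecordBatch;
    use parquet::arrow::ArrowWriter;

    // Hypothetical sketch, not part of this commit: build one record batch
    // and write it out with default writer properties.
    fn write_example() -> Result<(), Box<dyn std::error::Error>> {
        let schema = Arc::new(Schema::new(vec![Field::new("channel", DataType::UInt8, false)]));
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![Arc::new(UInt8Array::from(vec![1u8, 2, 3]))],
        )?;
        let file = File::create("example.parquet")?;
        let mut writer = ArrowWriter::try_new(file, schema, None)?;
        writer.write(&batch)?;
        writer.close()?;
        Ok(())
    }
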
10 changes: 4 additions & 6 deletions src/export/tensor.rs
@@ -18,13 +18,11 @@
 use std::marker::PhantomData;
 use std::mem;

-use arrow2::array::Array;
-use arrow2::bitmap::Bitmap;
-use arrow2::buffer::Buffer;
-use arrow2::datatypes::*;
+use arrow::array::Array;
+use arrow::buffer::Buffer;
+use arrow::datatypes::*;

-use arrow2::error::{Error, Result};
-use arrow2::types::NativeType;
+use arrow::error::Result;

 /// Computes the strides required assuming a row major memory layout
 fn compute_row_major_strides(shape: &[usize]) -> Result<Vec<usize>> {
43 changes: 14 additions & 29 deletions src/mdfinfo/mdfinfo3.rs
@@ -1,11 +1,12 @@
 //! Parsing of file metadata into MdfInfo3 struct
 use anyhow::{Context, Result};
-use arrow::array::{Array, ArrayBuilder, PrimitiveArray, PrimitiveBuilder};
-use arrow::datatypes::{DataType, UInt8Type};
+use arrow::array::{Array, ArrayBuilder, PrimitiveBuilder};
+use arrow::datatypes::{DataType, UInt16Type, UInt32Type, UInt8Type};
 use binrw::{BinRead, BinReaderExt};
 use byteorder::{LittleEndian, ReadBytesExt};
 use chrono::NaiveDate;
 use encoding_rs::Encoding;
+use std::any::Any;
 use std::cmp::Ordering;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::default::Default;
@@ -209,22 +210,6 @@ impl MdfInfo3 {
             }
         }
     }
-    /// True if channel contains data
-    pub fn get_channel_data_validity(&self, channel_name: &str) -> bool {
-        let mut state: bool = false;
-        if let Some((_master, dg_pos, (_cg_pos, rec_id), cn_pos)) =
-            self.get_channel_id(channel_name)
-        {
-            if let Some(dg) = self.dg.get(dg_pos) {
-                if let Some(cg) = dg.cg.get(rec_id) {
-                    if let Some(cn) = cg.cn.get(cn_pos) {
-                        state = cn.channel_data_valid
-                    }
-                }
-            }
-        }
-        state
-    }
     /// returns channel's data array.
     pub fn get_channel_data(&self, channel_name: &str) -> Option<Box<dyn Array>> {
         let mut channel_names: HashSet<String> = HashSet::new();
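
The removed `get_channel_data_validity` helper exposed a per-channel validity flag, while `get_channel_data` now returns a `Box<dyn Array>`. Arrow arrays carry validity themselves in an optional null buffer, so callers can query it on the returned data directly; a small illustrative sketch, assuming a nullable arrow-rs array and making no claim about why the helper was removed:

    use arrow::array::{Array, UInt16Array};

    fn inspect_validity(data: &dyn Array) {
        // Every arrow-rs array tracks validity through its null buffer.
        println!("null count: {}", data.null_count());
        if !data.is_empty() {
            println!("first slot valid: {}", data.is_valid(0));
        }

        // Option-returning iteration folds validity into the values themselves.
        if let Some(a) = data.as_any().downcast_ref::<UInt16Array>() {
            let valid_sum: u64 = a.iter().flatten().map(u64::from).sum();
            println!("sum of valid values: {valid_sum}");
        }
    }
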
@@ -868,7 +853,7 @@ pub struct Cn3 {
     /// number of bytes taken by channel in record
     pub n_bytes: u16,
     /// channel data
-    pub data: Box<dyn ArrayBuilder>,
+    pub data: Box<dyn Array>,
     /// false = little endian
     pub endian: bool,
 }
@@ -883,7 +868,7 @@ impl Default for Cn3 {
             description: Default::default(),
             pos_byte_beg: Default::default(),
             n_bytes: Default::default(),
-            data: PrimitiveBuilder::<UInt8Type>::new().into_box_any(),
+            data: Box::new(PrimitiveBuilder::<UInt8Type>::new()),
             endian: Default::default(),
         }
     }
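
Since `Cn3::data` is now declared as a `Box<dyn Array>`, the default value has to be some empty placeholder. A hypothetical sketch of two arrow-rs ways to produce one, shown for comparison and not taken from the crate:

    use arrow::array::{new_empty_array, Array, PrimitiveBuilder};
    use arrow::datatypes::{DataType, UInt8Type};

    fn empty_placeholder() -> Box<dyn Array> {
        // Finishing a fresh builder yields a zero-length concrete UInt8 array.
        let from_builder: Box<dyn Array> = Box::new(PrimitiveBuilder::<UInt8Type>::new().finish());

        // `new_empty_array` builds the same kind of placeholder for any DataType,
        // returned as an Arc'd ArrayRef.
        let _from_datatype = new_empty_array(&DataType::UInt8);

        from_builder
    }
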
@@ -1088,7 +1073,7 @@ fn parse_cn3_block(
         unique_name,
         pos_byte_beg,
         n_bytes,
-        data: arrow_data_type_init(0, data_type, n_bytes as u32, false)?,
+        data: arrow_data_type_init(0, data_type, n_bytes as u32, 1)?,
         endian,
     };

@@ -1132,7 +1117,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3,
         description: String::from("Milliseconds"),
         pos_byte_beg,
         n_bytes: 2,
-        data: PrimitiveArray::<u16>::new_empty(DataType::UInt16).boxed(),
+        data: PrimitiveBuilder::<UInt16Type>::new_empty(DataType::UInt16).boxed(),
         endian: false,
     };
     let block2 = Cn3Block2 {
@@ -1150,7 +1135,7 @@
         description: String::from("Minutes"),
         pos_byte_beg: pos_byte_beg + 2,
         n_bytes: 1,
-        data: PrimitiveArray::<u8>::new_empty(DataType::UInt8).boxed(),
+        data: PrimitiveBuilder::<UInt8Type>::new_empty(DataType::UInt8).boxed(),
         endian: false,
     };
     let block2 = Cn3Block2 {
@@ -1168,7 +1153,7 @@
         description: String::from("Hours"),
         pos_byte_beg: pos_byte_beg + 3,
         n_bytes: 1,
-        data: PrimitiveArray::<u8>::new_empty(DataType::UInt8).boxed(),
+        data: PrimitiveBuilder::<UInt8Type>::new_empty(DataType::UInt8).boxed(),
         endian: false,
     };
     let block2 = Cn3Block2 {
@@ -1186,7 +1171,7 @@
         description: String::from("Days"),
         pos_byte_beg: pos_byte_beg + 4,
         n_bytes: 1,
-        data: PrimitiveArray::<u8>::new_empty(DataType::UInt8).boxed(),
+        data: PrimitiveBuilder::<UInt8Type>::new_empty(DataType::UInt8).boxed(),
         endian: false,
     };
     let block2 = Cn3Block2 {
@@ -1204,7 +1189,7 @@
         description: String::from("Month"),
         pos_byte_beg: pos_byte_beg + 5,
         n_bytes: 1,
-        data: PrimitiveArray::<u8>::new_empty(DataType::UInt8).boxed(),
+        data: PrimitiveBuilder::<UInt8Type>::new_empty(DataType::UInt8).boxed(),
         endian: false,
     };
     let block2 = Cn3Block2 {
@@ -1222,7 +1207,7 @@
         description: String::from("Years"),
         pos_byte_beg: pos_byte_beg + 7,
         n_bytes: 1,
-        data: PrimitiveArray::<u8>::new_empty(DataType::UInt8).boxed(),
+        data: PrimitiveBuilder::<UInt8Type>::new_empty(DataType::UInt8).boxed(),
         endian: false,
     };
     (date_ms, min, hour, day, month, year)
@@ -1250,7 +1235,7 @@ fn can_open_time(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3) {
         description: String::from("Milliseconds"),
         pos_byte_beg,
         n_bytes: 4,
-        data: PrimitiveArray::<u32>::new_empty(DataType::UInt32).boxed(),
+        data: PrimitiveBuilder::<UInt32Type>::new_empty(DataType::UInt32).boxed(),
         endian: false,
     };
     let block2 = Cn3Block2 {
@@ -1268,7 +1253,7 @@
         description: String::from("Days"),
         pos_byte_beg: pos_byte_beg + 4,
         n_bytes: 2,
-        data: PrimitiveArray::<u16>::new_empty(DataType::UInt16).boxed(),
+        data: PrimitiveBuilder::<UInt16Type>::new_empty(DataType::UInt16).boxed(),
         endian: false,
     };
     (ms, days)
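
These CANopen helper channels now start from primitive builders for the matching arrow types. A short, hypothetical sketch of how such a UInt16 channel could be filled and then type-erased once decoding is done; the names are illustrative, not taken from the crate:

    use arrow::array::{Array, PrimitiveBuilder};
    use arrow::datatypes::UInt16Type;

    // Illustrative only: `raw` stands in for decoded record values.
    fn build_days_channel(raw: &[u16]) -> Box<dyn Array> {
        let mut builder = PrimitiveBuilder::<UInt16Type>::with_capacity(raw.len());
        for &value in raw {
            builder.append_value(value);
        }
        Box::new(builder.finish())
    }
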
