Skip to content

Commit

Permalink
[feat] clap subcommand changes, implement parsing (#17)
Browse files Browse the repository at this point in the history
* initial coding to show the team

* add subcommands ... not done

* cleaned redundant code

* CLAP subcommands mock,slice,parse

* lint

* fmt

---------

Co-authored-by: Rickard Lundin <rickard.lundin@enkla.com>
Co-authored-by: Wilhelm Ågren <36638274+wilhelmagren@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 29, 2023
1 parent 3ed5a73 commit d075b6e
Show file tree
Hide file tree
Showing 6 changed files with 278 additions and 87 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "evolution"
version = "0.1.0"
version = "0.2.2"
edition = "2021"
description = "🦖 Evolve your fixed length data files into Apache Arrow tables, fully parallelized!"
authors = [
Expand All @@ -20,7 +20,7 @@ default-run = "evolution"
[dependencies]
arrow2 = { version = "0.18.0", features = ["io_ipc"] }
chrono = "0.4.31"
clap = { version = "4.4.8", features = ["derive"] }
clap = { version = "4.4.8", features = ["default", "derive"] }
crossbeam = "0.8.2"
colored = "2.0.4"
env_logger = "0.10.1"
Expand Down
51 changes: 47 additions & 4 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@
* SOFTWARE.
*
* File created: 2023-11-21
* Last updated: 2023-11-21
* Last updated: 2023-12-24
*/

use std::path::PathBuf;

use arrow2::array::MutablePrimitiveArray;
use arrow2::datatypes::{DataType, Field, Schema};
use arrow2::io::ipc::write::Record;

use crate::builder_datatypes::ColumnBuilderType;
use crate::schema;

///
#[allow(dead_code)]
struct FixedField {
Expand Down Expand Up @@ -99,12 +105,49 @@ struct FixedTable<'a> {
/// Common interface for builders that turn textual values into the entries
/// of a single column.
pub trait ColumnBuilder {
/// Parses `name` and records the outcome in the column being built.
/// The implementations in this commit append a null entry when parsing fails.
fn parse_value(&self, name: String) -> bool;
fn parse_value(&mut self, name: &str);

/// Presumably finalizes the column once all values have been parsed; the
/// implementations in this commit are still `todo!()` — TODO confirm intent.
fn finish_column(&self) -> bool;
fn finish_column(&mut self);

/// I think this function won't be necessary.
/// `[arrow2]` supports bitmap nulling out-of-the-box.
/// The implementations in this commit append a `None` entry to the column.
fn nullify(&self);
fn nullify(&mut self);
}

/// Creates one boxed [`ColumnBuilder`] per supported column of the schema
/// loaded from `schema_path`.
///
/// Only the `"i32"` and `"i64"` dtypes get a builder; columns with any other
/// dtype are skipped. The collected builders are not used further yet and are
/// dropped when the function returns.
///
/// `_in_file_path`, `_out_file_path` and `_n_threads` are accepted but
/// currently unused.
pub(crate) fn parse_from_schema(
    schema_path: PathBuf,
    _in_file_path: PathBuf,
    _out_file_path: PathBuf,
    _n_threads: i16,
) {
    let mut builders: Vec<Box<dyn ColumnBuilder>> = Vec::new();

    for column in schema::FixedSchema::from_path(schema_path.into()).iter() {
        // Pick the builder matching the column's dtype; unsupported dtypes
        // produce no builder at all.
        let builder: Box<dyn ColumnBuilder> = match column.dtype().as_str() {
            "i32" => Box::new(ColumnBuilderType::<i32> {
                rows: MutablePrimitiveArray::new(),
            }),
            "i64" => Box::new(ColumnBuilderType::<i64> {
                rows: MutablePrimitiveArray::new(),
            }),
            _ => continue,
        };
        builders.push(builder);
    }
}
/*
"bool" => Ok(DataType::Boolean),
"boolean" => Ok(DataType::Boolean),
"i16" => Ok(DataType::Int16),
"i32" => Ok(DataType::Int32),
"i64" => Ok(DataType::Int64),
"f16" => Ok(DataType::Float16),
"f32" => Ok(DataType::Float32),
"f64" => Ok(DataType::Float64),
"utf8" => Ok(DataType::Utf8),
"string" => Ok(DataType::Utf8),
"lutf8" => Ok(DataType::LargeUtf8),
"lstring" => Ok(DataType::LargeUtf8),
*/
116 changes: 116 additions & 0 deletions src/builder_datatypes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* MIT License
*
* Copyright (c) 2023 Firelink Data
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* File created: 2023-11-21
* Last updated: 2023-11-21
*/

use arrow2::array::MutablePrimitiveArray;
use arrow2::types::NativeType;

use crate::builder::ColumnBuilder;

/*
"bool" => Ok(DataType::Boolean),
"boolean" => Ok(DataType::Boolean),
"i16" => Ok(DataType::Int16),
"i32" => Ok(DataType::Int32),
"i64" => Ok(DataType::Int64),
"f16" => Ok(DataType::Float16),
"f32" => Ok(DataType::Float32),
"f64" => Ok(DataType::Float64),
"utf8" => Ok(DataType::Utf8),
"string" => Ok(DataType::Utf8),
"lutf8" => Ok(DataType::LargeUtf8),
"lstring" => Ok(DataType::LargeUtf8),
*/

/// Column builder that accumulates values of one native primitive type in an
/// `arrow2` [`MutablePrimitiveArray`].
pub(crate) struct ColumnBuilderType<T>
where
    T: NativeType,
{
    /// Entries collected so far; `None` entries represent nulls.
    pub rows: MutablePrimitiveArray<T>,
}

impl ColumnBuilder for ColumnBuilderType<i32> {
fn parse_value(&mut self, name: &str)
where
Self: Sized,
{
match name.parse::<i32>() {
Ok(n) => {
self.rows.push(Some(n));
n
}
Err(_e) => {
self.nullify();
0
}
};
}

fn finish_column(&mut self)
where
Self: Sized,
{
todo!()
}

fn nullify(&mut self)
where
Self: Sized,
{
self.rows.push(None);
}
}

impl ColumnBuilder for ColumnBuilderType<i64> {
fn parse_value(&mut self, name: &str)
where
Self: Sized,
{
match name.parse::<i64>() {
Ok(n) => {
self.rows.push(Some(n));
n
}
Err(_e) => {
self.nullify();
0
}
};
}

fn finish_column(&mut self)
where
Self: Sized,
{
todo!()
}

fn nullify(&mut self)
where
Self: Sized,
{
self.rows.push(None);
}
}
Loading

0 comments on commit d075b6e

Please sign in to comment.