Skip to content

Commit

Permalink
working main :)
Browse files Browse the repository at this point in the history
  • Loading branch information
wilhelmagren committed May 10, 2024
1 parent 5dcf1cf commit 87bbbf5
Show file tree
Hide file tree
Showing 13 changed files with 130 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ Cargo.lock

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

.python-venv/
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ env_logger = "0.11.3"
half = "2.4.1"
log = "0.4.21"
num_cpus = "1.16.0"
padder = "1.1.0"
rand = { version = "0.8.5" }
rayon = { version = "1.10.0" }
serde = { version = "1.0.201", features = ["derive"] }
Expand All @@ -47,6 +46,7 @@ threadpool = "1.8.1"
substring = "1.4.5"
tempfile = "3.10.1"
libc = "0.2.154"
padder = { version = "1.2.0", features = ["serde"] }

[dev-dependencies]
glob = "0.3.1"
Expand Down
10 changes: 0 additions & 10 deletions resources/schema/generate_big_testfile.sh

This file was deleted.

24 changes: 8 additions & 16 deletions resources/schema/test_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,29 @@
"name": "id",
"offset": 0,
"length": 9,
"dtype": "i32",
"is_nullable": false,
"alignment": "mid",
"pad_symbol": ""
"dtype": "Int32",
"is_nullable": false
},
{
"name": "name",
"offset": 9,
"length": 32,
"dtype": "utf8",
"is_nullable": false,
"alignment": "mid",
"pad_symbol": ""
"dtype": "Utf8",
"is_nullable": false
},
{
"name": "city",
"offset": 41,
"length": 32,
"dtype": "utf8",
"is_nullable": true,
"alignment": "mid",
"pad_symbol": ""
"dtype": "Utf8",
"is_nullable": true
},
{
"name": "employed",
"offset": 73,
"length": 5,
"dtype": "boolean",
"is_nullable": true,
"alignment": "mid",
"pad_symbol": ""
"dtype": "Boolean",
"is_nullable": true
}
]
}
File renamed without changes.
5 changes: 4 additions & 1 deletion src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use std::fs;
use std::fs::File;
use std::path::PathBuf;

use crate::converters::arrow2_converter::{MasterBuilder, Slice2Arrow2};
// use crate::converters::arrow2_converter::{MasterBuilder, Slice2Arrow2};
use crate::converters::arrow_converter::{MasterBuilders, Slice2Arrow};
use crate::converters::self_converter::SampleSliceAggregator;
use crate::converters::Converter;
Expand Down Expand Up @@ -294,6 +294,7 @@ impl Cli {
s2a
}
Converters::Arrow2 => {
/*
let _out_file = fs::OpenOptions::new()
.create(true)
.append(true)
Expand All @@ -308,6 +309,8 @@ impl Cli {
});
s2a
*/
todo!()
}

Converters::None => {
Expand Down
2 changes: 1 addition & 1 deletion src/converters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use arrow::array::ArrayRef;
use parquet::format;
use std::cmp::min;

pub(crate) mod arrow2_converter;
// pub(crate) mod arrow2_converter;
pub(crate) mod self_converter;

pub mod arrow_converter;
Expand Down
52 changes: 26 additions & 26 deletions src/converters/arrow2_converter.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
/*
* MIT License
*
* Copyright (c) 2024 Firelink Data
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* File created: 2023-11-21
* Last updated: 2023-11-21
*/
//
// MIT License
//
// Copyright (c) 2024 Firelink Data
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// File created: 2023-11-21
// Last updated: 2024-05-10
//

use arrow2::datatypes::{DataType, Field, Schema};
use arrow2::io::ipc::write::Record;
Expand Down
44 changes: 26 additions & 18 deletions src/converters/arrow_converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use parquet::format;
use rayon::iter::IndexedParallelIterator;
use rayon::prelude::*;

use crate::datatype::DataType;
use crate::converters::{ColumnBuilder, Converter};
use crate::slicers::{FnFindLastLineBreak, FnLineBreakLen};
use crate::{converters, schema};
Expand Down Expand Up @@ -92,31 +93,38 @@ impl MasterBuilders {
for _i in 1..=instances {
let mut buildersmut: Vec<Box<dyn ColumnBuilder + Sync + Send>> =
Vec::with_capacity(antal_col);
for val in schema.iter() {
match val.dtype().as_str() {
"i32" => buildersmut.push(Box::new(HandlerInt32Builder {
for col in schema.iter() {
match col.dtype() {
DataType::Boolean => buildersmut.push(Box::new(HandlerBooleanBuilder {
boolean_builder: BooleanBuilder::new(),
runes_in_column: col.length(),
name: col.name().clone(),
})),
DataType::Float16 => todo!(),
DataType::Float32 => todo!(),
DataType::Float64 => todo!(),
DataType::Int16 => todo!(),
DataType::Int32 => buildersmut.push(Box::new(HandlerInt32Builder {
int32builder: Int32Builder::new(),
runes_in_column: val.length(),
name: val.name().clone(),
runes_in_column: col.length(),
name: col.name().clone(),
})),
"i64" => buildersmut.push(Box::new(HandlerInt64Builder {
DataType::Int64 => buildersmut.push(Box::new(HandlerInt64Builder {
int64builder: Int64Builder::new(),
runes_in_column: val.length(),
name: val.name().clone(),
runes_in_column: col.length(),
name: col.name().clone(),
})),
"boolean" => buildersmut.push(Box::new(HandlerBooleanBuilder {
boolean_builder: BooleanBuilder::new(),
runes_in_column: val.length(),
name: val.name().clone(),
DataType::Utf8 => buildersmut.push(Box::new(HandlerStringBuilder {
string_builder: StringBuilder::new(),
runes_in_column: col.length(),
name: col.name().clone(),
})),
"utf8" => buildersmut.push(Box::new(HandlerStringBuilder {
DataType::LargeUtf8 => buildersmut.push(Box::new(HandlerStringBuilder {
string_builder: StringBuilder::new(),
runes_in_column: val.length(),
name: val.name().clone(),
runes_in_column: col.length(),
name: col.name().clone(),
})),

&_ => {}
};
}
}
builders.push(buildersmut);
}
Expand Down
41 changes: 41 additions & 0 deletions src/datatype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//
// MIT License
//
// Copyright (c) 2024 Firelink Data
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// File created: 2024-05-10
// Last updated: 2024-05-10
//

use serde::{Deserialize, Serialize};

/// The data types a schema column can be declared as.
///
/// The serde derives carry no rename attributes, so each variant is spelled
/// in a schema file exactly as its own name, e.g. `"dtype": "Int32"`.
///
/// NOTE(review): as of this change the Arrow converter only creates column
/// builders for `Boolean`, `Int32`, `Int64`, `Utf8` and `LargeUtf8` (the two
/// string variants share one string builder); the remaining variants reach a
/// `todo!()` there.
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub(crate) enum DataType {
    Boolean,
    Float16,
    Float32,
    Float64,
    Int16,
    Int32,
    Int64,
    Utf8,
    LargeUtf8,
}
1 change: 1 addition & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use clap::Parser;
use log::{debug, error, info};

mod cli;
mod datatype;
mod error;
mod logger;
mod mocker;
Expand Down
10 changes: 10 additions & 0 deletions src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,21 @@ pub struct FixedColumn {

///
impl FixedColumn {
/// Get the name of the column.
///
/// Returns a reference to the stored `String`; callers that need an owned
/// copy have to clone it themselves.
pub fn name(&self) -> &String {
&self.name
}

/// Get the length of the column.
///
/// NOTE(review): the unit is not stated here. The Arrow converter stores this
/// value as `runes_in_column`, so it presumably counts characters rather than
/// bytes; confirm against the schema definition.
pub fn length(&self) -> usize {
self.length
}

/// Get the data type of the column.
///
/// The `DataType` is returned by value, not by reference.
pub fn dtype(&self) -> DataType {
self.dtype
}

/// Get the alignment mode of the column.
pub fn alignment(&self) -> Alignment {
self.alignment
Expand Down
10 changes: 10 additions & 0 deletions tools/generate_big_testfile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Build a large test file by appending the mock data file to "BIG-<name>" in
# the current directory a given number of times.
#
# Usage: generate_big_testfile.sh <count>
#   <count>  non-negative integer: how many copies of the mock file to append.
#
# The source path is relative to the current directory, so the script has to
# be started from the directory that contains "resources/". The output file is
# appended to, never truncated, so an existing "BIG-<name>" keeps growing.

set -euo pipefail

TEST_FILE="test_schema_mock.flf"
SOURCE_FILE="./resources/test-flf/$TEST_FILE"
TARGET_FILE="BIG-$TEST_FILE"

# Reject a missing or non-numeric count up front: an empty "$1" is a syntax
# error inside the arithmetic loop, and any other text would be evaluated as an
# arithmetic expression instead of being treated as a number.
if [[ $# -lt 1 || ! "$1" =~ ^[0-9]+$ ]]; then
    echo "Usage: $0 <count>" >&2
    exit 1
fi

# Force base 10 so that a count with leading zeros (e.g. "08") is not parsed
# as an invalid octal literal.
count=$((10#$1))

# Fail once with a clear message instead of once per loop iteration.
if [[ ! -r "$SOURCE_FILE" ]]; then
    echo "Cannot read $SOURCE_FILE" >&2
    exit 1
fi

echo "Loop $1 times to duplicate data to $TEST_FILE"

for ((i = 1; i <= count; i++)); do
    cat "$SOURCE_FILE" >> "$TARGET_FILE"
done

0 comments on commit 87bbbf5

Please sign in to comment.