Commit a15b110: tch normalize

r730 committed Nov 6, 2023 (1 parent: e194050)
Showing 3 changed files with 102 additions and 7 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -18,10 +18,11 @@

## libtorch installation
1. Download: https://download.pytorch.org/libtorch/cpu/libtorch-macos-2.1.0.zip
2. macOS: configure the environment variables. *Note: point all three paths at the libtorch root directory; do not append lib or include.*
export LIBTORCH_INCLUDE=/Users/tom/Downloads/libtorch
export LIBTORCH_LIB=/Users/tom/Downloads/libtorch
export LIBTORCH=/Users/tom/Downloads/libtorch
export DYLD_LIBRARY_PATH=/Users/tom/Downloads/libtorch/lib:$DYLD_LIBRARY_PATH
3. On Ubuntu 22.04.3 with a Tesla M40 (driver NVIDIA-Linux-x86_64-535.129.03.run) and CUDA 11.8, the tch CUDA tests pass (missing shared libraries can be copied into the debug directory); see the smoke-test sketch after this list:
- `export LD_LIBRARY_PATH=${LIBTORCH}/lib:$LD_LIBRARY_PATH`
- `cargo test --test tch_test`
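
As a quick check of the setup above, here is a minimal smoke-test sketch (not part of this commit; it assumes `tch` is already declared as a dependency in Cargo.toml):

// Hypothetical smoke test: passes only if libtorch links and loads correctly.
#[test]
fn libtorch_smoke_test() {
    use tch::{Device, Tensor};

    // Tensor creation fails at load time if LIBTORCH points at the wrong directory.
    let t = Tensor::from_slice(&[1.0f32, 2.0, 3.0]);
    assert_eq!(t.size(), vec![3]);

    // On the Tesla M40 / CUDA 11.8 setup above this should report `true`.
    println!("CUDA available: {}", tch::Cuda::is_available());
    let _device = Device::cuda_if_available();
}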
74 changes: 73 additions & 1 deletion src/normalize_data.rs
@@ -85,6 +85,61 @@ async fn normalize(df: Option<DataFrame>) -> anyhow::Result<DataFrame>{
    Ok(DataFrame::empty())
}

pub fn tch_normalization(tch_matrix: &[Vec<f64>]) -> Vec<Vec<f64>> {
    // Compute the per-row mean vector.
    let mean_vector = calculate_mean_vector(tch_matrix);
    // Compute the per-row standard deviation vector.
    let std_vector = calculate_standard_deviation_vector(tch_matrix);
    // Apply z-score normalization row by row.
    normalize_matrix(tch_matrix, &mean_vector, &std_vector)
}

fn calculate_mean_vector(tch_matrix: &[Vec<f64>]) -> Vec<f64> {
    let mut mean_vector = vec![];
    for row in tch_matrix {
        // The mean of a row divides by the row length, not the number of rows.
        let sum: f64 = row.iter().sum();
        mean_vector.push(sum / row.len() as f64);
    }
    mean_vector
}

fn calculate_standard_deviation_vector(tch_matrix: &[Vec<f64>]) -> Vec<f64> {
    let mut std_vector = vec![];
    for row in tch_matrix {
        let mean = calculate_mean(row);
        // Population variance: mean of squared deviations, then take the root.
        let mut variance = 0.0;
        for &value in row {
            variance += (value - mean) * (value - mean);
        }
        variance /= row.len() as f64;
        std_vector.push(variance.sqrt());
    }
    std_vector
}

fn calculate_mean(numbers: &[f64]) -> f64 {
    numbers.iter().sum::<f64>() / numbers.len() as f64
}

fn normalize_matrix(
    tch_matrix: &[Vec<f64>],
    mean_vector: &[f64],
    std_vector: &[f64],
) -> Vec<Vec<f64>> {
    // Start from an empty vector: pushing onto `vec![Vec::new(); n]` would
    // leave n empty rows at the front of the result.
    let mut normalized_tch_matrix = Vec::with_capacity(tch_matrix.len());
    for (i, row) in tch_matrix.iter().enumerate() {
        let normalized_row: Vec<f64> = row
            .iter()
            .map(|&value| (value - mean_vector[i]) / std_vector[i])
            .collect();
        normalized_tch_matrix.push(normalized_row);
    }
    normalized_tch_matrix
}
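
For intuition, a worked example of the math above (illustrative, not part of the commit): the row [1.0, 2.0, 3.0] has mean 2.0 and population standard deviation sqrt(2/3) ≈ 0.8165, so it normalizes to approximately [-1.2247, 0.0, 1.2247]. A test sketch, with the helper `approx_eq` made up for the example:

fn approx_eq(a: f64, b: f64) -> bool {
    (a - b).abs() < 1e-4
}

#[test]
fn tch_normalization_worked_example() {
    let normalized = tch_normalization(&[vec![1.0, 2.0, 3.0]]);
    // mean = 2.0, variance = ((-1)^2 + 0^2 + 1^2) / 3 = 2/3, std ≈ 0.8165
    assert!(approx_eq(normalized[0][0], -1.2247));
    assert!(approx_eq(normalized[0][1], 0.0));
    assert!(approx_eq(normalized[0][2], 1.2247));
}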



@@ -93,11 +148,28 @@ async fn normalize(df: Option<DataFrame>) -> anyhow::Result<DataFrame>{
mod test {
    use polars::prelude::{CsvReader, SerReader};

    use crate::normalize_data::{tch_normalization, traverse_source_directory};
    use crate::util::Envs;

    use super::normalize;

    #[test]
    pub fn normalization_works() {
        std::env::set_var("RUST_LOG", "qlib_data=debug");
        // Initialize logging.
        tracing_subscriber::fmt::init();

        let tch_matrix: Vec<Vec<f64>> = vec![
            vec![1.0, 2.0, 3.0],
            vec![4.0, 5.0, 6.0],
            vec![7.0, 8.0, 9.0],
        ];
        let normalized_tch_matrix = tch_normalization(&tch_matrix);
        for row in normalized_tch_matrix {
            tracing::debug!("row: {:?}", row);
        }
    }

    #[tokio::test]
    pub async fn normalize_format_works() -> anyhow::Result<()> {
        std::env::set_var("RUST_LOG", "qlib_data=debug");
32 changes: 27 additions & 5 deletions tests/tch_test.rs
@@ -1,23 +1,45 @@
#[cfg(test)]
mod alstm {
    use tch::nn::OptimizerConfig;
    use tch::{
        kind,
        nn::{self, Module},
        Device, Kind, Tensor,
    };

    fn my_module(p: nn::Path, dim: i64) -> impl nn::Module {
        let x1 = p.zeros("x1", &[dim]);
        let x2 = p.zeros("x2", &[dim]);
        nn::func(move |xs| xs * &x1 + xs.exp() * &x2)
    }

    fn normalize_data(input: &Tensor) -> Tensor {
        // Z-score normalization: subtract the mean, then divide by the
        // standard deviation. `std(false)` is the population (biased) estimator.
        let mean = input.mean(Kind::Float);
        let std = input.std(false);

        let normalized_input = input - mean;
        normalized_input / std
    }

    #[test]
    fn normalize_data_works() {
        // Load the data. This is a minimal example; in practice, load your
        // data from its actual source.
        let data = vec![
            Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]),
            Tensor::from_slice(&[6.0, 7.0, 8.0, 9.0, 10.0]),
        ];

        // Normalize each tensor.
        let normalized_data = data.iter().map(normalize_data).collect::<Vec<_>>();

        // Print the normalized tensors.
        for (i, tensor) in normalized_data.iter().enumerate() {
            println!("Data normalized {}: {}", i, tensor);
        }
    }
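
Hand-checking the first tensor (illustrative, not part of the commit): [1, 2, 3, 4, 5] has mean 3.0 and population standard deviation sqrt(2) ≈ 1.4142 under `std(false)`, so the normalized values should be approximately [-1.4142, -0.7071, 0.0, 0.7071, 1.4142]. An assertion-based sketch with a made-up test name:

    #[test]
    fn normalize_data_matches_hand_computation() {
        let input = Tensor::from_slice(&[1.0f64, 2.0, 3.0, 4.0, 5.0]);
        let normalized = normalize_data(&input);
        // mean = 3.0, population variance = (4 + 1 + 0 + 1 + 4) / 5 = 2.0
        let first = normalized.double_value(&[0]);
        assert!((first + 2.0f64.sqrt()).abs() < 1e-6);
    }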

    #[test]
    fn alstm_build_model_cuda_works() {
        let vs = nn::VarStore::new(Device::Cuda(0));
        let my_module = my_module(vs.root(), 7);
        let mut opt = nn::Sgd::default().build(&vs, 1e-2).unwrap();

        for _idx in 1..5000 {
            // Dummy mini-batches made of zeros.
            let xs = Tensor::zeros(&[7], kind::FLOAT_CUDA);
