diff --git a/README.md b/README.md index 750bd8f..4d58068 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,11 @@ ## libtorch 安装 1. 下载: https://download.pytorch.org/libtorch/cpu/libtorch-macos-2.1.0.zip -2. 配置环境变量:*注意三个地址都配置到libtorch的根目录,不要加lib和incloud* +2. macos 配置环境变量:*注意三个地址都配置到libtorch的根目录,不要加lib和incloud* export LIBTORCH_INCLUDE=/Users/tom/Downloads/libtorch export LIBTORCH_LIB=/Users/tom/Downloads/libtorch export LIBTORCH=/Users/tom/Downloads/libtorch export DYLD_LIBRARY_PATH=/Users/tom/Downloads/libtorch/lib:$DYLD_LIBRARY_PATH 3. ubuntu22.04.3版本上使用 Tesla M40(NVIDIA-Linux-x86_64-535.129.03.run) cuda118版本,测试tch cuda版本通过(缺少包可以拷贝到debug下) +- `export LD_LIBRARY_PATH=${LIBTORCH}/lib:$LD_LIBRARY_PATH` - `cargo test --test tch_test` \ No newline at end of file diff --git a/src/normalize_data.rs b/src/normalize_data.rs index fc97fb3..e57c2ea 100644 --- a/src/normalize_data.rs +++ b/src/normalize_data.rs @@ -85,6 +85,61 @@ async fn normalize(df: Option) -> anyhow::Result{ Ok(DataFrame::empty()) } +pub fn tch_normalization(tch_matrix: &[Vec]) -> Vec> { + // 计算均值向量 + let mean_vector = calculate_mean_vector(tch_matrix); + // 计算标准差向量 + let std_vector = calculate_standard_deviation_vector(tch_matrix); + // 归一化处理 + let normalized_tch_matrix = normalize_matrix(tch_matrix, &mean_vector, &std_vector); + normalized_tch_matrix +} + +fn calculate_mean_vector(tch_matrix: &[Vec]) -> Vec { + let mut mean_vector = vec![]; + for row in tch_matrix { + let mut sum = 0.0; + for &value in row { + sum += value; + } + mean_vector.push(sum / tch_matrix.len() as f64); + } + mean_vector +} +fn calculate_standard_deviation_vector(tch_matrix: &[Vec]) -> Vec { + let mut std_vector = vec![]; + for row in tch_matrix { + let mut variance = 0.0; + let mean = calculate_mean(row); + for &value in row { + variance += (value - mean) * (value - mean); + } + std_vector.push(variance.sqrt() as f64); + } + std_vector +} + +fn calculate_mean(numbers: &Vec) -> f64 { + let sum = numbers.iter().sum::(); + let length = numbers.len() as f64; + sum as f64 / length +} + +fn normalize_matrix( + tch_matrix: &[Vec], + mean_vector: &Vec, + std_vector: &Vec, +) -> Vec> { + let mut normalized_tch_matrix = vec![Vec::new(); tch_matrix.len()]; + for (i, row) in tch_matrix.iter().enumerate() { + let mut normalized_row = vec![0.0; row.len()]; + for (j, &value) in row.iter().enumerate() { + normalized_row[j] = (value - mean_vector[i]) / std_vector[i]; + } + normalized_tch_matrix.push(normalized_row); + } + normalized_tch_matrix +} @@ -93,11 +148,28 @@ async fn normalize(df: Option) -> anyhow::Result{ mod test { use polars::prelude::{CsvReader, SerReader}; - use crate::normalize_data::traverse_source_directory; + use crate::normalize_data::{tch_normalization, traverse_source_directory}; use crate::util::Envs; use super::normalize; + #[test] + pub fn normalization_works(){ + std::env::set_var("RUST_LOG", "qlib_data=debug"); + // 初始化日志 + tracing_subscriber::fmt::init(); + + let tch_matrix: Vec> = vec![ + vec![1.0, 2.0, 3.0], + vec![4.0, 5.0, 6.0], + vec![7.0, 8.0, 9.0], + ]; + let normalized_tch_matrix = tch_normalization(&tch_matrix); + for row in normalized_tch_matrix { + tracing::debug!("row: {:?}", row); + } + } + #[tokio::test] pub async fn normalize_format_works()->anyhow::Result<()>{ std::env::set_var("RUST_LOG", "qlib_data=debug"); diff --git a/tests/tch_test.rs b/tests/tch_test.rs index f13b01d..804b51f 100644 --- a/tests/tch_test.rs +++ b/tests/tch_test.rs @@ -1,11 +1,7 @@ #[cfg(test)] mod alstm { use tch::nn::OptimizerConfig; - use tch::{ - kind, - nn::{self, Module}, - Device, Tensor, - }; + use tch::{kind, nn::{self, Module}, Device, Tensor, Kind}; fn my_module(p: nn::Path, dim: i64) -> impl nn::Module { let x1 = p.zeros("x1", &[dim]); @@ -13,11 +9,37 @@ mod alstm { nn::func(move |xs| xs * &x1 + xs.exp() * &x2) } + fn normalize_data(input: &Tensor) -> Tensor { + let mean = input.mean(Kind::Float); + let std = input.std(false); + + let normalized_input = input - mean; + normalized_input / std + } + + #[test] + fn normalize_data_works(){ + // 加载数据,这里以一个简单的示例为例,实际应用中,您需要根据实际情况加载数据 + let data = vec![ + Tensor::from_slice(&vec![1.0, 2.0, 3.0, 4.0, 5.0]), + Tensor::from_slice(&vec![6.0, 7.0, 8.0, 9.0, 10.0]), + ]; + + // 数据标准化 + let normalized_data = data.iter().map(|tensor| normalize_data(tensor)).collect::>(); + + // 打印标准化后的数据 + for (i, tensor) in normalized_data.iter().enumerate() { + println!("Data normalized {}: {}", i, tensor); + } + } + #[test] fn alstm_build_model_cuda_works() { let vs = nn::VarStore::new(Device::Cuda(0)); let my_module = my_module(vs.root(), 7); let mut opt = nn::Sgd::default().build(&vs, 1e-2).unwrap(); + for _idx in 1..5000 { // Dummy mini-batches made of zeros. let xs = Tensor::zeros(&[7], kind::FLOAT_CUDA);