From ae0b4fa9d755f72c435483bb8dcdc96f70af2e8d Mon Sep 17 00:00:00 2001
From: zrb <2292676443@qq.com>
Date: Tue, 7 May 2024 15:47:28 +0800
Subject: [PATCH] stp eigen amend

---
 CMakeLists.txt                 |   3 +-
 benchmarks/t1.bench            |   2 +-
 examples/dag_test.cpp          |   5 +-
 examples/mc_mul_test.cpp       |  76 +++++
 examples/stp_test.cpp          |  47 +++
 include/stp/stp_circuit.hpp    |  45 +--
 include/stp/stp_dag2nmx.hpp    |  11 +-
 include/stp/stp_eigen.hpp      | 586 ++++++++++++++-------------------
 include/stp/stp_logic_expr.hpp |  11 +-
 include/stp/stp_simulator.hpp  |  11 +-
 include/stp/stp_timer.hpp      |  84 +++--
 include/stp/stp_utils.hpp      |   6 +
 12 files changed, 484 insertions(+), 403 deletions(-)
 create mode 100644 examples/mc_mul_test.cpp
 create mode 100644 examples/stp_test.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ae7c111..be7784d 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,12 +6,13 @@ project(stp LANGUAGES CXX)
 set(STP_CXX_STANDARD "17" CACHE STRING "C++ standard")
 set(CMAKE_CXX_STANDARD ${STP_CXX_STANDARD})
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 
 # Options
 option(STP_EXAMPLES "Build examples" ON)
 option(STP_TEST "Build tests" ON)
 
+add_compile_options(-O4 -W -Wall -Wextra)
+
 # Add main include files
 add_subdirectory(include)
 
diff --git a/benchmarks/t1.bench b/benchmarks/t1.bench
index 94a946a..86937ce 100755
--- a/benchmarks/t1.bench
+++ b/benchmarks/t1.bench
@@ -7,4 +7,4 @@ n4 = LUT 0x1(n2)
 n5 = LUT 0x8(n3, n4)
 n6 = LUT 0xe(n1, n2)
 n7 = LUT 0x1(n5)
-n8 = LUT 0x6(n6, n7)
\ No newline at end of file
+n8 = LUT 0x6(n6, n7) 
\ No newline at end of file
diff --git a/examples/dag_test.cpp b/examples/dag_test.cpp
index 2b60eab..e8f4936 100755
--- a/examples/dag_test.cpp
+++ b/examples/dag_test.cpp
@@ -30,9 +30,10 @@ int main(int argc, char **argv)
 	std::cout << "***************************************" << std::endl;
 	circuit_normalize_impl cn(c, false);
 	std::string m1 = cn.run_str(false);
-	std::string m2 = cn.run_str(true);
-	std::cout << "old methon\n";
+	std::cout << "old methon\n";	
 	std::cout << m1 << "\n";
+	
+	std::string m2 = cn.run_str(true);
 	std::cout << "new methon\n";
 	std::cout << m2 << "\n";
 
diff --git a/examples/mc_mul_test.cpp b/examples/mc_mul_test.cpp
new file mode 100644
index 0000000..fe6d277
--- /dev/null
+++ b/examples/mc_mul_test.cpp
@@ -0,0 +1,76 @@
+#include <iostream>
+#include <stp/stp_eigen.hpp>
+
+using Eigen::MatrixXi;
+
+void print(const matrix_chain& mc)
+{
+  int i = 1;
+  for(const matrix& m : mc)
+  {
+    std::cout << "M" << i << ": " << m.rows() << "x" << m.cols() << "\n";
+    i++;
+  }
+}
+
+matrix random_generation(int row, int col)
+{
+  matrix result(row, col);
+  for(int i = 0; i < row; i++)
+    for(int j = 0; j < col; j++)
+      result(i, j) = rand() % 2;
+  return result;
+}
+
+void test1()
+{
+  matrix_chain mc;
+  for(int i = 0; i < 22; i++)
+  {
+    mc.push_back(random_generation(2, 4));
+  }
+  // print(mc);
+  matrix r1 = stp::matrix_chain_multiply(mc, true);
+  matrix r2 = stp::matrix_chain_multiply(mc, true, stp::mc_multiply_method::sequence);
+  assert(r1 == r2);
+}
+
+void test2()
+{
+  matrix_chain mc;
+  for(int i = 0; i < 22; i++)
+  {
+    mc.push_back(random_generation(4, 2));
+  }
+  // print(mc);
+  matrix r1 = stp::matrix_chain_multiply(mc, true);
+  matrix r2 = stp::matrix_chain_multiply(mc, true, stp::mc_multiply_method::sequence);
+  assert(r1 == r2);
+}
+
+void test3()
+{
+  matrix_chain mc;
+  for(int i = 0; i < 100; i++)
+  {
+    if(rand() % 2 == 1) mc.push_back(random_generation(4, 2));
+    else                mc.push_back(random_generation(2, 4));
+  }
+  // print(mc);
+  matrix r1 = stp::matrix_chain_multiply(mc, true);
+  matrix r2 = stp::matrix_chain_multiply(mc, true, stp::mc_multiply_method::sequence);
+  assert(r1 == r2);
+}
+
+
+int main()
+{ 
+  std::cout << "-------------------------------------------------------\n";
+  test1();
+  std::cout << "-------------------------------------------------------\n";
+  test2();
+  std::cout << "-------------------------------------------------------\n";
+  test3();
+  std::cout << "-------------------------------------------------------\n";
+  return 0;
+}
diff --git a/examples/stp_test.cpp b/examples/stp_test.cpp
new file mode 100644
index 0000000..d0c2244
--- /dev/null
+++ b/examples/stp_test.cpp
@@ -0,0 +1,47 @@
+#include <iostream>
+#include <stp/stp_eigen.hpp>
+
+using Eigen::MatrixXi;
+
+matrix random_generation(int row, int col)
+{
+  matrix result(row, col);
+  for(int i = 0; i < row; i++)
+    for(int j = 0; j < col; j++)
+      result(i, j) = rand() % 2;
+  return result;
+}
+
+int main()
+{
+  //n % p = 0 type matrix multiplication test
+  for(int i = 0; i < 10; i++)
+  {
+    int m = 200;
+    int n = 4 << i;
+    int p = 4;
+    int q = 100;
+    matrix A = random_generation(m, n);
+    matrix B = random_generation(p, q);
+    stp::semi_tensor_product(A, B, true, stp::stp_method::definition_1);
+    stp::semi_tensor_product(A, B, true, stp::stp_method::definition_2); 
+    stp::semi_tensor_product(A, B, true);
+  }
+
+  std::cout << "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n";
+  
+  //p % n = 0 type matrix multiplication test
+  for(int i = 0; i < 10; i++)
+  {
+    int m = 200;
+    int n = 4;
+    int p = 4 << i;
+    int q = 100;
+    matrix A = random_generation(m, n);
+    matrix B = random_generation(p, q);
+    stp::semi_tensor_product(A, B, true, stp::stp_method::definition_1);
+    stp::semi_tensor_product(A, B, true, stp::stp_method::definition_2); 
+    stp::semi_tensor_product(A, B, true);
+  }
+  return 0;
+}
diff --git a/include/stp/stp_circuit.hpp b/include/stp/stp_circuit.hpp
index 113eb56..2daf31b 100755
--- a/include/stp/stp_circuit.hpp
+++ b/include/stp/stp_circuit.hpp
@@ -24,7 +24,7 @@
  */
 
 /*!
-  \file stp_eigen.hpp
+  \file stp_circuit.hpp
   \brief header file for some basic STP operations
   \author Zhufei Chu
   \author Ruibing Zhang
@@ -118,10 +118,12 @@ class stp_circuit
     m_outputs.reserve(20);
   }
 
-  stp_circuit(const std::string& str)  //表达式字符串构造
+  //表达式字符串构造 f = (a & b & ~c) | (a & ~c) | (~b & ~c)
+  stp_circuit(const std::string& str)
   {
 
-  }
+  } 
+
 #pragma region create Primary I / O and node
   uint32_t create_pi(const std::string& name) 
   {
@@ -277,26 +279,27 @@ class stp_circuit
     int tt_length = 1 << inputs_num;
     matrix result(2, tt_length);
     uint32_t idx = 0;
+    std::string str = "";
+    // 0x???
     for(int i = 2; i < tt.size(); i++)
     {
-      char c = tt[i];
-      std::string str = hex2bin(c);
-      for(int i = str.size() - tt_length; i < str.size(); i++ )
+      str += hex2bin(tt[i]);
+    }    
+    for(int i = str.size() - tt_length; i < str.size(); i++ )
+    {
+      char t = str[i];
+      if(t == '0') 
       {
-        char t = str[i];
-        if(t == '0') 
-        {
-          result(0, idx) = 0;
-          result(1, idx) = 1;
-        }
-        else
-        {
-          result(0, idx) = 1;
-          result(1, idx) = 0;         
-        }
-        idx++;
+        result(0, idx) = 0;
+        result(1, idx) = 1;
       }
-    }    
+      else
+      {
+        result(0, idx) = 1;
+        result(1, idx) = 0;         
+      }
+      idx++;
+    }
     assert(idx == tt_length);
     return result;
   }
@@ -319,10 +322,10 @@ class stp_circuit
     if(c == 'd' || c == 'D') return "1101";
     if(c == 'e' || c == 'E') return "1110";
     if(c == 'f' || c == 'F') return "1111";
-    std::cout << c << std::endl;
     assert(false);
     return "";
   }
+  
 public:
   void print_circuit()
   {
@@ -356,7 +359,7 @@ class stp_circuit
   }
 
 private:
-	std::vector<node>     m_nodes;  
+	std::vector<node> m_nodes;  
 	std::vector<id> m_inputs;
 	std::vector<id> m_outputs;
 
diff --git a/include/stp/stp_dag2nmx.hpp b/include/stp/stp_dag2nmx.hpp
index abe1681..ace4a7b 100755
--- a/include/stp/stp_dag2nmx.hpp
+++ b/include/stp/stp_dag2nmx.hpp
@@ -69,7 +69,7 @@ namespace stp
 
         get_chain();
 
-        result = se.normalize_matrix( chain );
+        result = normalize_matrix( chain );
         return result;
       }
 
@@ -116,7 +116,7 @@ namespace stp
 
         chain = expr_to_chain( normal ); 
 
-        result = se.matrix_chain_multiply( chain );
+        result = matrix_chain_multiply( chain );
 
         return result;
       }
@@ -171,7 +171,7 @@ namespace stp
         }
       }
 
-      matrix get_matritx( const u_int32_t n )
+      matrix get_matritx( const uint32_t n )
       {
         if(circuit.is_pi(n)) 
         {
@@ -200,7 +200,7 @@ namespace stp
           {
             str.erase(str.begin());
             assert(std::stoi(str) == 2);
-            return se.generate_swap_matrix(2, 2);
+            return generate_swap_matrix(2, 2);
           }
         }
         return circuit.get_node(n).get_mtx();
@@ -381,7 +381,7 @@ namespace stp
         { 
           if( other_matrix[normal[i]].substr( 0, 1 ) == "I" )
           {
-            new_chain.push_back( se.kronecker_product( chain[i], chain[i+1]) );
+            new_chain.push_back( kronecker_product( chain[i], chain[i+1]) );
             i++;
           }
           else
@@ -429,7 +429,6 @@ namespace stp
 
     private:
       matrix result;
-      stp_eigen se;
       const stp_circuit& circuit;
       bool verbose;
       matrix_chain chain;
diff --git a/include/stp/stp_eigen.hpp b/include/stp/stp_eigen.hpp
index 0b41aca..48e4539 100755
--- a/include/stp/stp_eigen.hpp
+++ b/include/stp/stp_eigen.hpp
@@ -45,11 +45,6 @@ using matrix_chain = std::vector<matrix>; // Defined matrix chain
 namespace stp
 {
 
-  inline unsigned get_lcm( unsigned m, unsigned n )
-  {
-    return ( m * n ) / std::gcd( m, n );
-  }
-
   inline matrix generate_swap_matrix( const int& m, const int& n )
   {
     matrix swap_matrixXi = matrix::Zero( m * n, m * n );
@@ -93,326 +88,104 @@ namespace stp
     return KP;
   }
 
-  inline matrix semi_tensor_product( const matrix& A, const matrix& B )
-  {
-    unsigned m = A.rows();
-    unsigned n = A.cols();
-
-    unsigned p = B.rows();
-    unsigned q = B.cols();
-
-    unsigned t = get_lcm( n, p );
-
-    matrix Ia = matrix::Identity( t / n, t / n );
-    matrix Ib = matrix::Identity( t / p, t / p );
-
-    matrix KPa = kronecker_product( A, Ia );
-    matrix KPb = kronecker_product( B, Ib );
-
-    return KPa * KPb;
-  }
-
-  inline matrix semi_tensor_product2( const matrix& A, const matrix& B )
-  {
-    int m = A.rows(); 
-    int n = A.cols(); 
-
-    int p = B.rows(); 
-    int q = B.cols(); 
-
-    int row, col;
-
-    matrix result_matrix;
-
-    if (n % p == 0)
-    {
-      int times = n / p;
-      row = m;
-      col = times * q;
-      result_matrix = matrix::Zero(row, col);
-      for (int i = 0; i < q; ++i) 
-      {
-        for (int j = 0; j < p; ++j)
-        {
-          result_matrix.block(0, i * times, m, times) += B(j, i) * A.block(0, j * times, m, times);
-        }
-      }
-    }
-    else if (p % n == 0)
-    {
-      int times = p / n;
-      row = m * times;
-      col = q;
-      result_matrix = matrix::Zero(row, col);
-      for (int i = 0; i < m; ++i) //
-      {
-        for (int j = 0; j < n; ++j)
-        {
-          result_matrix.block(i * times, 0, times, q) += A(i, j) * B.block(j * times, 0, times, q);
-        }
-      }
-    }
-    else
-    {
-      std::cout << "matrix type error!" << std::endl;
-    }
-    return result_matrix;
-  }
-
-  inline matrix matrix_chain_multiply( const matrix_chain& mc )
+  enum class stp_method : uint8_t
   {
-    assert( mc.size() > 0 );
-    matrix result_matrix;
-
-    if ( mc.size() == 1 )
-    {
-      return mc[0];
-    }
-
-    result_matrix = semi_tensor_product( mc[0], mc[1] );
-
-    for ( int i = 2; i < mc.size(); i++ )
-    {
-      result_matrix = semi_tensor_product( result_matrix, mc[i] );
-    }
-    return result_matrix;
-  }
+    self_adaptation = 0,
+    definition_1 = 1,
+    definition_2 = 2,
+  };
 
-  class stp_eigen 
+  class semi_tensor_product_impl
   {
     public:
-      matrix generate_swap_matrix( const int& m, const int& n )
+      semi_tensor_product_impl( const matrix& A, const matrix& B, bool verbose, const stp_method method)
+       : A(A), B(B), verbose(verbose), method(method)
       {
-        matrix swap_matrixXi = matrix::Zero( m * n, m * n );
-        int p, q;
-        for ( int i = 0; i < m * n / 2 + 1; i++ )
-        {
-          p = i / m;
-          q = i % m;
-          int j = q * n + p;
-          swap_matrixXi( i, j ) = 1;
-          swap_matrixXi( m * n - 1 - i, m * n - 1 - j ) = 1;
-        }
-        return swap_matrixXi;
+        m = A.rows();
+        n = A.cols();
+        p = B.rows();
+        q = B.cols();
       }
-
-      matrix kronecker_product( const matrix& A, const matrix& B )
+    
+      matrix run()
       {
-        /* trivial cases */
-        auto a_dimensions = A.rows() * A.cols();
-        auto b_dimensions = B.rows() * B.cols();
-
-        if ( a_dimensions == 1u )
-        {
-          return B;
-        }
-        if ( b_dimensions == 1u )
-        {
-          return A;
-        }
-
-        stp_timer timer(true);
-
-        matrix KP( A.rows() * B.rows(), A.cols() * B.cols() );
-
-        for ( int i = 0; i < A.rows(); ++i )
+        matrix result;
+        uint64_t complexity_1 = cost1();
+        uint64_t complexity_2 = cost2();
+        switch (method)
         {
-          for ( int j = 0; j < A.cols(); ++j )
+        case stp_method::definition_1:
+          result = call_with_stopwatch( time, [&]() { return semi_tensor_product1(); } );
+          break;
+        case stp_method::definition_2:
+          result = call_with_stopwatch( time, [&]() { return semi_tensor_product2(); } );
+          break;
+        case stp_method::self_adaptation:
+          if(complexity_1 < complexity_2)
           {
-            KP.block( i * B.rows(), j * B.cols(), B.rows(), B.cols() ) = A( i, j ) * B;
+            result = call_with_stopwatch( time, [&]() { return semi_tensor_product1(); } );
           }
+          else
+          {
+            result = call_with_stopwatch( time, [&]() { return semi_tensor_product2(); } );
+          }
+          break; 
+        default:
+          break;
         }
 
-        kp_time = timer.get_elapsed_ms();
-        total_time = kp_time + stp_time;
-
-        return KP;
-      }
-    
-      matrix semi_tensor_product( const matrix& A, const matrix& B )
-      {
-        int n = A.cols(); 
-        int p = B.rows();
-        matrix result;
-
-        stp_timer timer(true);
-
-        if(std::gcd( n, p ) < 4) 
+        if(verbose)
         {
-          result = semi_tensor_product1( A, B );
+          report(result);
         }
-        else
-        {
-          result = semi_tensor_product2( A, B );
-        }
-
-        stp_time = timer.get_elapsed_ms();
-        total_time = kp_time + stp_time;
 
         return result;
       }
-      
-      matrix matrix_chain_multiply( const matrix_chain& mc )
-      {
-        assert( mc.size() > 0 );
-
-        if ( mc.size() == 1 )
-        {
-          return mc[0];
-        }
 
-        if ( mc.size() == 2 )
-        {
-          return semi_tensor_product( mc[0], mc[1] );
-        }
-
-        std::vector<int> orders = matrix_chain_order( mc );
-        return parse_multiplication_order(orders, mc);
+    private:
+      uint32_t get_lcm( uint32_t m, uint32_t n )
+      {
+        return ( m * n ) / std::gcd( m, n );
       }
 
-      matrix normalize_matrix( matrix_chain mc )
+      uint64_t cost1()
       {
-        matrix Mr( 4, 2 ); // Reduced power matrix
-        Mr << 1, 0, 0, 0, 0, 0, 0, 1;
-
-        matrix I2( 2, 2 ); // Identity matrix
-        I2 << 1, 0, 0, 1;
-
-        matrix normal_matrix;
-        int p_variable;
-        int p;
-
-        int max = 0; // the max is the number of variable
+        uint64_t t = get_lcm(n, p);
+        uint64_t kp = 2 * ((m * t / n) * t + t * (t * q / p));
+        uint64_t total = kp + ((2 + 1) * t) * ((m * t / n) * (t * q / p));
+        return total;
+      }
 
-        for ( int i = 0; i < mc.size(); i++ )
+      uint64_t cost2()
+      {
+        uint64_t total;
+        if(n % p == 0)
         {
-          if ( mc[i]( 0, 0 ) > max )
-          {
-            max = mc[i]( 0, 0 );
-          }
-        }
-
-        std::vector<int> idx( max + 1 ); // id[0] is the max of idx
-        p_variable = mc.size() - 1;
+          uint64_t times  = n / p;
+          uint64_t row = m;
+          uint64_t col = times * q;
 
-        while ( p_variable >= 0 )
+          total = (2 + 1) * (m * times) * p * q;
+        }
+        else if(p % n == 0)
         {
-          bool find_variable = false;
-          matrix& matrix = mc[p_variable];
-          int var = get_variable( matrix );
-
-          if ( var != 0 ) // 1:find a variable
-          {
-            if ( idx[var] == 0 ) // the variable appears for the first time ：end : not_end
-            {
-              idx[var] = idx[0] + 1;
-              idx[0]++;
-
-              if ( p_variable == mc.size() - 1 ) // the variable shows in the end
-              {
-                mc.pop_back();
-                p_variable--;
-                continue;
-              }
-            }
-            else // the variable appears for the not first time
-            {
-              if ( idx[var] == idx[0] )
-              {
-                find_variable = true;
-              }
-              else
-              {
-                find_variable = true;
-                mc.push_back( generate_swap_matrix( 2, 1 << ( idx[0] - idx[var] ) ) );
-
-                for ( int i = 1; i <= max; i++ )
-                {
-                  if ( idx[i] != 0 && idx[i] > idx[var] )
-                    idx[i]--;
-                }
-
-                idx[var] = idx[0];
-              }
-            }
-
-            matrix_chain mc_temp;
-            mc_temp.clear();
+          uint64_t times  = p / n;
+          uint64_t row = m * times;
+          uint64_t col = q;
 
-            for ( p = p_variable + 1; p < mc.size(); p++ )
-            {
-              mc_temp.push_back( mc[p] );
-            }
-
-            while ( p > p_variable + 1 )
-            {
-              mc.pop_back();
-              p--;
-            }
-
-            if ( mc_temp.size() > 0 )
-            {
-              mc.push_back( matrix_chain_multiply( mc_temp ) );
-            }
-
-            if ( p_variable != mc.size() - 1 )
-            {
-              mc[p_variable] = kronecker_product( I2, mc[p_variable + 1] );
-              mc.pop_back();
-            }
-
-            if ( find_variable )
-            {
-              mc.push_back( Mr );
-            }
-            continue;
-          }
-          else
-          {
-            p_variable--;
-          }
+          total = (2 + 1) * (times * q) * m * n;
         }
-
-        for ( int i = max; i > 0; i-- ) 
+        else
         {
-          mc.push_back( generate_swap_matrix( 2, pow( 2, idx[0] - idx[i] ) ) );
-
-          for ( int j = 1; j <= max; j++ )
-          {
-            if ( ( idx[j] != 0 ) && ( idx[j] > idx[i] ) )
-            {
-              idx[j]--;
-            }
-          }
-
-          idx[i] = max;
+          std::cout << "matrix type error!" << std::endl;
+          assert(false);
         }
-
-        normal_matrix = matrix_chain_multiply( mc );
-        return normal_matrix;
+        return total;
       }
 
-      void print_stats()
+      matrix semi_tensor_product1()
       {
-        std::cout << "kronecker   product: " << kp_time << "ms" <<  std::endl;
-        std::cout << "semi_tensor product: " << stp_time << "ms" <<  std::endl;
-        std::cout << "              total: " << total_time << "ms" <<  std::endl;
-      }
-
-    private:
-      unsigned get_lcm( unsigned m, unsigned n )
-      {
-        return ( m * n ) / std::gcd( m, n );
-      }
-
-      // the method used to implement semi tensor product
-      matrix semi_tensor_product1( const matrix& A, const matrix& B )
-      {
-        unsigned m = A.rows();
-        unsigned n = A.cols();
-
-        unsigned p = B.rows();
-        unsigned q = B.cols();
+        cost = cost1();
 
         unsigned t = get_lcm( n, p );
 
@@ -425,18 +198,10 @@ namespace stp
         return KPa * KPb;
       }
 
-      matrix semi_tensor_product2( const matrix& A, const matrix& B )
+      matrix semi_tensor_product2()
       {
-        int m = A.rows(); 
-        int n = A.cols(); 
-
-        int p = B.rows(); 
-        int q = B.cols(); 
-
         int row, col;
-
         matrix result_matrix;
-
         if (n % p == 0)
         {
           int times = n / p;
@@ -470,18 +235,128 @@ namespace stp
           std::cout << "matrix type error!" << std::endl;
           assert(false);
         }
+        cost = cost2();
         return result_matrix;
       }
 
-      // the method used to implement matrix chain multiply
-      std::vector<int> matrix_chain_order(const matrix_chain& mc)
+      void report(const matrix& result)
+      {
+        std::cout << "----------------------------------------------------------\n";
+        std::cout << "Dimension A: " << m << " x " << n << "\n";
+        std::cout << "Dimension B: " << p << " x " << q << "\n";
+        if(method == stp_method::definition_1)  
+        {
+          std::cout << "Mandatory use the method defined 1!\n";
+        }
+        else if(method == stp_method::definition_2)  
+        {
+          std::cout << "Mandatory use the method defined 2!\n";
+        }
+        else 
+        {
+          if(cost1() < cost2())
+          {
+            std::cout << "Use the method defined 1!\n";
+          }
+          else
+          {
+            std::cout << "Use the method defined 2!\n";
+          }
+        }
+        std::cout << "Dimension result: " << result.rows() << " x " << result.cols() << "\n";
+        std::cout << "total time: " << to_millisecond( time ) << "ms\n";
+        std::cout << "cost: " << cost << "\n";
+        std::cout << "ratio: " << 100000000 * to_millisecond( time ) / cost << "\n";
+        std::cout << "----------------------------------------------------------\n";
+      }
+
+    private:
+      const matrix& A;
+      const matrix& B;
+      const bool verbose {false};
+      const stp_method method;
+      uint32_t m{0u}, n{0u}, p{0u}, q{0u};
+      stopwatch<>::duration time{0};
+      uint64_t cost{0u};
+  };
+
+  /*! \brief Compute the semi-tensor product of matrix A and matrix B.
+    Example:
+      matrix A, B;
+      matrix result = semi_tensor_product(A, B);
+  */
+  inline matrix semi_tensor_product( const matrix& A, const matrix& B, const bool verbose = false, const stp_method method = stp_method::self_adaptation)
+  {
+    semi_tensor_product_impl stp_impl( A, B, verbose, method);
+    return stp_impl.run();
+  }
+
+  enum class mc_multiply_method : uint8_t
+  {
+    sequence = 0,
+    dynamic_programming = 1
+  };
+
+  class matrix_chain_multiply_impl
+  {
+    public:
+      matrix_chain_multiply_impl(const matrix_chain& mc, bool verbose, const mc_multiply_method method)
+      : mc(mc), verbose(verbose), method(method)
+      {
+        assert( mc.size() > 0 );
+      }
+
+      matrix run()
+      { 
+        switch (method)
+        {
+        case mc_multiply_method::sequence:
+          matrix_chain_multiply();
+          break;
+        case mc_multiply_method::dynamic_programming:
+          {
+            orders = matrix_chain_order();
+            result = parse_multiplication_order(orders);
+          }
+          break;       
+        default:
+          break;
+        }
+
+        if(verbose)
+        {
+          print();
+        }
+
+        return result;
+      }
+
+    private:
+      void matrix_chain_multiply()
+      {
+        if ( mc.size() == 1 )
+        {
+          result = mc[0];
+        }
+
+        total_cost += complexity_analysis(mc[0].rows(), mc[0].cols(), mc[1].rows(), mc[1].cols())[0];
+        result = call_with_stopwatch( time, [&]() { return semi_tensor_product( mc[0], mc[1] ); } );
+
+        for ( int i = 2; i < mc.size(); i++ )
+        {
+          total_cost += complexity_analysis(result.rows(), result.cols(), mc[i].rows(), mc[i].cols())[0];
+          result = call_with_stopwatch( time, [&]() { return semi_tensor_product( result, mc[i] ); } );
+        }
+      }
+
+      std::vector<int> matrix_chain_order()
       {
         struct mc_info
         {
-          uint32_t op_num = 0;  // Records the total complexity of the current operation
-          uint32_t flag   = 0;  // Record the matrix multiplication sequence
-          uint32_t row    = 0; 
-          uint32_t col    = 0;  
+          uint64_t op_num = 0;  // Records the total complexity of the current operation
+          uint64_t flag   = 0;  // Record the matrix multiplication sequence
+          uint64_t row    = 0; 
+          uint64_t col    = 0;  
         };
         
         int length = mc.size();
@@ -493,17 +368,17 @@ namespace stp
           dp[i][i].col = mc[i].cols();
         }
 
-        for (int l = 2; l <= length; l++) /* 矩阵链的长度 */
+        for (int l = 2; l <= length; l++) /* The length of the matrix chain */
         {
           for (int i = 0; i < length - l + 1; i++)
           {
             int j = i + l - 1; 
             dp[i][j].op_num = INT_MAX;
-            std::vector<int> temp;
+            std::vector<uint64_t> temp;
             for (int k = i; k < j; k++)
             {
               temp = complexity_analysis(dp[i][k].row, dp[i][k].col, dp[k+1][j].row,  dp[k+1][j].col);
-              int q = dp[i][k].op_num + dp[k+1][j].op_num + temp[0];
+              uint64_t q = dp[i][k].op_num + dp[k+1][j].op_num + temp[0];
               if (q < dp[i][j].op_num)
               {
                 dp[i][j].op_num = q;
@@ -516,33 +391,41 @@ namespace stp
         }
         
         // Parse the Dynamic programming table
-        std::vector<int> result;
+        std::vector<int> rule;
         std::vector<std::vector<int>> flags(length, std::vector<int>(length));
         for(int i = 0; i < length; i++)
           for(int j = 0; j < length; j++)
             flags[i][j] = dp[i][j].flag;
-        optimal_parens(flags, 0, length - 1, result);
-        return result;
+        optimal_parens(flags, 0, length - 1, rule);
+        return rule;
       }
 
-      std::vector<int> complexity_analysis(int m, int n, int p, int q)
+      std::vector<uint64_t> complexity_analysis(int m, int n, int p, int q)
       {
         /*
           temp[0] operational complexity
           temp[1] rows
           temp[2] cols
         */ 
-        std::vector<int> temp(3, 0);
+        std::vector<uint64_t> temp(3, 0u);
         if (n % p == 0)
         {
-          temp[0] = n * q / p;   
+          uint64_t times  = n / p;
+          uint64_t row = m;
+          uint64_t col = times * q;
+          // temp[0] = n * q / p;   
+          temp[0] = (2 + 1) * (m * times) * p * q;
           temp[1] = m;
           temp[2] = n * q / p;
           return temp;
         }
         else if (p % n == 0)
         {
-          temp[0] = q;
+          uint64_t times  = p / n;
+          uint64_t row = m * times;
+          uint64_t col = q;
+          // temp[0] = q;
+          temp[0] = (2 + 1) * (times * q) * m * n;
           temp[1] = m * p / n;
           temp[2] = q;
           return temp;
@@ -554,24 +437,24 @@ namespace stp
         return temp;
       }
 
-      void optimal_parens(const std::vector<std::vector<int>> &s, int i, int j, std::vector<int> &result)
+      void optimal_parens(const std::vector<std::vector<int>> &s, int i, int j, std::vector<int> &rule)
       {
         if (i == j)
-            result.push_back(i);
+          rule.push_back(i);
         else
         {
-          result.push_back(-1);
-          optimal_parens(s, i, s[i][j], result);
-          optimal_parens(s, s[i][j] + 1, j, result);
-          result.push_back(-2);
+          rule.push_back(-1);
+          optimal_parens(s, i, s[i][j], rule);
+          optimal_parens(s, s[i][j] + 1, j, rule);
+          rule.push_back(-2);
         }
       }
 
-      matrix parse_multiplication_order(std::vector<int>& order, const std::vector<matrix>& mc)
+      matrix parse_multiplication_order(std::vector<int> order)
       {
         std::vector<matrix> stacks;
         std::vector<int> idx;
-        for (unsigned int i = 0; i < order.size(); ++i)
+        for (int i = 0; i < order.size(); ++i)
         {
           if (order[i] == -1)
             idx.push_back(i);
@@ -579,7 +462,10 @@ namespace stp
           {
             if (stacks.size() >= 2)
             {
-              stacks[stacks.size() - 2] = semi_tensor_product(stacks[stacks.size() - 2], stacks[stacks.size() - 1]);
+              matrix& A = stacks[stacks.size() - 2];
+              matrix& B = stacks[stacks.size() - 1];
+              total_cost += complexity_analysis(A.rows(), A.cols(), B.rows(), B.cols())[0];
+              A = call_with_stopwatch( time, [&]() {  return semi_tensor_product(A, B);  });
               stacks.pop_back();
             }
             order.erase(order.begin() + idx.back(), order.begin() + i);
@@ -593,28 +479,42 @@ namespace stp
         return stacks[0];
       }
 
-      int get_variable( const matrix& mat )
+      void print()
       {
-        if( is_variable( mat ) )
-        {
-          return mat( 0, 0 );
-        }
-        else
+        if(method == mc_multiply_method::dynamic_programming)
         {
-          return 0;
+          std::cout << "use dynamic_programming!\n";
+          for(int t : orders)
+          {
+            if(t == -1)        std::cout << "(";
+            else if(t == -2)   std::cout << ")";
+            else               std::cout << "M" << t;
+          }
+          std::cout << "\n";
         }
+        else  
+          std::cout << "use sequence\n";
+        std::cout << "total time: " << to_millisecond( time ) << "ms\n";
+        std::cout << "total cost: " << total_cost << "\n";
+        std::cout << "1000000 * (times/total_cost) = " << 1000000 * to_millisecond( time ) / total_cost << "\n";
       }
 
-      bool is_variable( const matrix& mat )
-      {
-        return mat.rows() == 2 && mat.cols() == 1;
-      }
-      
-    private:
-      uint64_t stp_time = 0u;
-      uint64_t kp_time = 0u;
-      uint64_t total_time = 0;
+    private:  
+      const bool verbose {false};
+      const mc_multiply_method method {0u};
+      const matrix_chain& mc;
+      matrix result;
+      std::vector<int> orders;
+      stopwatch<>::duration time{0};
+      uint64_t total_cost{0u};
   };
 
+  /**/
+  inline matrix matrix_chain_multiply( const matrix_chain& mc, const bool verbose = false, const mc_multiply_method method = mc_multiply_method::dynamic_programming)
+  {
+    matrix_chain_multiply_impl mcm_impl(mc, verbose, method);
+    return mcm_impl.run();
+  }
+
 } // namespace stp
 
diff --git a/include/stp/stp_logic_expr.hpp b/include/stp/stp_logic_expr.hpp
index 7659635..ec196a6 100755
--- a/include/stp/stp_logic_expr.hpp
+++ b/include/stp/stp_logic_expr.hpp
@@ -78,11 +78,7 @@ namespace stp
           
           auto mc = expr_to_chain( normal );
 
-          matrix result = se.matrix_chain_multiply( mc );
-          if( verbose )
-          {
-            se.print_stats();
-          }
+          matrix result = matrix_chain_multiply( mc );
           
           return result; 
         }
@@ -212,7 +208,7 @@ namespace stp
           }
           else if( token == "W2" )
           {
-            chain.push_back( se.generate_swap_matrix( 2, 2 ) );
+            chain.push_back( generate_swap_matrix( 2, 2 ) );
           }
           else if( token == "PR2" )
           {
@@ -231,7 +227,7 @@ namespace stp
         {
           if( normal[i].substr( 0, 1 ) == "I" )
           {
-            new_chain.push_back( se.kronecker_product( chain[i], chain[i+1] ) );
+            new_chain.push_back( kronecker_product( chain[i], chain[i+1] ) );
             i++;
           }
           else
@@ -424,7 +420,6 @@ namespace stp
       }
 
     private:
-      stp_eigen se;
       std::string expr;
       std::vector<std::string> all_tokens;
       std::vector<std::string> input_names; //the name of variables
diff --git a/include/stp/stp_simulator.hpp b/include/stp/stp_simulator.hpp
index 310eed8..faaa202 100755
--- a/include/stp/stp_simulator.hpp
+++ b/include/stp/stp_simulator.hpp
@@ -46,12 +46,12 @@ class stp_simulator
 public:
   stp_simulator(const stp_circuit& circuit) : circuit(circuit) {} 
 
-  //full emulation, obtaining a truth table represented by a string
+  // full emulation, obtaining a truth table represented by a string
   std::string run()
   {
     init();
     sim(circuit.get_outputs()[0]);
-    // print_info();
+    print_info();
     return get_tt();
   }
 
@@ -155,6 +155,13 @@ class stp_simulator
     }
   }
   
+  void reset()
+  {
+    patterns_num = 0u;
+    sim_info.resize(circuit.get_nodes().size(), std::vector<uint8_t>());
+    had_sim.resize(circuit.get_nodes().size(), false);
+  }
+
 private:
   const stp_circuit& circuit;
   std::vector<std::vector<uint8_t>> sim_info;
diff --git a/include/stp/stp_timer.hpp b/include/stp/stp_timer.hpp
index f21ff53..dfa40db 100755
--- a/include/stp/stp_timer.hpp
+++ b/include/stp/stp_timer.hpp
@@ -12,7 +12,7 @@
  *
  * The above copyright notice and this permission notice shall be
  * included in all copies or substantial portions of the Software.
- *
+ * 
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -33,32 +33,78 @@
 #pragma once 
 
 #include <chrono>
+#include <iostream>
+#include <type_traits>
+#include <fmt/format.h>
 
-class stp_timer
+template<class Clock = std::chrono::steady_clock>
+class stopwatch
 {
 public:
-  stp_timer(bool do_start = false)
-  {
-    if (do_start) start();
-  }
+  using clock = Clock;
+  using duration = typename Clock::duration;
+  using time_point = typename Clock::time_point;
 
-  void start()
+  /*! \brief Default constructor.
+   *
+   * Starts tracking time.
+   */
+  explicit stopwatch( duration& dur )
+      : dur( dur ),
+        beg( clock::now() )
   {
-    m_start_point = std::chrono::high_resolution_clock::now();
   }
 
-  uint64_t get_elapsed_ms()
-  {
-    auto now = std::chrono::high_resolution_clock::now();
-    return std::chrono::duration_cast<std::chrono::milliseconds>(now - m_start_point).count();
-  }
-  
-  uint64_t get_elapsed_us()
+  /*! \brief Default deconstructor.
+   *
+   * Stops tracking time and updates duration.
+   */
+  ~stopwatch()
   {
-    auto now = std::chrono::high_resolution_clock::now();
-    return std::chrono::duration_cast<std::chrono::microseconds>(now - m_start_point).count();
+    dur += ( clock::now() - beg );
   }
 
 private:
-  std::chrono::time_point<std::chrono::high_resolution_clock> m_start_point;
-};
\ No newline at end of file
+  duration& dur;
+  time_point beg;
+};
+
+/*! \brief Calls a function and tracks time.
+ *
+ * The function that is passed as second parameter can be any callable object
+ * that takes no parameters.  This construction can be used to avoid
+ * pre-declaring the result type of a computation that should be tracked.
+ *
+   \verbatim embed:rst
+
+   Example
+
+   .. code-block:: c++
+
+      stopwatch<>::duration time{0};
+
+      auto result = call_with_stopwatch( time, [&]() { return function( parameters ); } );
+   \endverbatim
+ *
+ * \param dur Duration reference (time will be added to it)
+ * \param fn Callable object with no arguments
+ */
+
+template<class Fn, class Clock = std::chrono::steady_clock>
+std::invoke_result_t<Fn> call_with_stopwatch( typename Clock::duration& dur, Fn&& fn )
+{
+  stopwatch<Clock> t( dur );
+  return fn();
+}
+
+template<class Duration>
+inline double to_seconds( Duration const& dur )
+{
+  return std::chrono::duration_cast<std::chrono::duration<double>>( dur ).count();
+}
+
+template<class Duration>
+inline double to_millisecond( Duration const& dur )
+{
+  return 1000 * std::chrono::duration_cast<std::chrono::duration<double>>( dur ).count();
+}
\ No newline at end of file
diff --git a/include/stp/stp_utils.hpp b/include/stp/stp_utils.hpp
index 8aed4de..417e933 100755
--- a/include/stp/stp_utils.hpp
+++ b/include/stp/stp_utils.hpp
@@ -8,11 +8,17 @@
 #include <vector>
 #include <unordered_map>
 #include <bitset>
+#include <numeric>
 
 using matrix = Eigen::MatrixXi;           // Defines the type of matrix to use
 
 namespace stp
 {
+  inline unsigned get_lcm( unsigned m, unsigned n )
+  {
+    return ( m * n ) / std::gcd( m, n );
+  }
+
   inline void print_strings( const std::vector<std::string>& inputs )
   {
     for( auto s : inputs )