Skip to content

Commit

Permalink
[Tensornet] Update observe calculation (#2522)
Browse files Browse the repository at this point in the history
* Add non path reuse option

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>

* Add docs and test

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>

* Code review: add a note about observe with tensornet and update code comments

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>

---------

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>
  • Loading branch information
1tnguyen authored Jan 24, 2025
1 parent 19c33af commit db48d95
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 3 deletions.
13 changes: 13 additions & 0 deletions docs/sphinx/using/backends/simulators.rst
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,26 @@ Specific aspects of the simulation can be configured by setting the following of
* **`OMP_PLACES=cores`**: Set this environment variable to improve CPU parallelization.
* **`OMP_NUM_THREADS=X`**: To enable CPU parallelization, set X to `NUMBER_OF_CORES_PER_NODE/NUMBER_OF_GPUS_PER_NODE`.
* **`CUDAQ_TENSORNET_CONTROLLED_RANK=X`**: Specify the number of controlled qubits whereby the full tensor body of the controlled gate is expanded. If the number of controlled qubits is greater than this value, the gate is applied as a controlled tensor operator to the tensor network state. Default value is 1.
* **`CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=X`**: Set this environment variable to `TRUE` (`ON`) or `FALSE` (`OFF`) to enable or disable contraction path reuse when computing expectation values. Default is `OFF`.
.. note::
This backend requires an NVIDIA GPU and CUDA runtime libraries.
If you do not have these dependencies installed, you may encounter an error stating `Invalid simulator requested`.
See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.
.. note::
When using contraction path reuse (`CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=TRUE`), :code:`tensornet` backends perform a single contraction path optimization with an opaque spin operator term. This path is then used to contract all the actual terms in the spin operator, hence saving the path finding time.
Because an opaque spin operator term is used as a placeholder during contraction path optimization, the resulting contraction path may be less optimal than one computed for the actual spin operator.
For instance, if the spin operator is sparse (only acting on a few qubits), the contraction can be significantly simplified.
.. note::
:code:`tensornet` backends only return the overall expectation value for a :class:`cudaq.SpinOperator` when using the `cudaq::observe` method.
Term-by-term expectation values will not be available in the resulting `ObserveResult` object.
If needed, these values can be computed by calling `cudaq::observe` on individual terms instead.
Matrix product state
+++++++++++++++++++++++++++++++++++
Expand Down
15 changes: 15 additions & 0 deletions runtime/nvqir/cutensornet/simulator_cutensornet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ SimulatorTensorNetBase::SimulatorTensorNetBase()
HANDLE_CUTN_ERROR(cutensornetCreate(&m_cutnHandle));
// The scratch pad must be allocated after we have selected the device.
scratchPad.allocate();

// Check whether observe path reuse is enabled.
m_reuseContractionPathObserve =
cudaq::getEnvBool("CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE", false);
}

static std::vector<std::complex<double>>
Expand Down Expand Up @@ -277,6 +281,17 @@ cudaq::observe_result
SimulatorTensorNetBase::observe(const cudaq::spin_op &ham) {
LOG_API_TIME();
prepareQubitTensorState();
if (!m_reuseContractionPathObserve) {
// If contraction path reuse is disabled, convert spin_op to
// cutensornetNetworkOperator_t and compute the expectation value.
TensorNetworkSpinOp spinOp(ham, m_cutnHandle);
std::complex<double> expVal =
m_state->computeExpVal(spinOp.getNetworkOperator());
expVal += spinOp.getIdentityTermOffset();
return cudaq::observe_result(expVal.real(), ham,
cudaq::sample_result(cudaq::ExecutionResult(
{}, ham.to_string(false), expVal.real())));
}

std::vector<std::string> termStrs;
std::vector<cudaq::spin_op::spin_op_term> terms;
Expand Down
8 changes: 8 additions & 0 deletions runtime/nvqir/cutensornet/simulator_cutensornet.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ class SimulatorTensorNetBase : public nvqir::CircuitSimulatorBase<double> {
// cutensornetStateApplyControlledTensorOperator). Tensornet supports
// arbitrary values.
std::size_t m_maxControlledRankForFullTensorExpansion = 1;

// Flag to enable contraction path reuse when computing the expectation value
// (observe).
// Default is off (no contraction path reuse).
// Reusing the path, while saving the path finding time, prevents lightcone
// simplification, e.g., when the spin op is sparse (only acting on a few
// qubits).
bool m_reuseContractionPathObserve = false;
};

} // end namespace nvqir
68 changes: 65 additions & 3 deletions runtime/nvqir/cutensornet/tensornet_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,15 +681,14 @@ std::vector<std::complex<double>> TensorNetState::computeExpVals(
placeHolderArraySize,
cudaMemcpyHostToDevice));
std::complex<double> expVal;
std::complex<double> stateNorm{0.0, 0.0};
{
ScopedTraceWithContext("cutensornetExpectationCompute");
HANDLE_CUTN_ERROR(cutensornetExpectationCompute(
m_cutnHandle, tensorNetworkExpectation, workDesc, &expVal,
static_cast<void *>(&stateNorm),
/*stateNorm*/ nullptr,
/*cudaStream*/ 0));
}
allExpVals.emplace_back(expVal / std::abs(stateNorm));
allExpVals.emplace_back(expVal);
}
}

Expand All @@ -699,6 +698,69 @@ std::vector<std::complex<double>> TensorNetState::computeExpVals(
return allExpVals;
}

/// @brief Evaluate the expectation value of a `cutensornetNetworkOperator_t`
/// against the current tensor network state.
///
/// Follows the standard cuTensorNet expectation workflow:
/// create -> configure -> prepare -> compute -> destroy.
///
/// @param tensorNetworkOperator The network operator whose expectation value
/// is computed. Caller retains ownership.
/// @return The (complex) expectation value. Note: the state norm is not
/// requested from `cutensornetExpectationCompute` (nullptr is passed), so the
/// returned value is not normalized here.
/// @throws std::runtime_error if the device scratch buffer is too small for
/// the workspace that cuTensorNet requires.
std::complex<double> TensorNetState::computeExpVal(
    cutensornetNetworkOperator_t tensorNetworkOperator) {
  LOG_API_TIME();
  cutensornetStateExpectation_t tensorNetworkExpectation;
  // Step 1: create the expectation value object for this state/operator pair.
  {
    ScopedTraceWithContext("cutensornetCreateExpectation");
    HANDLE_CUTN_ERROR(cutensornetCreateExpectation(m_cutnHandle, m_quantumState,
                                                   tensorNetworkOperator,
                                                   &tensorNetworkExpectation));
  }
  // Step 2: configure
  const int32_t numHyperSamples =
      8; // desired number of hyper samples used in the tensor network
         // contraction path finder
  {
    ScopedTraceWithContext("cutensornetExpectationConfigure");
    HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(
        m_cutnHandle, tensorNetworkExpectation,
        CUTENSORNET_EXPECTATION_OPT_NUM_HYPER_SAMPLES, &numHyperSamples,
        sizeof(numHyperSamples)));
  }

  // Step 3: Prepare (contraction path finding + workspace size query)
  cutensornetWorkspaceDescriptor_t workDesc;
  HANDLE_CUTN_ERROR(
      cutensornetCreateWorkspaceDescriptor(m_cutnHandle, &workDesc));
  {
    ScopedTraceWithContext("cutensornetExpectationPrepare");
    HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(
        m_cutnHandle, tensorNetworkExpectation, scratchPad.scratchSize,
        workDesc, /*cudaStream*/ 0));
  }

  // Attach the workspace buffer
  int64_t worksize{0};
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(
      m_cutnHandle, workDesc, CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
      CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH, &worksize));
  if (worksize <= static_cast<int64_t>(scratchPad.scratchSize)) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(
        m_cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
        CUTENSORNET_WORKSPACE_SCRATCH, scratchPad.d_scratch, worksize));
  } else {
    // Release cuTensorNet resources before throwing; otherwise the
    // expectation object and workspace descriptor created above would leak.
    HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(tensorNetworkExpectation));
    HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
    throw std::runtime_error("ERROR: Insufficient workspace size on Device!");
  }

  // Step 4: Compute
  std::complex<double> expVal;

  {
    ScopedTraceWithContext("cutensornetExpectationCompute");
    HANDLE_CUTN_ERROR(cutensornetExpectationCompute(
        m_cutnHandle, tensorNetworkExpectation, workDesc, &expVal,
        /*stateNorm*/ nullptr,
        /*cudaStream*/ 0));
  }
  // Step 5: clean up
  HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(tensorNetworkExpectation));
  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
  return expVal;
}

std::unique_ptr<TensorNetState> TensorNetState::createFromMpsTensors(
const std::vector<MPSTensor> &in_mpsTensors, ScratchDeviceMem &inScratchPad,
cutensornetHandle_t handle, std::mt19937 &randomEngine) {
Expand Down
5 changes: 5 additions & 0 deletions runtime/nvqir/cutensornet/tensornet_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ class TensorNetState {
std::vector<std::complex<double>>
computeExpVals(const std::vector<std::vector<bool>> &symplecticRepr);

/// @brief Evaluate the expectation value of a given
/// `cutensornetNetworkOperator_t`
std::complex<double>
computeExpVal(cutensornetNetworkOperator_t tensorNetworkOperator);

/// @brief Number of qubits that this state represents.
std::size_t getNumQubits() const { return m_numQubits; }

Expand Down
22 changes: 22 additions & 0 deletions unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,28 @@ if(TARGET nvqir-tensornet)
endif() # NGPUS
endif() # NVIDIA_SMI
endif() # MPI_CXX_FOUND

# Test CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=ON mode (on a few test cases that have cudaq::observe)
# Reuses a subset of the integration test sources; they exercise cudaq::observe
# against the tensornet backend with contraction-path reuse enabled.
add_executable(test_tensornet_observe_path_reuse
integration/builder_tester.cpp
integration/deuteron_variational_tester.cpp
integration/observe_result_tester.cpp
)
target_include_directories(test_tensornet_observe_path_reuse PRIVATE .)
# Tests use NVQIR_BACKEND_NAME to select backend-specific expectations.
target_compile_definitions(test_tensornet_observe_path_reuse
PRIVATE -DNVQIR_BACKEND_NAME=tensornet)
# --no-as-needed keeps the nvqir backend library linked even though no symbol
# is referenced directly (it registers itself at load time).
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT APPLE)
target_link_options(test_tensornet_observe_path_reuse PRIVATE -Wl,--no-as-needed)
endif()
target_link_libraries(test_tensornet_observe_path_reuse
PRIVATE
cudaq
cudaq-builder
cudaq-platform-default
nvqir-tensornet
gtest_main)
# Run this test with "CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=ON"
# ("ON" and "TRUE" are equivalent truthy values for this env var).
gtest_discover_tests(test_tensornet_observe_path_reuse TEST_SUFFIX _PathReuse PROPERTIES ENVIRONMENT "CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=ON" PROPERTIES LABELS "gpu_required")
endif()

# Create an executable for SpinOp UnitTests
Expand Down
4 changes: 4 additions & 0 deletions unittests/integration/observe_result_tester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ CUDAQ_TEST(ObserveResult, checkSimple) {
EXPECT_TRUE(x0x1Counts.size() == 4);
}

// By default, tensornet backends only compute the overall expectation value in
// observe, i.e., no sub-term calculations.
#ifndef CUDAQ_BACKEND_TENSORNET
CUDAQ_TEST(ObserveResult, checkExpValBug) {

auto kernel = []() __qpu__ {
Expand Down Expand Up @@ -112,3 +115,4 @@ CUDAQ_TEST(ObserveResult, checkExpValBug) {
EXPECT_NEAR(exp, .79, 1e-1);
}
#endif
#endif

0 comments on commit db48d95

Please sign in to comment.