Skip to content

Commit

Permalink
[Tensornet] Update observe calculation (#2522)
Browse files Browse the repository at this point in the history
* Add non path reuse option

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>

* Add docs and test

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>

* Code review: add a note about observe with tensornet and update code comments

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>

---------

Signed-off-by: Thien Nguyen <thiennguyen@nvidia.com>
  • Loading branch information
1tnguyen authored Jan 24, 2025
1 parent 19c33af commit db48d95
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 3 deletions.
13 changes: 13 additions & 0 deletions docs/sphinx/using/backends/simulators.rst
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,26 @@ Specific aspects of the simulation can be configured by setting the following of
* **`OMP_PLACES=cores`**: Set this environment variable to improve CPU parallelization.
* **`OMP_NUM_THREADS=X`**: To enable CPU parallelization, set X to `NUMBER_OF_CORES_PER_NODE/NUMBER_OF_GPUS_PER_NODE`.
* **`CUDAQ_TENSORNET_CONTROLLED_RANK=X`**: Specify the number of controlled qubits whereby the full tensor body of the controlled gate is expanded. If the number of controlled qubits is greater than this value, the gate is applied as a controlled tensor operator to the tensor network state. Default value is 1.
* **`CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=X`**: Set this environment variable to `TRUE` (`ON`) or `FALSE` (`OFF`) to enable or disable contraction path reuse when computing expectation values. Default is `OFF`.
.. note::
This backend requires an NVIDIA GPU and CUDA runtime libraries.
If you do not have these dependencies installed, you may encounter an error stating `Invalid simulator requested`.
See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.
.. note::
When using contraction path reuse (`CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=TRUE`), :code:`tensornet` backends perform a single contraction path optimization with an opaque spin operator term. This path is then used to contract all the actual terms in the spin operator, hence saving the path finding time.
Because an opaque spin operator term is used as a placeholder during contraction path optimization, the resulting contraction path may be less optimal than one computed for the actual spin operator.
For instance, if the spin operator is sparse (only acting on a few qubits), the contraction can be significantly simplified.
.. note::
:code:`tensornet` backends only return the overall expectation value for a :class:`cudaq.SpinOperator` when using the `cudaq::observe` method.
Term-by-term expectation values will not be available in the resulting `ObserveResult` object.
If needed, these values can be computed by calling `cudaq::observe` on individual terms instead.
Matrix product state
+++++++++++++++++++++++++++++++++++
Expand Down
15 changes: 15 additions & 0 deletions runtime/nvqir/cutensornet/simulator_cutensornet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ SimulatorTensorNetBase::SimulatorTensorNetBase()
HANDLE_CUTN_ERROR(cutensornetCreate(&m_cutnHandle));
// The scratch pad must be allocated after we have selected the device.
scratchPad.allocate();

// Check whether observe path reuse is enabled.
m_reuseContractionPathObserve =
cudaq::getEnvBool("CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE", false);
}

static std::vector<std::complex<double>>
Expand Down Expand Up @@ -277,6 +281,17 @@ cudaq::observe_result
SimulatorTensorNetBase::observe(const cudaq::spin_op &ham) {
LOG_API_TIME();
prepareQubitTensorState();
if (!m_reuseContractionPathObserve) {
// If contraction path reuse is disabled, convert spin_op to
// cutensornetNetworkOperator_t and compute the expectation value.
TensorNetworkSpinOp spinOp(ham, m_cutnHandle);
std::complex<double> expVal =
m_state->computeExpVal(spinOp.getNetworkOperator());
expVal += spinOp.getIdentityTermOffset();
return cudaq::observe_result(expVal.real(), ham,
cudaq::sample_result(cudaq::ExecutionResult(
{}, ham.to_string(false), expVal.real())));
}

std::vector<std::string> termStrs;
std::vector<cudaq::spin_op::spin_op_term> terms;
Expand Down
8 changes: 8 additions & 0 deletions runtime/nvqir/cutensornet/simulator_cutensornet.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ class SimulatorTensorNetBase : public nvqir::CircuitSimulatorBase<double> {
// cutensornetStateApplyControlledTensorOperator). Tensornet supports
// arbitrary values.
std::size_t m_maxControlledRankForFullTensorExpansion = 1;

// Flag to enable contraction path reuse when computing the expectation value
// (observe).
// Default is off (no contraction path reuse).
// Reusing the path, while saving the path finding time, prevents lightcone
// simplification, e.g., when the spin op is sparse (only acting on a few
// qubits).
bool m_reuseContractionPathObserve = false;
};

} // end namespace nvqir
68 changes: 65 additions & 3 deletions runtime/nvqir/cutensornet/tensornet_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,15 +681,14 @@ std::vector<std::complex<double>> TensorNetState::computeExpVals(
placeHolderArraySize,
cudaMemcpyHostToDevice));
std::complex<double> expVal;
std::complex<double> stateNorm{0.0, 0.0};
{
ScopedTraceWithContext("cutensornetExpectationCompute");
HANDLE_CUTN_ERROR(cutensornetExpectationCompute(
m_cutnHandle, tensorNetworkExpectation, workDesc, &expVal,
static_cast<void *>(&stateNorm),
/*stateNorm*/ nullptr,
/*cudaStream*/ 0));
}
allExpVals.emplace_back(expVal / std::abs(stateNorm));
allExpVals.emplace_back(expVal);
}
}

Expand All @@ -699,6 +698,69 @@ std::vector<std::complex<double>> TensorNetState::computeExpVals(
return allExpVals;
}

/// @brief Evaluate the expectation value of a `cutensornetNetworkOperator_t`
/// against the current tensor network state.
///
/// Follows the standard cuTensorNet expectation workflow:
/// create -> configure -> prepare -> compute -> destroy.
///
/// @param tensorNetworkOperator The network operator whose expectation value
/// is computed. Caller retains ownership.
/// @return The (complex) expectation value. Note: the state norm is not
/// requested from `cutensornetExpectationCompute` (nullptr is passed), so the
/// returned value is not normalized here.
/// @throws std::runtime_error if the device scratch buffer is too small for
/// the workspace that cuTensorNet requires.
std::complex<double> TensorNetState::computeExpVal(
    cutensornetNetworkOperator_t tensorNetworkOperator) {
  LOG_API_TIME();
  cutensornetStateExpectation_t tensorNetworkExpectation;
  // Step 1: create the expectation value object for this state/operator pair.
  {
    ScopedTraceWithContext("cutensornetCreateExpectation");
    HANDLE_CUTN_ERROR(cutensornetCreateExpectation(m_cutnHandle, m_quantumState,
                                                   tensorNetworkOperator,
                                                   &tensorNetworkExpectation));
  }
  // Step 2: configure
  const int32_t numHyperSamples =
      8; // desired number of hyper samples used in the tensor network
         // contraction path finder
  {
    ScopedTraceWithContext("cutensornetExpectationConfigure");
    HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(
        m_cutnHandle, tensorNetworkExpectation,
        CUTENSORNET_EXPECTATION_OPT_NUM_HYPER_SAMPLES, &numHyperSamples,
        sizeof(numHyperSamples)));
  }

  // Step 3: Prepare (contraction path finding + workspace size query)
  cutensornetWorkspaceDescriptor_t workDesc;
  HANDLE_CUTN_ERROR(
      cutensornetCreateWorkspaceDescriptor(m_cutnHandle, &workDesc));
  {
    ScopedTraceWithContext("cutensornetExpectationPrepare");
    HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(
        m_cutnHandle, tensorNetworkExpectation, scratchPad.scratchSize,
        workDesc, /*cudaStream*/ 0));
  }

  // Attach the workspace buffer
  int64_t worksize{0};
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(
      m_cutnHandle, workDesc, CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
      CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH, &worksize));
  if (worksize <= static_cast<int64_t>(scratchPad.scratchSize)) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(
        m_cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
        CUTENSORNET_WORKSPACE_SCRATCH, scratchPad.d_scratch, worksize));
  } else {
    // Release cuTensorNet resources before throwing; otherwise the
    // expectation object and workspace descriptor created above would leak.
    HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(tensorNetworkExpectation));
    HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
    throw std::runtime_error("ERROR: Insufficient workspace size on Device!");
  }

  // Step 4: Compute
  std::complex<double> expVal;

  {
    ScopedTraceWithContext("cutensornetExpectationCompute");
    HANDLE_CUTN_ERROR(cutensornetExpectationCompute(
        m_cutnHandle, tensorNetworkExpectation, workDesc, &expVal,
        /*stateNorm*/ nullptr,
        /*cudaStream*/ 0));
  }
  // Step 5: clean up
  HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(tensorNetworkExpectation));
  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
  return expVal;
}

std::unique_ptr<TensorNetState> TensorNetState::createFromMpsTensors(
const std::vector<MPSTensor> &in_mpsTensors, ScratchDeviceMem &inScratchPad,
cutensornetHandle_t handle, std::mt19937 &randomEngine) {
Expand Down
5 changes: 5 additions & 0 deletions runtime/nvqir/cutensornet/tensornet_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ class TensorNetState {
std::vector<std::complex<double>>
computeExpVals(const std::vector<std::vector<bool>> &symplecticRepr);

/// @brief Evaluate the expectation value of a given
/// `cutensornetNetworkOperator_t`
std::complex<double>
computeExpVal(cutensornetNetworkOperator_t tensorNetworkOperator);

/// @brief Number of qubits that this state represents.
std::size_t getNumQubits() const { return m_numQubits; }

Expand Down
22 changes: 22 additions & 0 deletions unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,28 @@ if(TARGET nvqir-tensornet)
endif() # NGPUS
endif() # NVIDIA_SMI
endif() # MPI_CXX_FOUND

# Test CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=ON mode (on a few test cases that have cudaq::observe)
# Reuses a subset of the integration test sources; they exercise cudaq::observe
# against the tensornet backend with contraction-path reuse enabled.
add_executable(test_tensornet_observe_path_reuse
integration/builder_tester.cpp
integration/deuteron_variational_tester.cpp
integration/observe_result_tester.cpp
)
target_include_directories(test_tensornet_observe_path_reuse PRIVATE .)
# Tests use NVQIR_BACKEND_NAME to select backend-specific expectations.
target_compile_definitions(test_tensornet_observe_path_reuse
PRIVATE -DNVQIR_BACKEND_NAME=tensornet)
# --no-as-needed keeps the nvqir backend library linked even though no symbol
# is referenced directly (it registers itself at load time).
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT APPLE)
target_link_options(test_tensornet_observe_path_reuse PRIVATE -Wl,--no-as-needed)
endif()
target_link_libraries(test_tensornet_observe_path_reuse
PRIVATE
cudaq
cudaq-builder
cudaq-platform-default
nvqir-tensornet
gtest_main)
# Run this test with "CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=ON"
# ("ON" and "TRUE" are equivalent truthy values for this env var).
gtest_discover_tests(test_tensornet_observe_path_reuse TEST_SUFFIX _PathReuse PROPERTIES ENVIRONMENT "CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=ON" PROPERTIES LABELS "gpu_required")
endif()

# Create an executable for SpinOp UnitTests
Expand Down
4 changes: 4 additions & 0 deletions unittests/integration/observe_result_tester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ CUDAQ_TEST(ObserveResult, checkSimple) {
EXPECT_TRUE(x0x1Counts.size() == 4);
}

// By default, tensornet backends only compute the overall expectation value in
// observe, i.e., no sub-term calculations.
#ifndef CUDAQ_BACKEND_TENSORNET
CUDAQ_TEST(ObserveResult, checkExpValBug) {

auto kernel = []() __qpu__ {
Expand Down Expand Up @@ -112,3 +115,4 @@ CUDAQ_TEST(ObserveResult, checkExpValBug) {
EXPECT_NEAR(exp, .79, 1e-1);
}
#endif
#endif

0 comments on commit db48d95

Please sign in to comment.