From e016f9ca612482fe2b15af4d54828cf3c1899f6f Mon Sep 17 00:00:00 2001
From: Jake Lishman <jake.lishman@ibm.com>
Date: Wed, 15 Feb 2023 00:08:30 +0000
Subject: [PATCH] Remove Sabre's manual insertion-order iteration and
 unnecessary sorts (#9560)

* Remove Sabre's manual insertion-order iteration and unnecessary sorts

The manual insertion-order tracking was initially added in 02a1939
(gh-9012) during a complete rewrite of the scoring algorithms for Sabre
to demonstrate that the new algorithm could be made fully RNG
compatible.  It maintained the identical iteration order to the previous
iteration once the front-layer data structure was swapped from being a
raw list to hash-based.

The new data structure doesn't require the manual tracking to be
reproducible as long as the iteration order is independent of the hash
seed.  This swaps the relevant places over the `IndexMap` to be able to
remove a lot of the manual tracking.  In casual testing, this didn't
appear to have much effect on performance, but the resulting code is
much simpler.

The sorts (and deliberate canonicalisation of the swaps) was necessary
to match RNG compatibility in the pre-relative-score Sabre, but since
the swaps are now iterated through in a deterministic order and
guaranteed to be generated only once each (the previous version used a
hashset to remove duplicates), neither step is necessary.

For a QV circuit of depth 5 at 1081 qubits on heavy hex, this is worth
almost a 2x speedup in routing performance.

* Fix RNG-related tests

* Add RNG-change note
---
 .../sabre-sort-rng-056f26f205e38bab.yaml      | 15 ++++
 src/sabre_swap/layer.rs                       | 83 +++----------------
 src/sabre_swap/mod.rs                         | 12 +--
 .../TestsSabreSwap_handle_measurement.qasm    |  8 +-
 test/python/transpiler/test_sabre_layout.py   | 41 ++-------
 5 files changed, 39 insertions(+), 120 deletions(-)
 create mode 100644 releasenotes/notes/sabre-sort-rng-056f26f205e38bab.yaml

diff --git a/releasenotes/notes/sabre-sort-rng-056f26f205e38bab.yaml b/releasenotes/notes/sabre-sort-rng-056f26f205e38bab.yaml
new file mode 100644
index 000000000000..c39bbfc553bf
--- /dev/null
+++ b/releasenotes/notes/sabre-sort-rng-056f26f205e38bab.yaml
@@ -0,0 +1,15 @@
+---
+features:
+  - |
+    Internal tweaks to the routing algorithm in :class:`.SabreSwap`, used in
+    transpilation of non-dynamic circuits at all non-zero optimization levels,
+    have sped up routing for very large circuits.  As an example, the time to
+    route a depth-5 :class:`.QuantumVolume` circuit for a 1081-qubit heavy-hex
+    coupling map is approximately halved.
+upgrade:
+  - |
+    The result of transpilations for fixed seeds may have changed compared to
+    previous versions of Qiskit Terra.  This is because of internal tweaks to
+    the routing algorithm used by :class:`.SabreSwap` and :class:`.SabreLayout`,
+    which are the default routing and layout passes respectively, to make them
+    significantly faster for large circuits.
diff --git a/src/sabre_swap/layer.rs b/src/sabre_swap/layer.rs
index d365b76f4f84..b3e5d597b4f3 100644
--- a/src/sabre_swap/layer.rs
+++ b/src/sabre_swap/layer.rs
@@ -10,7 +10,9 @@
 // copyright notice, and modified files need to carry a notice indicating
 // that they have been altered from the originals.
 
+use ahash;
 use hashbrown::HashMap;
+use indexmap::IndexMap;
 use ndarray::prelude::*;
 use rustworkx_core::petgraph::prelude::*;
 
@@ -21,22 +23,10 @@ use crate::nlayout::NLayout;
 /// unsatisfied 2q predecessor, which disqualifies it from being in the front layer.
 pub struct FrontLayer {
     /// Map of the (index to the) node to the qubits it acts on.
-    nodes: HashMap<NodeIndex, [usize; 2]>,
+    nodes: IndexMap<NodeIndex, [usize; 2], ahash::RandomState>,
     /// Map of each qubit to the node that acts on it and the other qubit that node acts on, if this
     /// qubit is active (otherwise `None`).
     qubits: Vec<Option<(NodeIndex, usize)>>,
-    /// Tracking the insertion order of nodes, so iteration can always go through them in a
-    /// deterministic order.  This is important for reproducibility from a set seed - when building
-    /// up the extended set with a fixed, finite size, the iteration order through the nodes of the
-    /// front layer is important.  We need to maintain the insertion order even with removals from
-    /// the layer.
-    iteration_order: Vec<Option<NodeIndex>>,
-    /// The index of the first populated entry in the `iteration_order`.  If the iteration order is
-    /// empty, this will be 0.
-    iteration_start: usize,
-    /// The index one past the last populated entry in the `iteration_order`.  If the iteration
-    /// order is empty, this will be 0.
-    iteration_end: usize,
 }
 
 impl FrontLayer {
@@ -44,37 +34,20 @@ impl FrontLayer {
         FrontLayer {
             // This is the maximum capacity of the front layer, since each qubit must be one of a
             // pair, and can only have one gate in the layer.
-            nodes: HashMap::with_capacity(num_qubits / 2),
+            nodes: IndexMap::with_capacity_and_hasher(
+                num_qubits / 2,
+                ahash::RandomState::default(),
+            ),
             qubits: vec![None; num_qubits],
-            iteration_order: vec![None; num_qubits],
-            iteration_start: 0,
-            iteration_end: 0,
         }
     }
 
-    /// Add a node into the front layer, with the two qubits it operates on.  This usually has
-    /// constant-time complexity, except if the iteration-order buffer is full.
+    /// Add a node into the front layer, with the two qubits it operates on.
     pub fn insert(&mut self, index: NodeIndex, qubits: [usize; 2]) {
         let [a, b] = qubits;
         self.qubits[a] = Some((index, b));
         self.qubits[b] = Some((index, a));
         self.nodes.insert(index, qubits);
-
-        self.iteration_order[self.iteration_end] = Some(index);
-        self.iteration_end += 1;
-        if self.iteration_end == self.iteration_order.len() {
-            // Condense items back to the start of the vector.
-            let mut ptr = 0;
-            for i in self.iteration_start..self.iteration_end {
-                if let Some(value) = self.iteration_order[i] {
-                    self.iteration_order[i] = None;
-                    self.iteration_order[ptr] = Some(value);
-                    ptr += 1;
-                }
-            }
-            self.iteration_start = 0;
-            self.iteration_end = ptr;
-        }
     }
 
     /// Remove a node from the front layer.
@@ -82,33 +55,6 @@ impl FrontLayer {
         let [q0, q1] = self.nodes.remove(index).unwrap();
         self.qubits[q0] = None;
         self.qubits[q1] = None;
-
-        // If the element was at the start of the iteration order, advance the pointer.
-        match self.iteration_order[self.iteration_start] {
-            Some(a) if a == *index => {
-                self.iteration_order[self.iteration_start] = None;
-                if self.iteration_start + 1 == self.iteration_end {
-                    self.iteration_start = 0;
-                    self.iteration_end = 0;
-                }
-                while self.iteration_start < self.iteration_end
-                    && self.iteration_order[self.iteration_start].is_none()
-                {
-                    self.iteration_start += 1;
-                }
-            }
-            _ => (),
-        }
-        // Search through and remove the element.  We leave a gap and preserve the insertion order.
-        for i in (self.iteration_start + 1)..self.iteration_end {
-            match self.iteration_order[i] {
-                Some(a) if a == *index => {
-                    self.iteration_order[i] = None;
-                    break;
-                }
-                _ => (),
-            }
-        }
     }
 
     /// Query whether a qubit has an active node.
@@ -189,24 +135,17 @@ impl FrontLayer {
 
     /// Iterator over the nodes and the pair of qubits they act on.
     pub fn iter(&self) -> impl Iterator<Item = (&NodeIndex, &[usize; 2])> {
-        (&self.iteration_order)[self.iteration_start..self.iteration_end]
-            .iter()
-            .filter_map(move |node_opt| node_opt.as_ref().map(|node| (node, &self.nodes[node])))
+        self.nodes.iter()
     }
 
     /// Iterator over the nodes.
     pub fn iter_nodes(&self) -> impl Iterator<Item = &NodeIndex> {
-        (&self.iteration_order)[self.iteration_start..self.iteration_end]
-            .iter()
-            .filter_map(|node_opt| node_opt.as_ref())
+        self.nodes.keys()
     }
 
     /// Iterator over the qubits that have active nodes on them.
     pub fn iter_active(&self) -> impl Iterator<Item = &usize> {
-        (&self.iteration_order)[self.iteration_start..self.iteration_end]
-            .iter()
-            .filter_map(move |node_opt| node_opt.as_ref().map(|node| &self.nodes[node]))
-            .flatten()
+        self.nodes.values().flatten()
     }
 }
 
diff --git a/src/sabre_swap/mod.rs b/src/sabre_swap/mod.rs
index 00bbed543207..0be06dca1591 100644
--- a/src/sabre_swap/mod.rs
+++ b/src/sabre_swap/mod.rs
@@ -80,14 +80,7 @@ fn obtain_swaps<'a>(
             .filter_map(move |&neighbor| {
                 let virtual_neighbor = layout.phys_to_logic[neighbor];
                 if virtual_neighbor > v || !front_layer.is_active(virtual_neighbor) {
-                    // This normalisation line is only necessary to ensure equal output in the
-                    // swap-sorting stage later to the previous version of this algorithm; it can be
-                    // removed when we break that matching.  It isn't needed for determinism.
-                    if v < virtual_neighbor {
-                        Some([v, virtual_neighbor])
-                    } else {
-                        Some([virtual_neighbor, v])
-                    }
+                    Some([v, virtual_neighbor])
                 } else {
                     None
                 }
@@ -564,9 +557,6 @@ fn choose_best_swap(
             best_swaps.push(swap);
         }
     }
-    if best_swaps.len() > 1 {
-        best_swaps.sort_unstable();
-    }
     *best_swaps.choose(rng).unwrap()
 }
 
diff --git a/test/python/qasm/TestsSabreSwap_handle_measurement.qasm b/test/python/qasm/TestsSabreSwap_handle_measurement.qasm
index 11370fef5993..80754de18a52 100644
--- a/test/python/qasm/TestsSabreSwap_handle_measurement.qasm
+++ b/test/python/qasm/TestsSabreSwap_handle_measurement.qasm
@@ -8,7 +8,7 @@ measure q[2] -> c[2];
 swap q[2],q[3];
 cx q[2],q[1];
 measure q[1] -> c[1];
-swap q[1],q[2];
-cx q[1],q[0];
-measure q[0] -> c[0];
-measure q[1] -> c[3];
+swap q[0],q[1];
+cx q[2],q[1];
+measure q[1] -> c[0];
+measure q[2] -> c[3];
diff --git a/test/python/transpiler/test_sabre_layout.py b/test/python/transpiler/test_sabre_layout.py
index f1757a955f56..7ed0f69e768f 100644
--- a/test/python/transpiler/test_sabre_layout.py
+++ b/test/python/transpiler/test_sabre_layout.py
@@ -59,11 +59,7 @@ def test_5q_circuit_20q_coupling(self):
         pass_.run(dag)
 
         layout = pass_.property_set["layout"]
-        self.assertEqual(layout[qr[0]], 18)
-        self.assertEqual(layout[qr[1]], 11)
-        self.assertEqual(layout[qr[2]], 13)
-        self.assertEqual(layout[qr[3]], 12)
-        self.assertEqual(layout[qr[4]], 14)
+        self.assertEqual([layout[q] for q in circuit.qubits], [18, 11, 13, 12, 14])
 
     def test_6q_circuit_20q_coupling(self):
         """Test finds layout for 6q circuit on 20q device."""
@@ -95,12 +91,7 @@ def test_6q_circuit_20q_coupling(self):
         pass_.run(dag)
 
         layout = pass_.property_set["layout"]
-        self.assertEqual(layout[qr0[0]], 12)
-        self.assertEqual(layout[qr0[1]], 7)
-        self.assertEqual(layout[qr0[2]], 14)
-        self.assertEqual(layout[qr1[0]], 11)
-        self.assertEqual(layout[qr1[1]], 18)
-        self.assertEqual(layout[qr1[2]], 13)
+        self.assertEqual([layout[q] for q in circuit.qubits], [7, 8, 12, 6, 11, 13])
 
     def test_layout_with_classical_bits(self):
         """Test sabre layout with classical bits recreate from issue #8635."""
@@ -132,14 +123,9 @@ def test_layout_with_classical_bits(self):
         res = transpile(qc, FakeKolkata(), layout_method="sabre", seed_transpiler=1234)
         self.assertIsInstance(res, QuantumCircuit)
         layout = res._layout.initial_layout
-        self.assertEqual(layout[qc.qubits[0]], 11)
-        self.assertEqual(layout[qc.qubits[1]], 22)
-        self.assertEqual(layout[qc.qubits[2]], 21)
-        self.assertEqual(layout[qc.qubits[3]], 19)
-        self.assertEqual(layout[qc.qubits[4]], 26)
-        self.assertEqual(layout[qc.qubits[5]], 8)
-        self.assertEqual(layout[qc.qubits[6]], 17)
-        self.assertEqual(layout[qc.qubits[7]], 1)
+        self.assertEqual(
+            [layout[q] for q in qc.qubits], [13, 10, 11, 12, 17, 14, 22, 26, 5, 16, 25, 19, 7, 8]
+        )
 
     # pylint: disable=line-too-long
     def test_layout_many_search_trials(self):
@@ -193,20 +179,9 @@ def test_layout_many_search_trials(self):
         )
         self.assertIsInstance(res, QuantumCircuit)
         layout = res._layout.initial_layout
-        self.assertEqual(layout[qc.qubits[0]], 19)
-        self.assertEqual(layout[qc.qubits[1]], 10)
-        self.assertEqual(layout[qc.qubits[2]], 1)
-        self.assertEqual(layout[qc.qubits[3]], 14)
-        self.assertEqual(layout[qc.qubits[4]], 4)
-        self.assertEqual(layout[qc.qubits[5]], 11)
-        self.assertEqual(layout[qc.qubits[6]], 8)
-        self.assertEqual(layout[qc.qubits[7]], 7)
-        self.assertEqual(layout[qc.qubits[8]], 9)
-        self.assertEqual(layout[qc.qubits[9]], 0)
-        self.assertEqual(layout[qc.qubits[10]], 5)
-        self.assertEqual(layout[qc.qubits[11]], 13)
-        self.assertEqual(layout[qc.qubits[12]], 2)
-        self.assertEqual(layout[qc.qubits[13]], 3)
+        self.assertEqual(
+            [layout[q] for q in qc.qubits], [22, 21, 4, 12, 1, 23, 16, 18, 19, 25, 14, 13, 10, 7]
+        )
 
 
 if __name__ == "__main__":