From b2092e019ed67497e110e80dcfdb744bdc0e6c5d Mon Sep 17 00:00:00 2001
From: Frederik Van der Veken <frederik@cern.ch>
Date: Thu, 7 Mar 2024 17:32:57 +0100
Subject: [PATCH 1/7] Missing prebuilt kernel for aperture interpolation

---
 xtrack/prebuilt_kernels/kernel_definitions.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/xtrack/prebuilt_kernels/kernel_definitions.py b/xtrack/prebuilt_kernels/kernel_definitions.py
index d2a97f958..f0ab6ad15 100644
--- a/xtrack/prebuilt_kernels/kernel_definitions.py
+++ b/xtrack/prebuilt_kernels/kernel_definitions.py
@@ -78,6 +78,13 @@
         'config': BASE_CONFIG,
         'classes': ONLY_XTRACK_ELEMENTS + NO_SYNRAD_ELEMENTS,
     }),
+    ('default_only_xtrack_no_limit', {
+        'config': {
+            **{k: v for k, v in BASE_CONFIG.items()
+                if k != 'XTRACK_GLOBAL_XY_LIMIT'}
+        },
+        'classes': ONLY_XTRACK_ELEMENTS + NO_SYNRAD_ELEMENTS,
+    }),
     ('only_xtrack_non_tracking_kernels', {
         'config': BASE_CONFIG,
         'classes': [],
@@ -174,6 +181,13 @@
             'config': {},
             'classes': DEFAULT_XCOLL_ELEMENTS,
         }),
+        ('default_xcoll_no_limit', {
+            'config': {
+                **{k: v for k, v in BASE_CONFIG.items()
+                    if k != 'XTRACK_GLOBAL_XY_LIMIT'}
+            },
+            'classes': DEFAULT_XCOLL_ELEMENTS,
+        }),
         ('default_xcoll_frozen_longitudinal', {
             'config': {**BASE_CONFIG, **FREEZE_LONGITUDINAL},
             'classes': DEFAULT_XCOLL_ELEMENTS,

From bde9e0af1ce7f628d49b2e5dd6f91d654a99d03a Mon Sep 17 00:00:00 2001
From: "Frederik F. Van der Veken" <frederik@cern.ch>
Date: Sat, 9 Mar 2024 00:56:01 +0100
Subject: [PATCH 2/7] Added ZetaShift to prebuilt xcoll kernels (for
 off-momentum loss maps)

---
 xtrack/prebuilt_kernels/kernel_definitions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xtrack/prebuilt_kernels/kernel_definitions.py b/xtrack/prebuilt_kernels/kernel_definitions.py
index f0ab6ad15..c84885841 100644
--- a/xtrack/prebuilt_kernels/kernel_definitions.py
+++ b/xtrack/prebuilt_kernels/kernel_definitions.py
@@ -166,6 +166,7 @@
     DEFAULT_XCOLL_ELEMENTS = [
         *ONLY_XTRACK_ELEMENTS,
         *NO_SYNRAD_ELEMENTS,
+        ZetaShift,
         xc.BlackAbsorber,
         xc.EverestBlock,
         xc.EverestCollimator,

From 8c3ac6b696d19fdd5bb17fd0ebc8a25810f0d367 Mon Sep 17 00:00:00 2001
From: "Frederik F. Van der Veken" <frederik@cern.ch>
Date: Sat, 9 Mar 2024 19:29:38 +0100
Subject: [PATCH 3/7] When initialising the different threads for openmp, the
 particle size per thread is calculated based on the *active* particles. As
 the particles are reorganised at the end of tracking, this can produce a huge
 speed-up when tracking turn-by-turn (as is the case with a progress
 indicator) and particles often die, freeing up resources for the threads.

---
 xtrack/tracker.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/xtrack/tracker.py b/xtrack/tracker.py
index 72fff5f08..186143114 100644
--- a/xtrack/tracker.py
+++ b/xtrack/tracker.py
@@ -466,19 +466,20 @@ def _build_kernel(
                              int64_t offset_tbt_monitor,
                 /*gpuglmem*/ int8_t* io_buffer){
 
-            const int64_t capacity = ParticlesData_get__capacity(particles);               //only_for_context cpu_openmp
-            const int num_threads = omp_get_max_threads();                                 //only_for_context cpu_openmp
-            const int64_t chunk_size = (capacity + num_threads - 1)/num_threads; // ceil division  //only_for_context cpu_openmp
-            #pragma omp parallel for                                                       //only_for_context cpu_openmp
-            for (int chunk = 0; chunk < num_threads; chunk++) {                            //only_for_context cpu_openmp
-            int64_t part_id = chunk * chunk_size;                                          //only_for_context cpu_openmp
-            int64_t end_id = (chunk + 1) * chunk_size;                                     //only_for_context cpu_openmp
-            if (end_id > capacity) end_id = capacity;                                      //only_for_context cpu_openmp
+            const int64_t capacity = ParticlesData_get__capacity(particles);                   //only_for_context cpu_openmp
+            const int64_t alive = ParticlesData_get__num_active_particles(particles);          //only_for_context cpu_openmp
+            const int num_threads = omp_get_max_threads();                                     //only_for_context cpu_openmp
+            const int64_t chunk_size = (alive + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
+            #pragma omp parallel for                                                           //only_for_context cpu_openmp
+            for (int chunk = 0; chunk < num_threads; chunk++) {                                //only_for_context cpu_openmp
+            int64_t part_id = chunk * chunk_size;                                              //only_for_context cpu_openmp
+            int64_t end_id = (chunk + 1) * chunk_size;                                         //only_for_context cpu_openmp
+            if (end_id > alive) end_id = alive;                                                //only_for_context cpu_openmp
 
             int64_t part_id = 0;                                      //only_for_context cpu_serial
             int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x;  //only_for_context cuda
             int64_t part_id = get_global_id(0);                       //only_for_context opencl
-            int64_t end_id = 0; // unused outside of openmp  //only_for_context cpu_serial cuda opencl
+            int64_t end_id = 0; // unused outside of openmp           //only_for_context cpu_serial cuda opencl
 
             LocalParticle lpart;
             lpart.io_buffer = io_buffer;

From 12e503fb596be5098e456238ae7680c32b087a06 Mon Sep 17 00:00:00 2001
From: "Frederik F. Van der Veken" <frederik@cern.ch>
Date: Sat, 9 Mar 2024 21:41:40 +0100
Subject: [PATCH 4/7] Same for per particle kernels in BeamElement.

---
 xtrack/base_element.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/xtrack/base_element.py b/xtrack/base_element.py
index 8bf0911f4..334b24da0 100644
--- a/xtrack/base_element.py
+++ b/xtrack/base_element.py
@@ -105,16 +105,17 @@ def _generate_per_particle_kernel_from_local_particle_function(
 '''
                              int64_t flag_increment_at_element,
                 /*gpuglmem*/ int8_t* io_buffer){
-            const int num_threads = omp_get_max_threads();                                 //only_for_context cpu_openmp
-            const int64_t capacity = ParticlesData_get__capacity(particles);               //only_for_context cpu_openmp
-            const int64_t chunk_size = (capacity + num_threads - 1)/num_threads; // ceil division  //only_for_context cpu_openmp
-            #pragma omp parallel for                                                       //only_for_context cpu_openmp
-            for (int64_t batch_id = 0; batch_id < num_threads; batch_id++) {               //only_for_context cpu_openmp
+            const int num_threads = omp_get_max_threads();                                     //only_for_context cpu_openmp
+            const int64_t capacity = ParticlesData_get__capacity(particles);                   //only_for_context cpu_openmp
+            const int64_t alive = ParticlesData_get__num_active_particles(particles);          //only_for_context cpu_openmp
+            const int64_t chunk_size = (alive + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
+            #pragma omp parallel for                                                           //only_for_context cpu_openmp
+            for (int64_t batch_id = 0; batch_id < num_threads; batch_id++) {                   //only_for_context cpu_openmp
                 LocalParticle lpart;
                 lpart.io_buffer = io_buffer;
                 int64_t part_id = batch_id * chunk_size;                                       //only_for_context cpu_openmp
                 int64_t end_id = (batch_id + 1) * chunk_size;                                  //only_for_context cpu_openmp
-                if (end_id > capacity) end_id = capacity;                                      //only_for_context cpu_openmp
+                if (end_id > alive) end_id = alive;                                            //only_for_context cpu_openmp
 
                 int64_t part_id = 0;                    //only_for_context cpu_serial
                 int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x; //only_for_context cuda

From 2e3a201d2558644532c67efc3547a9e3652f12b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= <szymon@lopaciuk.eu>
Date: Wed, 13 Mar 2024 08:38:55 +0100
Subject: [PATCH 5/7] rename variables

---
 xtrack/base_element.py | 6 +++---
 xtrack/tracker.py      | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/xtrack/base_element.py b/xtrack/base_element.py
index 334b24da0..dd5f6bab1 100644
--- a/xtrack/base_element.py
+++ b/xtrack/base_element.py
@@ -107,15 +107,15 @@ def _generate_per_particle_kernel_from_local_particle_function(
                 /*gpuglmem*/ int8_t* io_buffer){
             const int num_threads = omp_get_max_threads();                                     //only_for_context cpu_openmp
             const int64_t capacity = ParticlesData_get__capacity(particles);                   //only_for_context cpu_openmp
-            const int64_t alive = ParticlesData_get__num_active_particles(particles);          //only_for_context cpu_openmp
-            const int64_t chunk_size = (alive + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
+            const int64_t num_active = ParticlesData_get__num_active_particles(particles);     //only_for_context cpu_openmp
+            const int64_t chunk_size = (num_active + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
             #pragma omp parallel for                                                           //only_for_context cpu_openmp
             for (int64_t batch_id = 0; batch_id < num_threads; batch_id++) {                   //only_for_context cpu_openmp
                 LocalParticle lpart;
                 lpart.io_buffer = io_buffer;
                 int64_t part_id = batch_id * chunk_size;                                       //only_for_context cpu_openmp
                 int64_t end_id = (batch_id + 1) * chunk_size;                                  //only_for_context cpu_openmp
-                if (end_id > alive) end_id = alive;                                            //only_for_context cpu_openmp
+                if (end_id > num_active) end_id = num_active;                                            //only_for_context cpu_openmp
 
                 int64_t part_id = 0;                    //only_for_context cpu_serial
                 int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x; //only_for_context cuda
diff --git a/xtrack/tracker.py b/xtrack/tracker.py
index 186143114..123e630b5 100644
--- a/xtrack/tracker.py
+++ b/xtrack/tracker.py
@@ -467,14 +467,14 @@ def _build_kernel(
                 /*gpuglmem*/ int8_t* io_buffer){
 
             const int64_t capacity = ParticlesData_get__capacity(particles);                   //only_for_context cpu_openmp
-            const int64_t alive = ParticlesData_get__num_active_particles(particles);          //only_for_context cpu_openmp
+            const int64_t num_active = ParticlesData_get__num_active_particles(particles);     //only_for_context cpu_openmp
             const int num_threads = omp_get_max_threads();                                     //only_for_context cpu_openmp
-            const int64_t chunk_size = (alive + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
+            const int64_t chunk_size = (num_active + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
             #pragma omp parallel for                                                           //only_for_context cpu_openmp
             for (int chunk = 0; chunk < num_threads; chunk++) {                                //only_for_context cpu_openmp
             int64_t part_id = chunk * chunk_size;                                              //only_for_context cpu_openmp
             int64_t end_id = (chunk + 1) * chunk_size;                                         //only_for_context cpu_openmp
-            if (end_id > alive) end_id = alive;                                                //only_for_context cpu_openmp
+            if (end_id > num_active) end_id = num_active;                                      //only_for_context cpu_openmp
 
             int64_t part_id = 0;                                      //only_for_context cpu_serial
             int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x;  //only_for_context cuda

From fa07c6248d0eba3be8378ec328fcd17f85339a43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= <szymon@lopaciuk.eu>
Date: Wed, 13 Mar 2024 16:53:29 +0100
Subject: [PATCH 6/7] fixes for skip reordering

---
 tests/test_particles_api.py | 135 ++++++++++++++++++++++++++++++++++++
 xtrack/base_element.py      |  32 +++++++--
 xtrack/tracker.py           |  27 ++++++--
 3 files changed, 183 insertions(+), 11 deletions(-)
 create mode 100644 tests/test_particles_api.py

diff --git a/tests/test_particles_api.py b/tests/test_particles_api.py
new file mode 100644
index 000000000..f1a5db34a
--- /dev/null
+++ b/tests/test_particles_api.py
@@ -0,0 +1,135 @@
+# copyright ############################### #
+# This file is part of the Xpart Package.   #
+# Copyright (c) CERN, 2023.                 #
+# ######################################### #
+import numpy as np
+import pytest
+
+import xtrack as xt
+import xobjects as xo
+import xpart as xp
+
+
+def test_check_is_active_sorting_openmp():
+    test_context = xo.ContextCpu(omp_num_threads=5)
+
+    class TestElement(xt.BeamElement):
+        _xofields = {
+            'states': xo.Int64[:],
+        }
+
+        _extra_c_sources = ["""
+            #define XT_OMP_SKIP_REORGANIZE
+
+            /*gpufun*/
+            void TestElement_track_local_particle(
+                TestElementData el,
+                LocalParticle* part0
+            ) {
+                //start_per_particle_block (part0->part)
+                    int64_t state = check_is_active(part);
+                    int64_t id = LocalParticle_get_particle_id(part);
+                    TestElementData_set_states(el, id, state);
+                //end_per_particle_block
+            }
+        """]
+
+    el = TestElement(
+        _context=test_context,
+        states=np.zeros(18, dtype=np.int64),
+    )
+    particles = xp.Particles(
+        _context=test_context,
+        state=[
+            1, 0, 1, 0, 1,  # should be reordered to 1, 1, 1, 0, 0
+            0, 0, 0, 0, 0,  # should be left intact
+            0, 1, 0, 1, 1,  # should be reordered to 1, 1, 1, 0, 0
+            1, 1, 0,        # should be left intact
+        ],
+        _capacity=22,       # there are 4 particles that are unallocated
+        _no_reorganize=True,
+    )
+
+    el.track(particles)
+
+    # We have five threads, so the particles should be split into chunks
+    # of 5, 5, 5, 3 + 2 (unallocated), 2 (unallocated).
+    assert len(particles.state) == 22
+
+    # Check that each chunk is reorganized correctly.
+    # First batch:
+    assert np.all(particles.state[0:5] == [1, 1, 1, 0, 0])
+    assert set(particles.particle_id[0:3]) == {0, 2, 4}
+    assert set(particles.particle_id[3:5]) == {1, 3}
+
+    # Second batch:
+    assert np.all(particles.state[5:10] == [0, 0, 0, 0, 0])
+    # Don't reorder if not needed:
+    assert np.all(particles.particle_id[5:10] == [5, 6, 7, 8, 9])
+
+    # Third batch:
+    assert np.all(particles.state[10:15] == [1, 1, 1, 0, 0])
+    assert set(particles.particle_id[10:13]) == {11, 13, 14}
+    assert set(particles.particle_id[13:15]) == {10, 12}
+
+    # Fourth batch:
+    assert np.all(particles.state[15:20] == [1, 1, 0, -999999999, -999999999])
+    # Don't reorder if not needed:
+    assert np.all(particles.particle_id[15:18] == [15, 16, 17])
+
+    # Fifth batch (unallocated):
+    assert np.all(particles.state[20:22] == [-999999999, -999999999])
+
+
+@pytest.mark.parametrize(
+    'test_context',
+    [
+        xo.ContextCpu(),
+        xo.ContextCpu(omp_num_threads=4),
+    ]
+)
+def test_check_is_active_sorting_cpu_default(test_context):
+    class TestElement(xt.BeamElement):
+        _xofields = {
+            'states': xo.Int64[:],
+        }
+
+        _extra_c_sources = ["""
+            /*gpufun*/
+            void TestElement_track_local_particle(
+                TestElementData el,
+                LocalParticle* part0
+            ) {
+                //start_per_particle_block (part0->part)
+                    int64_t state = check_is_active(part);
+                    int64_t id = LocalParticle_get_particle_id(part);
+                    TestElementData_set_states(el, id, state);
+                //end_per_particle_block
+            }
+        """]
+
+    el = TestElement(
+        _context=test_context,
+        states=np.zeros(18, dtype=np.int64),
+    )
+    particles = xp.Particles(
+        _context=test_context,
+        state=[
+            1, 0, 1, 0, 1,
+            0, 0, 0, 0, 0,
+            0, 1, 0, 1, 1,
+            1, 1, 0,
+        ],
+        _no_reorganize=True,
+    )
+    # We want to simulate a situation where a recount is needed, so we put a
+    # value of active particles to be equal to the total number of particles:
+    particles._num_active_particles = 18
+
+    el.track(particles)
+
+    # Here we don't reorganize by batches, so we just check the whole array
+    # to see if it's sensible:
+    assert np.all(particles.state == ([1] * 8) + ([0] * 10))
+    assert set(particles.particle_id[:8]) == {0, 2, 4, 11, 13, 14, 15, 16}
+    assert set(particles.particle_id[8:]) == {1, 3, 5, 6, 7, 8, 9, 10, 12, 17}
diff --git a/xtrack/base_element.py b/xtrack/base_element.py
index dd5f6bab1..3ba5941e0 100644
--- a/xtrack/base_element.py
+++ b/xtrack/base_element.py
@@ -26,7 +26,7 @@
     const int64_t XT_part_block_end_idx = LocalParticle_get__num_active_particles(part0); //only_for_context cpu_serial
 
     //#pragma omp simd // TODO: currently does not work, needs investigating
-    for (int64_t XT_part_block_ii=XT_part_block_start_idx; XT_part_block_ii<XT_part_block_end_idx; XT_part_block_ii++) { //only_for_context cpu_openmp cpu_serial
+    for (int64_t XT_part_block_ii = XT_part_block_start_idx; XT_part_block_ii<XT_part_block_end_idx; XT_part_block_ii++) { //only_for_context cpu_openmp cpu_serial
 
         LocalParticle lpart = *part0;    //only_for_context cpu_serial cpu_openmp
         LocalParticle* part = &lpart;    //only_for_context cpu_serial cpu_openmp
@@ -105,17 +105,37 @@ def _generate_per_particle_kernel_from_local_particle_function(
 '''
                              int64_t flag_increment_at_element,
                 /*gpuglmem*/ int8_t* io_buffer){
-            const int num_threads = omp_get_max_threads();                                     //only_for_context cpu_openmp
-            const int64_t capacity = ParticlesData_get__capacity(particles);                   //only_for_context cpu_openmp
-            const int64_t num_active = ParticlesData_get__num_active_particles(particles);     //only_for_context cpu_openmp
-            const int64_t chunk_size = (num_active + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
+
+            #define CONTEXT_OPENMP  //only_for_context cpu_openmp
+            #ifdef CONTEXT_OPENMP
+                const int64_t capacity = ParticlesData_get__capacity(particles);
+                const int num_threads = omp_get_max_threads();
+
+                #ifndef XT_OMP_SKIP_REORGANIZE
+                    const int64_t num_particles_to_track = ParticlesData_get__num_active_particles(particles);
+
+                    {
+                        LocalParticle lpart;
+                        lpart.io_buffer = io_buffer;
+                        Particles_to_LocalParticle(particles, &lpart, 0, capacity);
+                        check_is_active(&lpart);
+                        count_reorganized_particles(&lpart);
+                        LocalParticle_to_Particles(&lpart, particles, 0, capacity);
+                    }
+                #else // When we skip reorganize, we cannot just batch active particles
+                    const int64_t num_particles_to_track = capacity;
+                #endif
+                
+                const int64_t chunk_size = (num_particles_to_track + num_threads - 1)/num_threads; // ceil division
+            #endif // CONTEXT_OPENMP
+
             #pragma omp parallel for                                                           //only_for_context cpu_openmp
             for (int64_t batch_id = 0; batch_id < num_threads; batch_id++) {                   //only_for_context cpu_openmp
                 LocalParticle lpart;
                 lpart.io_buffer = io_buffer;
                 int64_t part_id = batch_id * chunk_size;                                       //only_for_context cpu_openmp
                 int64_t end_id = (batch_id + 1) * chunk_size;                                  //only_for_context cpu_openmp
-                if (end_id > num_active) end_id = num_active;                                            //only_for_context cpu_openmp
+                if (end_id > num_particles_to_track) end_id = num_particles_to_track;          //only_for_context cpu_openmp
 
                 int64_t part_id = 0;                    //only_for_context cpu_serial
                 int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x; //only_for_context cuda
diff --git a/xtrack/tracker.py b/xtrack/tracker.py
index 123e630b5..d9237b17e 100644
--- a/xtrack/tracker.py
+++ b/xtrack/tracker.py
@@ -466,15 +466,32 @@ def _build_kernel(
                              int64_t offset_tbt_monitor,
                 /*gpuglmem*/ int8_t* io_buffer){
 
-            const int64_t capacity = ParticlesData_get__capacity(particles);                   //only_for_context cpu_openmp
-            const int64_t num_active = ParticlesData_get__num_active_particles(particles);     //only_for_context cpu_openmp
-            const int num_threads = omp_get_max_threads();                                     //only_for_context cpu_openmp
-            const int64_t chunk_size = (num_active + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
+            #define CONTEXT_OPENMP  //only_for_context cpu_openmp
+            #ifdef CONTEXT_OPENMP
+                const int64_t capacity = ParticlesData_get__capacity(particles);
+                const int num_threads = omp_get_max_threads();
+
+                #ifndef XT_OMP_SKIP_REORGANIZE
+                    const int64_t num_particles_to_track = ParticlesData_get__num_active_particles(particles);
+
+                    LocalParticle lpart;
+                    lpart.io_buffer = io_buffer;
+                    Particles_to_LocalParticle(particles, &lpart, 0, capacity);
+                    check_is_active(&lpart);
+                    count_reorganized_particles(&lpart);
+                    LocalParticle_to_Particles(&lpart, particles, 0, capacity);
+                #else // When we skip reorganize, we cannot just batch active particles
+                    const int64_t num_particles_to_track = capacity;
+                #endif
+                
+                const int64_t chunk_size = (num_particles_to_track + num_threads - 1)/num_threads; // ceil division
+            #endif // CONTEXT_OPENMP
+            
             #pragma omp parallel for                                                           //only_for_context cpu_openmp
             for (int chunk = 0; chunk < num_threads; chunk++) {                                //only_for_context cpu_openmp
             int64_t part_id = chunk * chunk_size;                                              //only_for_context cpu_openmp
             int64_t end_id = (chunk + 1) * chunk_size;                                         //only_for_context cpu_openmp
-            if (end_id > num_active) end_id = num_active;                                      //only_for_context cpu_openmp
+            if (end_id > num_particles_to_track) end_id = num_particles_to_track;              //only_for_context cpu_openmp
 
             int64_t part_id = 0;                                      //only_for_context cpu_serial
             int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x;  //only_for_context cuda

From 1d5c4f1f788fe3d6ec94d054a7ddbba16717f70d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= <szymon@lopaciuk.eu>
Date: Wed, 13 Mar 2024 17:11:34 +0100
Subject: [PATCH 7/7] local scope lpart in tracker on openmp

---
 xtrack/tracker.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/xtrack/tracker.py b/xtrack/tracker.py
index d9237b17e..a47e466f1 100644
--- a/xtrack/tracker.py
+++ b/xtrack/tracker.py
@@ -473,13 +473,15 @@ def _build_kernel(
 
                 #ifndef XT_OMP_SKIP_REORGANIZE
                     const int64_t num_particles_to_track = ParticlesData_get__num_active_particles(particles);
-
-                    LocalParticle lpart;
-                    lpart.io_buffer = io_buffer;
-                    Particles_to_LocalParticle(particles, &lpart, 0, capacity);
-                    check_is_active(&lpart);
-                    count_reorganized_particles(&lpart);
-                    LocalParticle_to_Particles(&lpart, particles, 0, capacity);
+                    
+                    {
+                        LocalParticle lpart;
+                        lpart.io_buffer = io_buffer;
+                        Particles_to_LocalParticle(particles, &lpart, 0, capacity);
+                        check_is_active(&lpart);
+                        count_reorganized_particles(&lpart);
+                        LocalParticle_to_Particles(&lpart, particles, 0, capacity);
+                    }
                 #else // When we skip reorganize, we cannot just batch active particles
                     const int64_t num_particles_to_track = capacity;
                 #endif