Merge pull request #460 from xsuite/release/v0.54.2
Release/v0.54.2
giadarol authored Mar 15, 2024
2 parents 3e3dd97 + 1d5c4f1 commit 4578880
Showing 4 changed files with 207 additions and 16 deletions.
135 changes: 135 additions & 0 deletions tests/test_particles_api.py
@@ -0,0 +1,135 @@
# copyright ############################### #
# This file is part of the Xpart Package. #
# Copyright (c) CERN, 2023. #
# ######################################### #
import numpy as np
import pytest

import xtrack as xt
import xobjects as xo
import xpart as xp


def test_check_is_active_sorting_openmp():
test_context = xo.ContextCpu(omp_num_threads=5)

class TestElement(xt.BeamElement):
_xofields = {
'states': xo.Int64[:],
}

_extra_c_sources = ["""
#define XT_OMP_SKIP_REORGANIZE
/*gpufun*/
void TestElement_track_local_particle(
TestElementData el,
LocalParticle* part0
) {
//start_per_particle_block (part0->part)
int64_t state = check_is_active(part);
int64_t id = LocalParticle_get_particle_id(part);
TestElementData_set_states(el, id, state);
//end_per_particle_block
}
"""]

el = TestElement(
_context=test_context,
states=np.zeros(18, dtype=np.int64),
)
particles = xp.Particles(
_context=test_context,
state=[
1, 0, 1, 0, 1, # should be reordered to 1, 1, 1, 0, 0
0, 0, 0, 0, 0, # should be left intact
0, 1, 0, 1, 1, # should be reordered to 1, 1, 1, 0, 0
1, 1, 0, # should be left intact
],
_capacity=22,  # the 4 extra slots remain unallocated
_no_reorganize=True,
)

el.track(particles)

# We have five threads, so the particles should be split into chunks
# of 5, 5, 5, 3 + 2 (unallocated), 2 (unallocated).
assert len(particles.state) == 22

# Check that each chunk is reorganized correctly.
# First batch:
assert np.all(particles.state[0:5] == [1, 1, 1, 0, 0])
assert set(particles.particle_id[0:3]) == {0, 2, 4}
assert set(particles.particle_id[3:5]) == {1, 3}

# Second batch:
assert np.all(particles.state[5:10] == [0, 0, 0, 0, 0])
# Don't reorder if not needed:
assert np.all(particles.particle_id[5:10] == [5, 6, 7, 8, 9])

# Third batch:
assert np.all(particles.state[10:15] == [1, 1, 1, 0, 0])
assert set(particles.particle_id[10:13]) == {11, 13, 14}
assert set(particles.particle_id[13:15]) == {10, 12}

# Fourth batch:
assert np.all(particles.state[15:20] == [1, 1, 0, -999999999, -999999999])
# Don't reorder if not needed:
assert np.all(particles.particle_id[15:18] == [15, 16, 17])

# Fifth batch (unallocated):
assert np.all(particles.state[20:22] == [-999999999, -999999999])


@pytest.mark.parametrize(
'test_context',
[
xo.ContextCpu(),
xo.ContextCpu(omp_num_threads=4),
]
)
def test_check_is_active_sorting_cpu_default(test_context):
class TestElement(xt.BeamElement):
_xofields = {
'states': xo.Int64[:],
}

_extra_c_sources = ["""
/*gpufun*/
void TestElement_track_local_particle(
TestElementData el,
LocalParticle* part0
) {
//start_per_particle_block (part0->part)
int64_t state = check_is_active(part);
int64_t id = LocalParticle_get_particle_id(part);
TestElementData_set_states(el, id, state);
//end_per_particle_block
}
"""]

el = TestElement(
_context=test_context,
states=np.zeros(18, dtype=np.int64),
)
particles = xp.Particles(
_context=test_context,
state=[
1, 0, 1, 0, 1,
0, 0, 0, 0, 0,
0, 1, 0, 1, 1,
1, 1, 0,
],
_no_reorganize=True,
)
# We want to simulate a situation where a recount is needed, so we set the
# number of active particles equal to the total number of particles:
particles._num_active_particles = 18

el.track(particles)

# Here we don't reorganize by batches, so we just check the whole array
# to see if it's sensible:
assert np.all(particles.state == ([1] * 8) + ([0] * 10))
assert set(particles.particle_id[:8]) == {0, 2, 4, 11, 13, 14, 15, 16}
assert set(particles.particle_id[8:]) == {1, 3, 5, 6, 7, 8, 9, 10, 12, 17}
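
A minimal sketch (not part of the commit) of the ceil-division chunking that the OpenMP test above relies on: with a capacity of 22 slots and 5 threads, each thread gets a chunk of ceil(22/5) = 5 slots (the last chunk is shorter), which produces the batches of 5, 5, 5, 3 + 2 unallocated, and 2 unallocated checked in the assertions. Variable names below are illustrative only.

capacity = 22        # total slots: 18 allocated particles + 4 unallocated
num_threads = 5

chunk_size = (capacity + num_threads - 1) // num_threads  # ceil division -> 5
chunks = [
    (batch_id * chunk_size, min((batch_id + 1) * chunk_size, capacity))
    for batch_id in range(num_threads)
]
print(chunks)  # [(0, 5), (5, 10), (10, 15), (15, 20), (20, 22)]
# Each OpenMP thread reorganizes only its own chunk, which is why the test
# verifies the ordering batch by batch rather than globally.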
35 changes: 28 additions & 7 deletions xtrack/base_element.py
@@ -26,7 +26,7 @@
const int64_t XT_part_block_end_idx = LocalParticle_get__num_active_particles(part0); //only_for_context cpu_serial
//#pragma omp simd // TODO: currently does not work, needs investigating
for (int64_t XT_part_block_ii=XT_part_block_start_idx; XT_part_block_ii<XT_part_block_end_idx; XT_part_block_ii++) { //only_for_context cpu_openmp cpu_serial
for (int64_t XT_part_block_ii = XT_part_block_start_idx; XT_part_block_ii<XT_part_block_end_idx; XT_part_block_ii++) { //only_for_context cpu_openmp cpu_serial
LocalParticle lpart = *part0; //only_for_context cpu_serial cpu_openmp
LocalParticle* part = &lpart; //only_for_context cpu_serial cpu_openmp
@@ -105,16 +105,37 @@ def _generate_per_particle_kernel_from_local_particle_function(
'''
int64_t flag_increment_at_element,
/*gpuglmem*/ int8_t* io_buffer){
const int num_threads = omp_get_max_threads(); //only_for_context cpu_openmp
const int64_t capacity = ParticlesData_get__capacity(particles); //only_for_context cpu_openmp
const int64_t chunk_size = (capacity + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
#pragma omp parallel for //only_for_context cpu_openmp
for (int64_t batch_id = 0; batch_id < num_threads; batch_id++) { //only_for_context cpu_openmp
#define CONTEXT_OPENMP //only_for_context cpu_openmp
#ifdef CONTEXT_OPENMP
const int64_t capacity = ParticlesData_get__capacity(particles);
const int num_threads = omp_get_max_threads();
#ifndef XT_OMP_SKIP_REORGANIZE
const int64_t num_particles_to_track = ParticlesData_get__num_active_particles(particles);
{
LocalParticle lpart;
lpart.io_buffer = io_buffer;
Particles_to_LocalParticle(particles, &lpart, 0, capacity);
check_is_active(&lpart);
count_reorganized_particles(&lpart);
LocalParticle_to_Particles(&lpart, particles, 0, capacity);
}
#else // When we skip reorganize, we cannot just batch active particles
const int64_t num_particles_to_track = capacity;
#endif
const int64_t chunk_size = (num_particles_to_track + num_threads - 1)/num_threads; // ceil division
#endif // CONTEXT_OPENMP
#pragma omp parallel for //only_for_context cpu_openmp
for (int64_t batch_id = 0; batch_id < num_threads; batch_id++) { //only_for_context cpu_openmp
LocalParticle lpart;
lpart.io_buffer = io_buffer;
int64_t part_id = batch_id * chunk_size; //only_for_context cpu_openmp
int64_t end_id = (batch_id + 1) * chunk_size; //only_for_context cpu_openmp
if (end_id > capacity) end_id = capacity; //only_for_context cpu_openmp
if (end_id > num_particles_to_track) end_id = num_particles_to_track; //only_for_context cpu_openmp
int64_t part_id = 0; //only_for_context cpu_serial
int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x; //only_for_context cuda
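
In plain terms, the new OpenMP preamble in base_element.py first decides how many slots actually need tracking: unless XT_OMP_SKIP_REORGANIZE is defined, a single-threaded pre-pass reorganizes the whole particle array and only the active particles are then divided among the threads; with the flag defined, every capacity slot is batched. A rough Python sketch of that decision (the real code is C, and the names below are illustrative only):

def openmp_chunk_bounds(capacity, num_active, num_threads, skip_reorganize):
    # Without the skip flag the pre-pass has moved active particles to the
    # front, so only those need to be distributed across the threads.
    num_to_track = capacity if skip_reorganize else num_active
    chunk_size = (num_to_track + num_threads - 1) // num_threads  # ceil division
    return [
        (i * chunk_size, min((i + 1) * chunk_size, num_to_track))
        for i in range(num_threads)
    ]

openmp_chunk_bounds(capacity=22, num_active=18, num_threads=5, skip_reorganize=False)
# -> [(0, 4), (4, 8), (8, 12), (12, 16), (16, 18)]: only active slots are batched.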
15 changes: 15 additions & 0 deletions xtrack/prebuilt_kernels/kernel_definitions.py
@@ -78,6 +78,13 @@
'config': BASE_CONFIG,
'classes': ONLY_XTRACK_ELEMENTS + NO_SYNRAD_ELEMENTS,
}),
('default_only_xtrack_no_limit', {
'config': {
**{k: v for k, v in BASE_CONFIG.items()
if k != 'XTRACK_GLOBAL_XY_LIMIT'}
},
'classes': ONLY_XTRACK_ELEMENTS + NO_SYNRAD_ELEMENTS,
}),
('only_xtrack_non_tracking_kernels', {
'config': BASE_CONFIG,
'classes': [],
@@ -159,6 +166,7 @@
DEFAULT_XCOLL_ELEMENTS = [
*ONLY_XTRACK_ELEMENTS,
*NO_SYNRAD_ELEMENTS,
ZetaShift,
xc.BlackAbsorber,
xc.EverestBlock,
xc.EverestCollimator,
@@ -174,6 +182,13 @@
'config': {},
'classes': DEFAULT_XCOLL_ELEMENTS,
}),
('default_xcoll_no_limit', {
'config': {
**{k: v for k, v in BASE_CONFIG.items()
if k != 'XTRACK_GLOBAL_XY_LIMIT'}
},
'classes': DEFAULT_XCOLL_ELEMENTS,
}),
('default_xcoll_frozen_longitudinal', {
'config': {**BASE_CONFIG, **FREEZE_LONGITUDINAL},
'classes': DEFAULT_XCOLL_ELEMENTS,
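
The two new kernel entries above reuse BASE_CONFIG but drop the XTRACK_GLOBAL_XY_LIMIT flag through a dict comprehension. A small sketch of the pattern, with hypothetical BASE_CONFIG values for illustration (the real one is defined in this module):

BASE_CONFIG = {
    'XTRACK_MULTIPOLE_NO_SYNRAD': True,  # hypothetical values, for illustration
    'XTRACK_GLOBAL_XY_LIMIT': 1.0,
}

no_limit_config = {k: v for k, v in BASE_CONFIG.items()
                   if k != 'XTRACK_GLOBAL_XY_LIMIT'}
# -> {'XTRACK_MULTIPOLE_NO_SYNRAD': True}: every other flag is preserved,
#    only the global aperture limit is removed.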
38 changes: 29 additions & 9 deletions xtrack/tracker.py
@@ -466,19 +466,39 @@ def _build_kernel(
int64_t offset_tbt_monitor,
/*gpuglmem*/ int8_t* io_buffer){
const int64_t capacity = ParticlesData_get__capacity(particles); //only_for_context cpu_openmp
const int num_threads = omp_get_max_threads(); //only_for_context cpu_openmp
const int64_t chunk_size = (capacity + num_threads - 1)/num_threads; // ceil division //only_for_context cpu_openmp
#pragma omp parallel for //only_for_context cpu_openmp
for (int chunk = 0; chunk < num_threads; chunk++) { //only_for_context cpu_openmp
int64_t part_id = chunk * chunk_size; //only_for_context cpu_openmp
int64_t end_id = (chunk + 1) * chunk_size; //only_for_context cpu_openmp
if (end_id > capacity) end_id = capacity; //only_for_context cpu_openmp
#define CONTEXT_OPENMP //only_for_context cpu_openmp
#ifdef CONTEXT_OPENMP
const int64_t capacity = ParticlesData_get__capacity(particles);
const int num_threads = omp_get_max_threads();
#ifndef XT_OMP_SKIP_REORGANIZE
const int64_t num_particles_to_track = ParticlesData_get__num_active_particles(particles);
{
LocalParticle lpart;
lpart.io_buffer = io_buffer;
Particles_to_LocalParticle(particles, &lpart, 0, capacity);
check_is_active(&lpart);
count_reorganized_particles(&lpart);
LocalParticle_to_Particles(&lpart, particles, 0, capacity);
}
#else // When we skip reorganize, we cannot just batch active particles
const int64_t num_particles_to_track = capacity;
#endif
const int64_t chunk_size = (num_particles_to_track + num_threads - 1)/num_threads; // ceil division
#endif // CONTEXT_OPENMP
#pragma omp parallel for //only_for_context cpu_openmp
for (int chunk = 0; chunk < num_threads; chunk++) { //only_for_context cpu_openmp
int64_t part_id = chunk * chunk_size; //only_for_context cpu_openmp
int64_t end_id = (chunk + 1) * chunk_size; //only_for_context cpu_openmp
if (end_id > num_particles_to_track) end_id = num_particles_to_track; //only_for_context cpu_openmp
int64_t part_id = 0; //only_for_context cpu_serial
int64_t part_id = blockDim.x * blockIdx.x + threadIdx.x; //only_for_context cuda
int64_t part_id = get_global_id(0); //only_for_context opencl
int64_t end_id = 0; // unused outside of openmp //only_for_context cpu_serial cuda opencl
int64_t end_id = 0; // unused outside of openmp //only_for_context cpu_serial cuda opencl
LocalParticle lpart;
lpart.io_buffer = io_buffer;
