diff --git a/src/components/ramachandran/ramachandran.cpp b/src/components/ramachandran/ramachandran.cpp
index aca802c..81c76c5 100644
--- a/src/components/ramachandran/ramachandran.cpp
+++ b/src/components/ramachandran/ramachandran.cpp
@@ -1271,11 +1271,14 @@ struct Ramachandran : viamd::EventHandler {
         uint32_t frame_end;
         uint32_t frame_stride;
         float sigma;
+        md_allocator_i* alloc;
     };
 
+    md_allocator_i* alloc = md_get_heap_allocator();
+
     uint64_t tex_size = sizeof(vec4_t) * density_tex_dim * density_tex_dim;
     uint64_t alloc_size = sizeof(UserData) + tex_size + alignof(vec4_t);
-    UserData* user_data = (UserData*)md_alloc(md_get_heap_allocator(), sizeof(UserData) + tex_size);
+    UserData* user_data = (UserData*)md_alloc(alloc, alloc_size);
     vec4_t* density_tex = (vec4_t*)NEXT_ALIGNED_ADDRESS(user_data + 1, alignof(vec4_t));
     memset(density_tex, 0, tex_size);
@@ -1291,9 +1294,9 @@ struct Ramachandran : viamd::EventHandler {
     user_data->frame_end = frame_end;
     user_data->frame_stride = frame_stride;
     user_data->sigma = blur_sigma;
+    user_data->alloc = alloc;
 
-    task_system::ID async_task = task_system::create_pool_task(STR_LIT("Rama density"), [](void* user_data) {
-        UserData* data = (UserData*)user_data;
+    task_system::ID async_task = task_system::create_pool_task(STR_LIT("Rama density"), [data = user_data]() {
         const float angle_to_coord_scale = 1.0f / (2.0f * PI);
         const float angle_to_coord_offset = 0.5f;
@@ -1333,13 +1336,12 @@ struct Ramachandran : viamd::EventHandler {
         data->rep->den_sum[1] = (float)sum[1];
         data->rep->den_sum[2] = (float)sum[2];
         data->rep->den_sum[3] = (float)sum[3];
-    }, user_data);
+    });
 
-    task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update rama texture"), [](void* user_data) {
-        UserData* data = (UserData*)user_data;
+    task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update rama texture"), [data = user_data]() {
         gl::set_texture_2D_data(data->rep->den_tex, data->density_tex, GL_RGBA32F);
-        md_free(md_get_heap_allocator(), data, data->alloc_size);
-    }, user_data);
+        md_free(data->alloc, data, data->alloc_size);
+    });
 
     task_system::set_task_dependency(main_task, async_task);
     task_system::enqueue_task(async_task);
diff --git a/src/components/shapespace/shapespace.cpp b/src/components/shapespace/shapespace.cpp
index a87c9c3..e7fb2e3 100644
--- a/src/components/shapespace/shapespace.cpp
+++ b/src/components/shapespace/shapespace.cpp
@@ -379,9 +379,8 @@ struct Shapespace : viamd::EventHandler {
     md_array_resize(coords, num_frames * num_structures, arena);
     MEMSET(weights, 0, md_array_bytes(weights));
     MEMSET(coords, 0, md_array_bytes(coords));
-    evaluate_task = task_system::create_pool_task(STR_LIT("Eval Shape Space"), 0, (uint32_t)num_frames, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+    evaluate_task = task_system::create_pool_task(STR_LIT("Eval Shape Space"), (uint32_t)num_frames, [shapespace = this](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
         (void)thread_num;
-        Shapespace* shapespace = (Shapespace*)user_data;
         ApplicationState* app_state = shapespace->app_state;
         const size_t stride = ALIGN_TO(app_state->mold.mol.atom.count, 8);
         const size_t bytes = stride * 3 * sizeof(float);
@@ -423,7 +422,7 @@ struct Shapespace : viamd::EventHandler {
             }
         }
         md_array_free(xyzw, md_get_heap_allocator());
-    }, this);
+    });
     task_system::enqueue_task(evaluate_task);
 }
diff --git a/src/components/veloxchem/veloxchem.cpp b/src/components/veloxchem/veloxchem.cpp
index d415c65..19924a0 100644
--- a/src/components/veloxchem/veloxchem.cpp
+++ b/src/components/veloxchem/veloxchem.cpp
@@ -805,8 +805,7 @@ struct VeloxChem : viamd::EventHandler {
         task_system::ID async_task = evaluate_gto_on_grid_async(&payload->args);
         // Launch task for main (render) thread to update the volume texture
-        task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update Volume"), [](void* user_data) {
-            Payload* data = (Payload*)user_data;
+        task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update Volume"), [data = payload]() {
             // Ensure that the dimensions of the texture have not changed during evaluation
             int dim[3];
@@ -816,7 +815,7 @@ struct VeloxChem : viamd::EventHandler {
             md_free(data->alloc, data->args.gtos, data->args.num_gtos * sizeof(md_gto_t));
             md_free(data->alloc, data->mem, data->mem_size);
-        }, payload);
+        });
         task_system::set_task_dependency(main_task, async_task);
         task_system::enqueue_task(async_task);
@@ -890,9 +889,7 @@ struct VeloxChem : viamd::EventHandler {
         task_system::ID async_task = evaluate_gto_on_grid_async(&payload->args);
         // Launch task for main (render) thread to update the volume texture
-        task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update Volume"), [](void* user_data) {
-            Payload* data = (Payload*)user_data;
-
+        task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update Volume"), [data = payload]() {
             // Ensure that the dimensions of the texture have not changed during evaluation
             int dim[3];
             if (gl::get_texture_dim(dim, data->tex) && MEMCMP(dim, data->args.grid.dim, sizeof(dim)) == 0) {
@@ -901,7 +898,7 @@ struct VeloxChem : viamd::EventHandler {
             md_free(data->alloc, data->args.gtos, data->args.num_gtos * sizeof(md_gto_t));
             md_free(data->alloc, data->mem, data->mem_size);
-        }, payload);
+        });
         task_system::set_task_dependency(main_task, async_task);
         task_system::enqueue_task(async_task);
@@ -1006,9 +1003,7 @@ struct VeloxChem : viamd::EventHandler {
         task_system::ID async_task = evaluate_gto_on_grid_async(&payload->args);
         // Launch task for main (render) thread to update the volume texture
-        task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update Volume"), [](void* user_data) {
-            Payload* data = (Payload*)user_data;
-
+        task_system::ID main_task = task_system::create_main_task(STR_LIT("##Update Volume"), [data = payload]() {
             // Ensure that the dimensions of the texture have not changed during evaluation
             int dim[3];
             if (gl::get_texture_dim(dim, data->tex) && MEMCMP(dim, data->args.grid.dim, sizeof(dim)) == 0) {
@@ -1017,7 +1012,7 @@ struct VeloxChem : viamd::EventHandler {
             md_free(data->alloc, data->args.gtos, data->args.num_gtos * sizeof(md_gto_t));
             md_free(data->alloc, data, data->alloc_size);
-        }, payload);
+        });
         task_system::set_task_dependency(main_task, async_task);
         task_system::enqueue_task(async_task);
@@ -1554,9 +1549,7 @@ struct VeloxChem : viamd::EventHandler {
         task_system::ID eval_task = evaluate_gto_on_grid_async(&payload->args);
         // @TODO: This should be performed as a range task in parallel
-        task_system::ID segment_task = task_system::create_pool_task(STR_LIT("##Segment Volume"), [](void* user_data) {
-            Payload* data = (Payload*)user_data;
-
+        task_system::ID segment_task = task_system::create_pool_task(STR_LIT("##Segment Volume"), [data = payload]() {
 #if DEBUG
             double sum = 0.0;
             size_t len = data->args.grid.dim[0] * data->args.grid.dim[1] * data->args.grid.dim[2];
@@ -1572,7 +1565,7 @@ struct VeloxChem : viamd::EventHandler {
             md_free(data->alloc, data->args.gtos, data->args.num_gtos * sizeof(md_gto_t));
             md_free(data->alloc, data, data->alloc_size);
-        }, payload);
+        });
         task_system::set_task_dependency(segment_task, eval_task);
@@ -1592,9 +1585,8 @@ struct VeloxChem : viamd::EventHandler {
         // We evaluate the in parallel over smaller NxNxN blocks
         const uint32_t num_blocks = (args->grid.dim[0] / BLK_DIM) * (args->grid.dim[1] / BLK_DIM) * (args->grid.dim[2] / BLK_DIM);
-        task_system::ID async_task = task_system::create_pool_task(STR_LIT("Evaluate Orbital"), 0, num_blocks, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+        task_system::ID async_task = task_system::create_pool_task(STR_LIT("Evaluate Orbital"), num_blocks, [data = args](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
             (void)thread_num;
-            AsyncGridEvalArgs* data = (AsyncGridEvalArgs*)user_data;
             MD_LOG_DEBUG("Starting async eval of orbital grid [%i][%i][%i]", data->grid.dim[0], data->grid.dim[1], data->grid.dim[2]);
             // Number of NxNxN blocks in each dimension
@@ -1657,7 +1649,7 @@ struct VeloxChem : viamd::EventHandler {
             }
             md_temp_set_pos_back(temp_pos);
-        }, args);
+        });
         return async_task;
     }
@@ -2500,7 +2492,7 @@ struct VeloxChem : viamd::EventHandler {
             rsp.first_plot_rot_ecd = false;
             ImGui::TreePop();
         }
-#if 0
+#if 1
         if (rsp.first_plot_vib) { ImGui::SetNextItemOpen(true); }
         if (ImGui::TreeNode("Vibrational Analysis")) {
             // draw the vibrational analysis
@@ -4484,10 +4476,9 @@ struct VeloxChem : viamd::EventHandler {
         if (compute_transition_group_values_async(&eval_attach, &seg_attach, nto.transition_density_part, nto.group.count, nto.atom_group_idx, nto.atom_xyzr, nto.num_atoms, nto_idx, MD_VLX_NTO_TYPE_PARTICLE, MD_GTO_EVAL_MODE_PSI_SQUARED, samples_per_angstrom) &&
             compute_transition_group_values_async(&eval_detach, &seg_detach, nto.transition_density_hole, nto.group.count, nto.atom_group_idx, nto.atom_xyzr, nto.num_atoms, nto_idx, MD_VLX_NTO_TYPE_HOLE, MD_GTO_EVAL_MODE_PSI_SQUARED, samples_per_angstrom)) {
-            task_system::ID compute_matrix_task = task_system::create_main_task(STR_LIT("##Compute Transition Matrix"), [](void* user_data) {
-                VeloxChem::Nto* nto = (VeloxChem::Nto*)user_data;
+            task_system::ID compute_matrix_task = task_system::create_main_task(STR_LIT("##Compute Transition Matrix"), [nto = &nto]() {
                 compute_transition_matrix(nto->transition_matrix, nto->group.count, nto->transition_density_hole, nto->transition_density_part);
-            }, &nto);
+            });
             task_system::set_task_dependency(compute_matrix_task, seg_attach);
             task_system::set_task_dependency(compute_matrix_task, seg_detach);
diff --git a/src/main.cpp b/src/main.cpp
index 6cfb584..f3bbf91 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1029,13 +1029,13 @@ int main(int argc, char** argv) {
                 frame_end = (uint32_t)CLAMP((int)data.animation.frame , 0, (int)traj_frames);
             }
             if (frame_beg != frame_end) {
-                data.tasks.prefetch_frames = task_system::create_pool_task(STR_LIT("##Prefetch Frames"), frame_beg, frame_end, [](uint32_t frame_beg, uint32_t frame_end, void* user_data, uint32_t thread_num) {
-                    (void)thread_num;
-                    ApplicationState* data = (ApplicationState*)user_data;
-                    for (uint32_t i = frame_beg; i < frame_end; ++i) {
-                        md_trajectory_load_frame(data->mold.traj, i, 0, 0, 0, 0);
+                uint32_t frame_count = frame_end - frame_beg;
+                data.tasks.prefetch_frames = task_system::create_pool_task(STR_LIT("##Prefetch Frames"), frame_count, [&data, frame_offset = frame_beg](uint32_t frame_beg, uint32_t frame_end, uint32_t thread_num) {
+                    (void)thread_num;
+                    for (uint32_t i = frame_offset + frame_beg; i < frame_offset + frame_end; ++i) {
+                        md_trajectory_load_frame(data.mold.traj, i, 0, 0, 0, 0);
                     }
-                }, &data);
+                });
                 task_system::enqueue_task(data.tasks.prefetch_frames);
             }
         }
@@ -1275,20 +1275,18 @@ int main(int argc, char** argv) {
             md_script_eval_clear_data(data.script.full_eval);
             if (md_script_ir_property_count(data.script.eval_ir) > 0) {
-                data.tasks.evaluate_full = task_system::create_pool_task(STR_LIT("Eval Full"), 0, (uint32_t)num_frames, [](uint32_t frame_beg, uint32_t frame_end, void* user_data, uint32_t thread_num) {
+                data.tasks.evaluate_full = task_system::create_pool_task(STR_LIT("Eval Full"), (uint32_t)num_frames, [&data](uint32_t frame_beg, uint32_t frame_end, uint32_t thread_num) {
                     (void)thread_num;
-                    ApplicationState* data = (ApplicationState*)user_data;
-                    md_script_eval_frame_range(data->script.full_eval, data->script.eval_ir, &data->mold.mol, data->mold.traj, frame_beg, frame_end);
-                }, &data);
+                    md_script_eval_frame_range(data.script.full_eval, data.script.eval_ir, &data.mold.mol, data.mold.traj, frame_beg, frame_end);
+                });
 #if MEASURE_EVALUATION_TIME
                 uint64_t time = (uint64_t)md_time_current();
-                task_system::ID time_task = task_system::create_pool_task(STR_LIT("##Time Eval Full"), [](void* user_data) {
+                task_system::ID time_task = task_system::create_pool_task(STR_LIT("##Time Eval Full"), [t0 = time]() {
                     uint64_t t1 = md_time_current();
-                    uint64_t t0 = (uint64_t)user_data;
                     double s = md_time_as_seconds(t1 - t0);
                     LOG_INFO("Evaluation completed in: %.3fs", s);
-                }, (void*)time);
+                });
 #endif
                 task_system::set_task_dependency(time_task, data.tasks.evaluate_full);
                 task_system::enqueue_task(data.tasks.evaluate_full);
@@ -1312,12 +1310,14 @@ int main(int argc, char** argv) {
             const uint32_t traj_frames = (uint32_t)md_trajectory_num_frames(data.mold.traj);
             const uint32_t beg_frame = CLAMP((uint32_t)data.timeline.filter.beg_frame, 0, traj_frames-1);
             const uint32_t end_frame = CLAMP((uint32_t)data.timeline.filter.end_frame + 1, beg_frame + 1, traj_frames);
-            data.tasks.evaluate_filt = task_system::create_pool_task(STR_LIT("Eval Filt"), beg_frame, end_frame, [](uint32_t beg, uint32_t end, void* user_data, uint32_t thread_num) {
-                (void)thread_num;
-                ApplicationState* data = (ApplicationState*)user_data;
-                md_script_eval_frame_range(data->script.filt_eval, data->script.eval_ir, &data->mold.mol, data->mold.traj, beg, end);
-            }, &data);
-            task_system::enqueue_task(data.tasks.evaluate_filt);
+            uint32_t num_frames = end_frame - beg_frame;
+            if (num_frames > 0) {
+                data.tasks.evaluate_filt = task_system::create_pool_task(STR_LIT("Eval Filt"), num_frames, [offset = beg_frame, &data](uint32_t beg, uint32_t end, uint32_t thread_num) {
+                    (void)thread_num;
+                    md_script_eval_frame_range(data.script.filt_eval, data.script.eval_ir, &data.mold.mol, data.mold.traj, offset + beg, offset + end);
+                });
+                task_system::enqueue_task(data.tasks.evaluate_filt);
+            }
         }
         /*
@@ -1999,6 +1999,10 @@ static void interpolate_atomic_properties(ApplicationState* state) {
     const size_t stride = ALIGN_TO(mol.atom.count, 16);    // The interploation uses SIMD vectorization without bounds, so we make sure there is no overlap between the data segments
     const size_t bytes = stride * sizeof(float) * 3 * 4;
+
+    // The number of atoms to be processed per thread
+    const size_t chunk_size = 128;
+    const size_t num_chunks = DIV_UP(mol.atom.count, chunk_size);
 
     md_vm_arena_temp_t tmp = md_vm_arena_temp_begin(frame_alloc);
     defer { md_vm_arena_temp_end(tmp); };
@@ -2016,6 +2020,7 @@ static void interpolate_atomic_properties(ApplicationState* state) {
         md_trajectory_frame_header_t headers[4];
         md_unit_cell_t unit_cell;
+        size_t chunk_size;
         size_t count;
         float* src_x[4];
@@ -2039,6 +2044,7 @@ static void interpolate_atomic_properties(ApplicationState* state) {
         .mode = mode,
         .nearest_frame = nearest_frame,
         .frames = { frames[0], frames[1], frames[2], frames[3]},
+        .chunk_size = chunk_size,
         .count = mol.atom.count,
         .src_x = { (float*)mem + stride * 0, (float*)mem + stride * 1, (float*)mem + stride * 2, (float*)mem + stride * 3 },
         .src_y = { (float*)mem + stride * 4, (float*)mem + stride * 5, (float*)mem + stride * 6, (float*)mem + stride * 7 },
@@ -2056,28 +2062,24 @@ static void interpolate_atomic_properties(ApplicationState* state) {
     switch (mode) {
         case InterpolationMode::Nearest: {
-            task_system::ID load_task = task_system::create_pool_task(STR_LIT("## Load Frame"),[](void* user_data) {
-                Payload* data = (Payload*)user_data;
+            task_system::ID load_task = task_system::create_pool_task(STR_LIT("## Load Frame"),[data = &payload]() {
                 md_trajectory_frame_header_t header;
                 md_trajectory_load_frame(data->state->mold.traj, data->nearest_frame, &header, data->dst_x, data->dst_y, data->dst_z);
-                data->unit_cell = header.unit_cell;
-            }, &payload);
+                MEMCPY(&data->unit_cell, &header.unit_cell, sizeof(md_unit_cell_t));
+            });
             tasks[num_tasks++] = load_task;
             break;
         }
         case InterpolationMode::Linear: {
-            task_system::ID load_task = task_system::create_pool_task(STR_LIT("## Load Frame"), 0, 2, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+            task_system::ID load_task = task_system::create_pool_task(STR_LIT("## Load Frame"), 2, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
                 (void)thread_num;
-                Payload* data = (Payload*)user_data;
                 for (uint32_t i = range_beg; i < range_end; ++i) {
                     md_trajectory_load_frame(data->state->mold.traj, data->frames[i+1], &data->headers[i], data->src_x[i], data->src_y[i], data->src_z[i]);
                 }
-            }, &payload);
-
-            task_system::ID interp_unit_cell_task = task_system::create_pool_task(STR_LIT("## Interp Unit Cell Data"), [](void* user_data) {
-                Payload* data = (Payload*)user_data;
+            });
+            task_system::ID interp_unit_cell_task = task_system::create_pool_task(STR_LIT("## Interp Unit Cell Data"), [data = &payload]() {
                 if ((data->headers[0].unit_cell.flags & MD_UNIT_CELL_FLAG_ORTHO) && (data->headers[1].unit_cell.flags & MD_UNIT_CELL_FLAG_ORTHO)) {
                     double ext_x = lerp(data->headers[0].unit_cell.basis[0][0], data->headers[1].unit_cell.basis[0][0], data->t);
                     double ext_y = lerp(data->headers[0].unit_cell.basis[1][1], data->headers[1].unit_cell.basis[1][1], data->t);
@@ -2087,29 +2089,23 @@ static void interpolate_atomic_properties(ApplicationState* state) {
                     data->unit_cell.basis = lerp(data->headers[0].unit_cell.basis, data->headers[1].unit_cell.basis, data->t);
                     data->unit_cell.inv_basis = mat3_inverse(data->state->mold.mol.unit_cell.basis);
                 }
-            }, &payload);
+            });
             task_system::ID interp_coord_task = 0;
-            if (mol.atom.count > 128) {
-                interp_coord_task = task_system::create_pool_task(STR_LIT("## Interp Coord Data"), 0, (uint32_t)mol.atom.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
-                    (void)thread_num;
-                    Payload* data = (Payload*)user_data;
-                    size_t count = range_end - range_beg;
-                    float* dst_x = data->dst_x + range_beg;
-                    float* dst_y = data->dst_y + range_beg;
-                    float* dst_z = data->dst_z + range_beg;
-                    const float* src_x[2] = { data->src_x[0] + range_beg, data->src_x[1] + range_beg};
-                    const float* src_y[2] = { data->src_y[0] + range_beg, data->src_y[1] + range_beg};
-                    const float* src_z[2] = { data->src_z[0] + range_beg, data->src_z[1] + range_beg};
-
-                    md_util_interpolate_linear(dst_x, dst_y, dst_z, src_x, src_y, src_z, count, &data->unit_cell, data->t);
-                }, &payload);
-            } else {
-                interp_coord_task = task_system::create_pool_task(STR_LIT("## Interp Coord Data"), [](void* user_data) {
-                    Payload* data = (Payload*)user_data;
-                    md_util_interpolate_linear(data->dst_x, data->dst_y, data->dst_z, data->src_x, data->src_y, data->src_z, data->count, &data->unit_cell, data->t);
-                }, &payload);
-            }
+            interp_coord_task = task_system::create_pool_task(STR_LIT("## Interp Coord Data"), (uint32_t)num_chunks, [data = &payload](uint32_t chunk_beg, uint32_t chunk_end, uint32_t thread_num) {
+                (void)thread_num;
+                size_t range_beg = chunk_beg * data->chunk_size;
+                size_t range_end = MIN(chunk_end * data->chunk_size, data->count);
+                size_t count = range_end - range_beg;
+                float* dst_x = data->dst_x + range_beg;
+                float* dst_y = data->dst_y + range_beg;
+                float* dst_z = data->dst_z + range_beg;
+                const float* src_x[2] = { data->src_x[0] + range_beg, data->src_x[1] + range_beg};
+                const float* src_y[2] = { data->src_y[0] + range_beg, data->src_y[1] + range_beg};
+                const float* src_z[2] = { data->src_z[0] + range_beg, data->src_z[1] + range_beg};
+
+                md_util_interpolate_linear(dst_x, dst_y, dst_z, src_x, src_y, src_z, count, &data->unit_cell, data->t);
+            });
             tasks[num_tasks++] = load_task;
             tasks[num_tasks++] = interp_unit_cell_task;
             tasks[num_tasks++] = interp_coord_task;
@@ -2117,17 +2113,14 @@ static void interpolate_atomic_properties(ApplicationState* state) {
             break;
         }
         case InterpolationMode::CubicSpline: {
-            task_system::ID load_task = task_system::create_pool_task(STR_LIT("## Load Frame"), 0, 4, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+            task_system::ID load_task = task_system::create_pool_task(STR_LIT("## Load Frame"), 4, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
                 (void)thread_num;
-                Payload* data = (Payload*)user_data;
                 for (uint32_t i = range_beg; i < range_end; ++i) {
                     md_trajectory_load_frame(data->state->mold.traj, data->frames[i], &data->headers[i], data->src_x[i], data->src_y[i], data->src_z[i]);
                 }
-            }, &payload);
-
-            task_system::ID interp_unit_cell_task = task_system::create_pool_task(STR_LIT("## Interp Unit Cell Data"), [](void* user_data) {
-                Payload* data = (Payload*)user_data;
+            });
+            task_system::ID interp_unit_cell_task = task_system::create_pool_task(STR_LIT("## Interp Unit Cell Data"), [data = &payload]() {
                 if ((data->headers[0].unit_cell.flags & MD_UNIT_CELL_FLAG_ORTHO) &&
                     (data->headers[1].unit_cell.flags & MD_UNIT_CELL_FLAG_ORTHO) &&
                     (data->headers[2].unit_cell.flags & MD_UNIT_CELL_FLAG_ORTHO) &&
@@ -2145,29 +2138,23 @@ static void interpolate_atomic_properties(ApplicationState* state) {
                     data->unit_cell.basis = cubic_spline(data->headers[0].unit_cell.basis, data->headers[1].unit_cell.basis, data->headers[2].unit_cell.basis, data->headers[3].unit_cell.basis, data->t, data->s);
                     data->unit_cell.inv_basis = mat3_inverse(data->state->mold.mol.unit_cell.basis);
                 }
-            }, &payload);
+            });
             task_system::ID interp_coord_task = 0;
-            if (mol.atom.count > 128) {
-                interp_coord_task = task_system::create_pool_task(STR_LIT("## Interp Coord Data"), 0, (uint32_t)mol.atom.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
-                    (void)thread_num;
-                    Payload* data = (Payload*)user_data;
-                    size_t count = range_end - range_beg;
-                    float* dst_x = data->dst_x + range_beg;
-                    float* dst_y = data->dst_y + range_beg;
-                    float* dst_z = data->dst_z + range_beg;
-                    const float* src_x[4] = { data->src_x[0] + range_beg, data->src_x[1] + range_beg, data->src_x[2] + range_beg, data->src_x[3] + range_beg};
-                    const float* src_y[4] = { data->src_y[0] + range_beg, data->src_y[1] + range_beg, data->src_y[2] + range_beg, data->src_y[3] + range_beg};
-                    const float* src_z[4] = { data->src_z[0] + range_beg, data->src_z[1] + range_beg, data->src_z[2] + range_beg, data->src_z[3] + range_beg};
-
-                    md_util_interpolate_cubic_spline(dst_x, dst_y, dst_z, src_x, src_y, src_z, count, &data->unit_cell, data->t, data->s);
-                }, &payload);
-            } else {
-                interp_coord_task = task_system::create_pool_task(STR_LIT("## Interp Coord Data"), [](void* user_data) {
-                    Payload* data = (Payload*)user_data;
-                    md_util_interpolate_cubic_spline(data->dst_x, data->dst_y, data->dst_z, data->src_x, data->src_y, data->src_z, data->count, &data->unit_cell, data->t, data->s);
-                }, &payload);
-            }
+            interp_coord_task = task_system::create_pool_task(STR_LIT("## Interp Coord Data"), (uint32_t)num_chunks, [data = &payload](uint32_t chunk_beg, uint32_t chunk_end, uint32_t thread_num) {
+                (void)thread_num;
+                size_t range_beg = chunk_beg * chunk_size;
+                size_t range_end = MIN(chunk_end * chunk_size, data->count);
+                size_t count = range_end - range_beg;
+                float* dst_x = data->dst_x + range_beg;
+                float* dst_y = data->dst_y + range_beg;
+                float* dst_z = data->dst_z + range_beg;
+                const float* src_x[4] = { data->src_x[0] + range_beg, data->src_x[1] + range_beg, data->src_x[2] + range_beg, data->src_x[3] + range_beg};
+                const float* src_y[4] = { data->src_y[0] + range_beg, data->src_y[1] + range_beg, data->src_y[2] + range_beg, data->src_y[3] + range_beg};
+                const float* src_z[4] = { data->src_z[0] + range_beg, data->src_z[1] + range_beg, data->src_z[2] + range_beg, data->src_z[3] + range_beg};
+
+                md_util_interpolate_cubic_spline(dst_x, dst_y, dst_z, src_x, src_y, src_z, count, &data->unit_cell, data->t, data->s);
+            });
             tasks[num_tasks++] = load_task;
             tasks[num_tasks++] = interp_unit_cell_task;
@@ -2181,35 +2168,31 @@ static void interpolate_atomic_properties(ApplicationState* state) {
     }
     if (state->operations.apply_pbc) {
-        task_system::ID pbc_task = task_system::create_pool_task(STR_LIT("## Apply PBC"), 0, (uint32_t)mol.atom.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+        task_system::ID pbc_task = task_system::create_pool_task(STR_LIT("## Apply PBC"), (uint32_t)mol.atom.count, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
            (void)thread_num;
-            Payload* data = (Payload*)user_data;
             size_t count = range_end - range_beg;
             float* x = data->dst_x + range_beg;
             float* y = data->dst_y + range_beg;
             float* z = data->dst_z + range_beg;
             md_util_pbc(x, y, z, 0, count, &data->unit_cell);
-        }, &payload);
+        });
         tasks[num_tasks++] = pbc_task;
     }
     if (state->operations.unwrap_structures) {
         size_t num_structures = md_index_data_num_ranges(mol.structure);
-        task_system::ID unwrap_task = task_system::create_pool_task(STR_LIT("## Unwrap Structures"), 0, (uint32_t)num_structures, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+        task_system::ID unwrap_task = task_system::create_pool_task(STR_LIT("## Unwrap Structures"), (uint32_t)num_structures, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
            (void)thread_num;
-            Payload* data = (Payload*)user_data;
             for (uint32_t i = range_beg; i < range_end; ++i) {
                 int32_t* s_idx = md_index_range_beg(data->state->mold.mol.structure, i);
                 size_t s_len = md_index_range_size(data->state->mold.mol.structure, i);
                 md_util_unwrap(data->dst_x, data->dst_y, data->dst_z, s_idx, s_len, &data->unit_cell);
             }
-        }, &payload);
+        });
         tasks[num_tasks++] = unwrap_task;
     }
     {
-        task_system::ID aabb_task = task_system::create_pool_task(STR_LIT("## Compute AABB"), 0, (uint32_t)mol.atom.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
-            Payload* data = (Payload*)user_data;
-
+        task_system::ID aabb_task = task_system::create_pool_task(STR_LIT("## Compute AABB"), (uint32_t)mol.atom.count, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
            size_t count = range_end - range_beg;
            const float* x = data->state->mold.mol.atom.x + range_beg;
            const float* y = data->state->mold.mol.atom.y + range_beg;
@@ -2222,32 +2205,28 @@ static void interpolate_atomic_properties(ApplicationState* state) {
             data->aabb_min[thread_num] = aabb_min;
             data->aabb_max[thread_num] = aabb_max;
-        }, &payload);
-
+        });
         tasks[num_tasks++] = aabb_task;
-        // md_util_aabb_compute(state->mold.mol_aabb_min.elem, state->mold.mol_aabb_max.elem, mol.atom.x, mol.atom.y, mol.atom.z, mol.atom.radius, 0, mol.atom.count);
     }
     if (mol.protein_backbone.angle) {
         switch (mode) {
         case InterpolationMode::Nearest: {
-            task_system::ID angle_task = task_system::create_pool_task(STR_LIT("## Compute Backbone Angles"), [](void* user_data) {
-                Payload* data = (Payload*)user_data;
+            task_system::ID angle_task = task_system::create_pool_task(STR_LIT("## Compute Backbone Angles"), [data = &payload]() {
                 const md_backbone_angles_t* src_angles[2] = {
                     data->state->trajectory_data.backbone_angles.data + data->state->trajectory_data.backbone_angles.stride * data->frames[1],
                     data->state->trajectory_data.backbone_angles.data + data->state->trajectory_data.backbone_angles.stride * data->frames[2],
                 };
                 const md_backbone_angles_t* src_angle = data->t < 0.5f ? src_angles[0] : src_angles[1];
                 MEMCPY(data->state->mold.mol.protein_backbone.angle, src_angle, data->state->mold.mol.protein_backbone.count * sizeof(md_backbone_angles_t));
-            }, &payload);
+            });
             tasks[num_tasks++] = angle_task;
             break;
         }
         case InterpolationMode::Linear: {
-            task_system::ID angle_task = task_system::create_pool_task(STR_LIT("## Compute Backbone Angles"), 0, (uint32_t)mol.protein_backbone.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+            task_system::ID angle_task = task_system::create_pool_task(STR_LIT("## Compute Backbone Angles"), (uint32_t)mol.protein_backbone.count, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
                 (void)thread_num;
-                Payload* data = (Payload*)user_data;
                 const md_backbone_angles_t* src_angles[2] = {
                     data->state->trajectory_data.backbone_angles.data + data->state->trajectory_data.backbone_angles.stride * data->frames[1],
                     data->state->trajectory_data.backbone_angles.data + data->state->trajectory_data.backbone_angles.stride * data->frames[2],
@@ -2264,15 +2243,14 @@ static void interpolate_atomic_properties(ApplicationState* state) {
                     float final_psi = lerp(psi[0], psi[1], data->t);
                     mol.protein_backbone.angle[i] = {deperiodizef(final_phi, 0, (float)TWO_PI), deperiodizef(final_psi, 0, (float)TWO_PI)};
                 }
-            }, &payload);
+            });
             tasks[num_tasks++] = angle_task;
             break;
         }
         case InterpolationMode::CubicSpline: {
-            task_system::ID angle_task = task_system::create_pool_task(STR_LIT("## Interpolate Backbone Angles"), 0, (uint32_t)mol.protein_backbone.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+            task_system::ID angle_task = task_system::create_pool_task(STR_LIT("## Interpolate Backbone Angles"), (uint32_t)mol.protein_backbone.count, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
                 (void)thread_num;
-                Payload* data = (Payload*)user_data;
                 const md_backbone_angles_t* src_angles[4] = {
                     data->state->trajectory_data.backbone_angles.data + data->state->trajectory_data.backbone_angles.stride * data->frames[0],
                     data->state->trajectory_data.backbone_angles.data + data->state->trajectory_data.backbone_angles.stride * data->frames[1],
@@ -2296,7 +2274,7 @@ static void interpolate_atomic_properties(ApplicationState* state) {
                     float final_psi = cubic_spline(psi[0], psi[1], psi[2], psi[3], data->t, data->s);
                     mol.protein_backbone.angle[i] = {deperiodizef(final_phi, 0, (float)TWO_PI), deperiodizef(final_psi, 0, (float)TWO_PI)};
                 }
-            }, &payload);
+            });
             tasks[num_tasks++] = angle_task;
             break;
@@ -2310,23 +2288,21 @@ static void interpolate_atomic_properties(ApplicationState* state) {
     if (mol.protein_backbone.secondary_structure) {
         switch (mode) {
         case InterpolationMode::Nearest: {
-            task_system::ID ss_task = task_system::create_pool_task(STR_LIT("## Interpolate Secondary Structures"), [](void* user_data) {
-                Payload* data = (Payload*)user_data;
+            task_system::ID ss_task = task_system::create_pool_task(STR_LIT("## Interpolate Secondary Structures"), [data = &payload]() {
                 const md_secondary_structure_t* src_ss[2] = {
                     (md_secondary_structure_t*)data->state->trajectory_data.secondary_structure.data + data->state->trajectory_data.secondary_structure.stride * data->frames[1],
                     (md_secondary_structure_t*)data->state->trajectory_data.secondary_structure.data + data->state->trajectory_data.secondary_structure.stride * data->frames[2],
                 };
                 const md_secondary_structure_t* ss = data->t < 0.5f ? src_ss[0] : src_ss[1];
                 MEMCPY(data->state->mold.mol.protein_backbone.secondary_structure, ss, data->state->mold.mol.protein_backbone.count * sizeof(md_secondary_structure_t));
-            }, &payload);
+            });
             tasks[num_tasks++] = ss_task;
             break;
         }
         case InterpolationMode::Linear: {
-            task_system::ID ss_task = task_system::create_pool_task(STR_LIT("## Interpolate Secondary Structures"), 0, (uint32_t)mol.protein_backbone.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+            task_system::ID ss_task = task_system::create_pool_task(STR_LIT("## Interpolate Secondary Structures"), (uint32_t)mol.protein_backbone.count, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
                 (void)thread_num;
-                Payload* data = (Payload*)user_data;
                 const md_secondary_structure_t* src_ss[2] = {
                     (md_secondary_structure_t*)data->state->trajectory_data.secondary_structure.data + data->state->trajectory_data.secondary_structure.stride * data->frames[1],
                     (md_secondary_structure_t*)data->state->trajectory_data.secondary_structure.data + data->state->trajectory_data.secondary_structure.stride * data->frames[2],
@@ -2339,15 +2315,14 @@ static void interpolate_atomic_properties(ApplicationState* state) {
                     const vec4_t ss_res = vec4_lerp(ss_f[0], ss_f[1], data->t);
                     data->state->mold.mol.protein_backbone.secondary_structure[i] = (md_secondary_structure_t)convert_color(ss_res);
                 }
-            }, &payload);
+            });
             tasks[num_tasks++] = ss_task;
             break;
         }
         case InterpolationMode::CubicSpline: {
-            task_system::ID ss_task = task_system::create_pool_task(STR_LIT("## Interpolate Secondary Structures"), 0, (uint32_t)mol.protein_backbone.count, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+            task_system::ID ss_task = task_system::create_pool_task(STR_LIT("## Interpolate Secondary Structures"), (uint32_t)mol.protein_backbone.count, [data = &payload](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
                 (void)thread_num;
-                Payload* data = (Payload*)user_data;
                 const md_secondary_structure_t* src_ss[4] = {
                     (md_secondary_structure_t*)data->state->trajectory_data.secondary_structure.data + data->state->trajectory_data.secondary_structure.stride * data->frames[0],
                     (md_secondary_structure_t*)data->state->trajectory_data.secondary_structure.data + data->state->trajectory_data.secondary_structure.stride * data->frames[1],
@@ -2364,7 +2339,7 @@ static void interpolate_atomic_properties(ApplicationState* state) {
                     const vec4_t ss_res = cubic_spline(ss_f[0], ss_f[1], ss_f[2], ss_f[3], data->t, data->s);
                     data->state->mold.mol.protein_backbone.secondary_structure[i] = (md_secondary_structure_t)convert_color(ss_res);
                 }
-            }, &payload);
+            });
             tasks[num_tasks++] = ss_task;
             break;
@@ -7285,10 +7260,8 @@ static void init_trajectory_data(ApplicationState* data) {
     // Launch work to compute the values
     task_system::task_interrupt_and_wait_for(data->tasks.backbone_computations);
-    data->tasks.backbone_computations = task_system::create_pool_task(STR_LIT("Backbone Operations"), 0, (uint32_t)num_frames, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+    data->tasks.backbone_computations = task_system::create_pool_task(STR_LIT("Backbone Operations"), (uint32_t)num_frames, [data](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
         (void)thread_num;
-        ApplicationState* data = (ApplicationState*)user_data;
-
        // Create copy here of molecule since we use the full structure as input
         md_molecule_t mol = data->mold.mol;
@@ -7306,10 +7279,9 @@ static void init_trajectory_data(ApplicationState* data) {
             md_util_backbone_angles_compute(data->trajectory_data.backbone_angles.data + data->trajectory_data.backbone_angles.stride * frame_idx, data->trajectory_data.backbone_angles.stride, &mol);
             md_util_backbone_secondary_structure_compute(data->trajectory_data.secondary_structure.data + data->trajectory_data.secondary_structure.stride * frame_idx, data->trajectory_data.secondary_structure.stride, &mol);
         }
-    }, data);
+    });
 
-    task_system::ID main_task = task_system::create_main_task(STR_LIT("Update Trajectory Data"), [](void* user_data) {
-        ApplicationState* data = (ApplicationState*)user_data;
+    task_system::ID main_task = task_system::create_main_task(STR_LIT("Update Trajectory Data"), [data]() {
         data->trajectory_data.backbone_angles.fingerprint = generate_fingerprint();
         data->trajectory_data.secondary_structure.fingerprint = generate_fingerprint();
@@ -7317,7 +7289,7 @@ static void init_trajectory_data(ApplicationState* data) {
         data->mold.dirty_buffers |= MolBit_ClearVelocity;
         update_all_representations(data);
-    }, data);
+    });
 
     task_system::set_task_dependency(main_task, data->tasks.backbone_computations);
     task_system::enqueue_task(data->tasks.backbone_computations);
@@ -7424,20 +7396,18 @@ static void launch_prefetch_job(ApplicationState* data) {
     if (!num_frames) return;
     task_system::task_interrupt_and_wait_for(data->tasks.prefetch_frames);
-    data->tasks.prefetch_frames = task_system::create_pool_task(STR_LIT("Prefetch Frames"), 0, num_frames, [](uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num) {
+    data->tasks.prefetch_frames = task_system::create_pool_task(STR_LIT("Prefetch Frames"), num_frames, [data](uint32_t range_beg, uint32_t range_end, uint32_t thread_num) {
         (void)thread_num;
-        ApplicationState* data = (ApplicationState*)user_data;
         for (uint32_t i = range_beg; i < range_end; ++i) {
             md_trajectory_frame_header_t header;
             md_trajectory_load_frame(data->mold.traj, i, &header, 0, 0, 0);
         }
-    }, data);
+    });
 
-    task_system::ID main_task = task_system::create_main_task(STR_LIT("Prefetch Complete"), [](void* user_data) {
-        ApplicationState* data = (ApplicationState*)user_data;
+    task_system::ID main_task = task_system::create_main_task(STR_LIT("Prefetch Complete"), [data]() {
         interpolate_atomic_properties(data);
         data->mold.dirty_buffers |= MolBit_ClearVelocity;
-    }, data);
+    });
 
     task_system::set_task_dependency(main_task, data->tasks.prefetch_frames);
     task_system::enqueue_task(data->tasks.prefetch_frames);
diff --git a/src/task_system.cpp b/src/task_system.cpp
index 0081100..6e7cd2a 100644
--- a/src/task_system.cpp
+++ b/src/task_system.cpp
@@ -54,17 +54,17 @@ struct CompletionActionFreePoolSlot : public enki::ICompletable {
 class AsyncTask : public enki::ITaskSet {
 public:
     AsyncTask() = default;
-    AsyncTask(uint32_t set_beg_, uint32_t set_end_, RangeTask set_func_, void* user_data_, str_t lbl_ = {}, ID id = INVALID_ID)
-        : ITaskSet(set_end_-set_beg_), m_set_func(set_func_), m_user_data(user_data_), m_range_offset(set_beg_), m_set_complete(0), m_interrupt(false), m_dependency(), m_id(id) {
-        size_t len = str_copy_to_char_buf(m_buf, sizeof(m_buf), lbl_);
+    AsyncTask(uint32_t set_size, RangeTask set_func, str_t lbl = {}, ID id = INVALID_ID, uint32_t grain_size = 1)
+        : ITaskSet(set_size, grain_size), m_set_func(set_func), m_set_complete(0), m_interrupt(false), m_dependency(), m_id(id), m_grain_size(grain_size) {
+        size_t len = str_copy_to_char_buf(m_buf, sizeof(m_buf), lbl);
         m_label = {m_buf, len};
        m_completion_action.m_slot_idx = get_slot_idx(id);
         m_completion_action.SetDependency(m_completion_action.m_dependency, this);
     }
-    AsyncTask(Task func_, void* user_data_, str_t lbl_ = {}, ID id = INVALID_ID)
-        : ITaskSet(1), m_func(func_), m_user_data(user_data_), m_set_complete(0), m_interrupt(false), m_dependency(), m_id(id) {
-        size_t len = str_copy_to_char_buf(m_buf, sizeof(m_buf), lbl_);
+    AsyncTask(Task func, str_t lbl = {}, ID id = INVALID_ID)
+        : ITaskSet(1), m_func(func), m_set_complete(0), m_interrupt(false), m_dependency(), m_id(id) {
+        size_t len = str_copy_to_char_buf(m_buf, sizeof(m_buf), lbl);
         m_label = {m_buf, len};
         m_completion_action.m_slot_idx = get_slot_idx(id);
         m_completion_action.SetDependency(m_completion_action.m_dependency, this);
@@ -74,11 +74,11 @@ class AsyncTask : public enki::ITaskSet {
         (void)threadnum;
         if (!m_interrupt) {
             if (m_set_func) {
-                m_set_func(m_range_offset + range.start, m_range_offset + range.end, m_user_data, threadnum);
+                m_set_func(range.start, range.end, threadnum);
                 m_set_complete += (range.end - range.start);
             }
             else if (m_func) {
-                m_func(m_user_data);
+                m_func();
                 m_set_complete += 1;
             }
         }
@@ -90,8 +90,8 @@ class AsyncTask : public enki::ITaskSet {
     RangeTask m_set_func = nullptr; // either of these two are executed
     Task m_func = nullptr;
-    void* m_user_data = nullptr;
     uint32_t m_range_offset = 0;
+    uint32_t m_grain_size = 1;
     std::atomic_uint32_t m_set_complete = 0;
     std::atomic_bool m_interrupt = false;
     enki::Dependency m_dependency = {};
@@ -104,18 +104,18 @@ class AsyncTask : public enki::ITaskSet {
 class MainTask : public enki::IPinnedTask {
 public:
     MainTask() = default;
-    MainTask(Task func, void* user_data, str_t lbl = {}, ID id = INVALID_ID) :
-        IPinnedTask(0), m_function(func), m_user_data(user_data), m_id(id) {
+    MainTask(Task func, str_t lbl = {}, ID id = INVALID_ID) :
+        IPinnedTask(0), m_function(func), m_id(id) {
         size_t len = MIN(lbl.len, LABEL_SIZE-1);
         m_label = {strncpy(m_buf, lbl.ptr, len), len};
     }
+
     void Execute() final {
-        m_function(m_user_data);
+        m_function();
         main::free_slots.push(get_slot_idx(m_id));
     }
     Task m_function = nullptr;
-    void* m_user_data = nullptr;
     enki::Dependency m_dependency = {};
     char m_buf[LABEL_SIZE] = "";
     str_t m_label = {};
@@ -153,27 +153,27 @@ void initialize(size_t num_threads = 0) {
 void shutdown() {
     ts.WaitforAllAndShutdown();
 }
-ID create_main_task(str_t label, Task func, void* user_data) {
+ID create_main_task(str_t label, Task func) {
     const uint32_t idx = main::free_slots.pop();
     ID id = generate_id(idx);
     MainTask* task = &main::task_data[idx];
-    PLACEMENT_NEW(task) MainTask(func, user_data, label, id);
+    PLACEMENT_NEW(task) MainTask(func, label, id);
     return id;
 }
-ID create_pool_task(str_t label, Task func, void* user_data) {
+ID create_pool_task(str_t label, Task func) {
     const uint32_t idx = pool::free_slots.pop();
     ID id = generate_id(idx);
     AsyncTask* task = &pool::task_data[idx];
-    PLACEMENT_NEW(task) AsyncTask(func, user_data, label, id);
+    PLACEMENT_NEW(task) AsyncTask(func, label, id);
     return id;
 }
-ID create_pool_task(str_t label, uint32_t range_beg, uint32_t range_end, RangeTask func, void* user_data) {
+ID create_pool_task(str_t label, uint32_t range_size, RangeTask func) {
     const uint32_t idx = pool::free_slots.pop();
     ID id = generate_id(idx);
     AsyncTask* task = &pool::task_data[idx];
-    PLACEMENT_NEW(task) AsyncTask(range_beg, range_end, func, user_data, label, id);
+    PLACEMENT_NEW(task) AsyncTask(range_size, func, label, id);
     return id;
 }
diff --git a/src/task_system.h b/src/task_system.h
index 4abc8e0..5af85a5 100644
--- a/src/task_system.h
+++ b/src/task_system.h
@@ -4,22 +4,19 @@
 #include
 #include
-//#include <functional>
+#include <functional>
 
 namespace task_system {
 
 typedef uint64_t ID;
 constexpr ID INVALID_ID = 0;
 
-//using Task = std::function<void()>;
-//using RangeTask = std::function<void(uint32_t range_beg, uint32_t range_end, uint32_t thread_num)>;
+using Task = std::function<void()>;
+using RangeTask = std::function<void(uint32_t range_beg, uint32_t range_end, uint32_t thread_num)>;
+/*
 using Task = void (*)(void* user_data);
 using RangeTask = void (*)(uint32_t range_beg, uint32_t range_end, void* user_data, uint32_t thread_num);
-
-/*
-typedef void (*Task) (void* user_data);
-typedef void (*RangeTask)(uint32_t range_beg, uint32_t range_end, void *user_data);
 */
 
 void initialize(size_t num_threads);
@@ -29,9 +26,9 @@ void shutdown();
 
 // Pool tasks will not stall the main thread.
 //void execute_queued_tasks();
 
-ID create_main_task(str_t label, Task task, void* user_data = 0);
-ID create_pool_task(str_t label, Task task, void* user_data = 0);
-ID create_pool_task(str_t label, uint32_t range_beg, uint32_t range_end, RangeTask task, void* user_data = 0);
+ID create_main_task(str_t label, Task task);
+ID create_pool_task(str_t label, Task task);
+ID create_pool_task(str_t label, uint32_t range_size, RangeTask task);
 
 // Sets a dependency for a task such that the task will only be executed upon the completion of 'dependency'
 void set_task_dependency(ID task, ID dependency);
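
Editor's note (not part of the patch): the sketch below is a hypothetical usage example of the reworked std::function-based API declared in task_system.h above; the label strings, the values array and the capture names are illustrative only. State is captured directly in the lambda instead of being passed through a void* user_data parameter, and range tasks now receive an interval within [0, range_size), so any starting offset must be captured explicitly (as done with frame_offset in main.cpp above).

    // Hypothetical sketch: clear a buffer on the thread pool, then publish the result on the main thread.
    static float values[256];
    task_system::ID pool_task = task_system::create_pool_task(STR_LIT("Example Range"), 256, [buf = values](uint32_t beg, uint32_t end, uint32_t thread_num) {
        (void)thread_num;
        for (uint32_t i = beg; i < end; ++i) {
            buf[i] = 0.0f;  // indices are relative to 0; capture an offset to address a sub-range
        }
    });
    task_system::ID main_task = task_system::create_main_task(STR_LIT("Example Publish"), [buf = values]() {
        // Runs on the main (render) thread once the pool task has completed.
        LOG_INFO("First value: %.3f", buf[0]);
    });
    task_system::set_task_dependency(main_task, pool_task);
    task_system::enqueue_task(pool_task);

As in the patch, only the pool task is enqueued; the main-thread task is released through the dependency. Captured state (here a static array, in the patch a heap-allocated UserData/Payload or the long-lived ApplicationState) must outlive the task, since the lambda no longer receives it from the scheduler.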