From 8cbb183e407b87fc7f45c87537e4618746f46c3b Mon Sep 17 00:00:00 2001 From: "Maarten L. Hekkelman" Date: Thu, 14 Dec 2023 08:14:52 +0100 Subject: [PATCH 1/4] optimise isunique --- CMakeLists.txt | 381 +++++++++++++++++++++++----------------------- src/alphafill.cpp | 110 ++++++++----- 2 files changed, 262 insertions(+), 229 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6450afe..b20bb4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,10 +6,10 @@ # modification, are permitted provided that the following conditions are met: # 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer +# list of conditions and the following disclaimer # 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -26,9 +26,9 @@ cmake_minimum_required(VERSION 3.21) # set the project name project( - alphafill - VERSION 2.1.0 - LANGUAGES C CXX) + alphafill + VERSION 2.1.1 + LANGUAGES C CXX) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") @@ -50,41 +50,41 @@ option(BUILD_DOCUMENTATION "Build the documentation" OFF) option(BUILD_WEB_APPLICATION "Build the web application" OFF) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers" - ) + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers" + ) elseif(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") endif() if(WIN32) - if(${CMAKE_SYSTEM_VERSION} GREATER_EQUAL 10) # Windows 10 - add_definitions(-D _WIN32_WINNT=0x0A00) - elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.3) # Windows 8.1 - add_definitions(-D _WIN32_WINNT=0x0603) - elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.2) # Windows 8 - add_definitions(-D _WIN32_WINNT=0x0602) - elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.1) # Windows 7 - add_definitions(-D _WIN32_WINNT=0x0601) - elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.0) # Windows Vista - add_definitions(-D _WIN32_WINNT=0x0600) - else() # Windows XP (5.1) - add_definitions(-D _WIN32_WINNT=0x0501) - endif() - - add_definitions(-DNOMINMAX) - - # We do not want to write an export file for all our symbols... - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + if(${CMAKE_SYSTEM_VERSION} GREATER_EQUAL 10) # Windows 10 + add_definitions(-D _WIN32_WINNT=0x0A00) + elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.3) # Windows 8.1 + add_definitions(-D _WIN32_WINNT=0x0603) + elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.2) # Windows 8 + add_definitions(-D _WIN32_WINNT=0x0602) + elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.1) # Windows 7 + add_definitions(-D _WIN32_WINNT=0x0601) + elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.0) # Windows Vista + add_definitions(-D _WIN32_WINNT=0x0600) + else() # Windows XP (5.1) + add_definitions(-D _WIN32_WINNT=0x0501) + endif() + + add_definitions(-DNOMINMAX) + + # We do not want to write an export file for all our symbols... + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) endif() if(MSVC) - # make msvc standards compliant... - add_compile_options(/permissive- /bigobj) - add_link_options(/NODEFAULTLIB:library) + # make msvc standards compliant... + add_compile_options(/permissive- /bigobj) + add_link_options(/NODEFAULTLIB:library) - # static runtime - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") + # static runtime + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") endif() # Create a revision file, containing the current git version info @@ -95,9 +95,9 @@ write_version_header("${PROJECT_SOURCE_DIR}/src/") find_package(Mrc QUIET) if(MRC_FOUND) - option(USE_RSRC "Use mrc to create resources" ON) + option(USE_RSRC "Use mrc to create resources" ON) else() - message(STATUS "Not using resources since mrc was not found") + message(STATUS "Not using resources since mrc was not found") endif() # push the BUILD_TESTING flag @@ -107,185 +107,190 @@ set(BUILD_TESTING OFF) find_package(cifpp 6 QUIET) if(NOT cifpp_FOUND) - FetchContent_Declare( - cifpp - GIT_REPOSITORY https://github.com/PDB-REDO/libcifpp.git - GIT_TAG b14237e) - FetchContent_MakeAvailable(cifpp) + FetchContent_Declare( + cifpp + GIT_REPOSITORY https://github.com/PDB-REDO/libcifpp.git + GIT_TAG b14237e) + FetchContent_MakeAvailable(cifpp) - set(CIFPP_SHARE_DIR ${cifpp_SOURCE_DIR}/rsrc) + set(CIFPP_SHARE_DIR ${cifpp_SOURCE_DIR}/rsrc) endif() find_package(libmcfp 1.2.4 QUIET) if(NOT libmcfp_FOUND) - FetchContent_Declare( - libmcfp - GIT_REPOSITORY https://github.com/mhekkel/libmcfp.git - GIT_TAG v1.2.4) - FetchContent_MakeAvailable(libmcfp) + FetchContent_Declare( + libmcfp + GIT_REPOSITORY https://github.com/mhekkel/libmcfp.git + GIT_TAG v1.2.4) + FetchContent_MakeAvailable(libmcfp) endif() find_package(zeep 6.0.11 QUIET) if(NOT zeep_FOUND) - FetchContent_Declare( - zeep - GIT_REPOSITORY https://github.com/mhekkel/libzeep.git - GIT_TAG v6.0.11) - FetchContent_MakeAvailable(zeep) + FetchContent_Declare( + zeep + GIT_REPOSITORY https://github.com/mhekkel/libzeep.git + GIT_TAG v6.0.11) + FetchContent_MakeAvailable(zeep) endif() if(BUILD_WEB_APPLICATION) - find_package(libpqxx 7.8.0 QUIET) - - if(NOT libpqxx_FOUND) - FetchContent_Declare( - libpqxx - GIT_REPOSITORY https://github.com/jtv/libpqxx - GIT_TAG 7.8.0) - FetchContent_MakeAvailable(libpqxx) - endif() - - find_package(OpenSSL REQUIRED) - - find_program(YARN yarn REQUIRED) - - # Make sure yarn is initialised - add_custom_command( - OUTPUT ${PROJECT_SOURCE_DIR}/node_modules - COMMAND ${YARN} - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) - - # yarn rules for javascripts - set(webpack_input - ${PROJECT_SOURCE_DIR}/webapp/index.js - ${PROJECT_SOURCE_DIR}/webapp/model.js - ${PROJECT_SOURCE_DIR}/webapp/optimized.js - ${PROJECT_SOURCE_DIR}/webapp/lists.js - ${PROJECT_SOURCE_DIR}/webapp/wait.js - ${PROJECT_SOURCE_DIR}/webapp/molstar.tsx - ${PROJECT_SOURCE_DIR}/scss/pdb-redo-bootstrap.scss) - - set(webpack_output ${PROJECT_SOURCE_DIR}/docroot/scripts/index.js) - - add_custom_command( - OUTPUT ${webpack_output} - BYPRODUCTS - DEPENDS ${webpack_input} - COMMAND ${YARN} run $,build,build-production> - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) - - add_custom_target(webpack DEPENDS ${PROJECT_SOURCE_DIR}/node_modules - ${webpack_output}) + find_package(libpqxx 7.8.0 QUIET) + + if(NOT libpqxx_FOUND) + FetchContent_Declare( + libpqxx + GIT_REPOSITORY https://github.com/jtv/libpqxx + GIT_TAG 7.8.0) + FetchContent_MakeAvailable(libpqxx) + endif() + + find_package(OpenSSL REQUIRED) + + find_program(YARN yarn REQUIRED) + + # Make sure yarn is initialised + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/node_modules + COMMAND ${YARN} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + + # yarn rules for javascripts + set(webpack_input + ${PROJECT_SOURCE_DIR}/webapp/index.js + ${PROJECT_SOURCE_DIR}/webapp/model.js + ${PROJECT_SOURCE_DIR}/webapp/optimized.js + ${PROJECT_SOURCE_DIR}/webapp/lists.js + ${PROJECT_SOURCE_DIR}/webapp/wait.js + ${PROJECT_SOURCE_DIR}/webapp/molstar.tsx + ${PROJECT_SOURCE_DIR}/scss/pdb-redo-bootstrap.scss) + + set(webpack_output ${PROJECT_SOURCE_DIR}/docroot/scripts/index.js) + + add_custom_command( + OUTPUT ${webpack_output} + BYPRODUCTS + DEPENDS ${webpack_input} + COMMAND ${YARN} run $,build,build-production> + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + + add_custom_target(webpack DEPENDS ${PROJECT_SOURCE_DIR}/node_modules + ${webpack_output}) endif() set(CONFIG_PATH "${CMAKE_INSTALL_FULL_SYSCONFDIR}") set(ALPHAFILL_DATA_DIR - "${CMAKE_INSTALL_FULL_DATADIR}/alphafill" - CACHE PATH "The directory where AlphaFill data files are to be stored") + "${CMAKE_INSTALL_FULL_DATADIR}/alphafill" + CACHE PATH "The directory where AlphaFill data files are to be stored") configure_file(${PROJECT_SOURCE_DIR}/src/config.hpp.in - ${PROJECT_SOURCE_DIR}/src/config.hpp) + ${PROJECT_SOURCE_DIR}/src/config.hpp) configure_file(${PROJECT_SOURCE_DIR}/alphafill.conf-dist.in alphafill.conf-dist - @ONLY) + @ONLY) # pop the BUILD_TESTING flag set(BUILD_TESTING ${SAVE_BUILD_TESTING}) list( - APPEND - SOURCES - ${PROJECT_SOURCE_DIR}/src/config.hpp - ${PROJECT_SOURCE_DIR}/src/alphafill.cpp - ${PROJECT_SOURCE_DIR}/src/blast.cpp - ${PROJECT_SOURCE_DIR}/src/ligands.cpp - ${PROJECT_SOURCE_DIR}/src/matrix.cpp - ${PROJECT_SOURCE_DIR}/src/main.cpp - ${PROJECT_SOURCE_DIR}/src/utilities.cpp - ${PROJECT_SOURCE_DIR}/src/validate.cpp) + APPEND + SOURCES + ${PROJECT_SOURCE_DIR}/src/config.hpp + ${PROJECT_SOURCE_DIR}/src/alphafill.cpp + ${PROJECT_SOURCE_DIR}/src/blast.cpp + ${PROJECT_SOURCE_DIR}/src/ligands.cpp + ${PROJECT_SOURCE_DIR}/src/matrix.cpp + ${PROJECT_SOURCE_DIR}/src/main.cpp + ${PROJECT_SOURCE_DIR}/src/utilities.cpp + ${PROJECT_SOURCE_DIR}/src/validate.cpp) if(BUILD_WEB_APPLICATION) - list( - APPEND - SOURCES - ${PROJECT_SOURCE_DIR}/src/bsd-closefrom.c - ${PROJECT_SOURCE_DIR}/src/db-connection.cpp - ${PROJECT_SOURCE_DIR}/src/data-service.cpp - ${PROJECT_SOURCE_DIR}/src/server.cpp - ${PROJECT_SOURCE_DIR}/src/structure.cpp - ${PROJECT_SOURCE_DIR}/src/https-client.cpp) + list( + APPEND + SOURCES + ${PROJECT_SOURCE_DIR}/src/bsd-closefrom.c + ${PROJECT_SOURCE_DIR}/src/db-connection.cpp + ${PROJECT_SOURCE_DIR}/src/data-service.cpp + ${PROJECT_SOURCE_DIR}/src/server.cpp + ${PROJECT_SOURCE_DIR}/src/structure.cpp + ${PROJECT_SOURCE_DIR}/src/https-client.cpp) endif() # The alphafill executable add_executable(alphafill ${SOURCES}) find_file(HAVE_DIRENT_H dirent.h) + if(HAVE_DIRENT_H) - target_compile_definitions(alphafill PUBLIC HAVE_DIRENT_H) + target_compile_definitions(alphafill PUBLIC HAVE_DIRENT_H) endif() find_file(HAVE_FCNTL_H fcntl.h) + if(HAVE_FCNTL_H) - target_compile_definitions(alphafill PUBLIC HAVE_FCNTL_H) + target_compile_definitions(alphafill PUBLIC HAVE_FCNTL_H) endif() check_function_exists(dirfd HAVE_DIRFD) + if(HAVE_DIRFD) - target_compile_definitions(alphafill PUBLIC HAVE_DIRFD) + target_compile_definitions(alphafill PUBLIC HAVE_DIRFD) endif() check_symbol_exists(closefrom "unistd.h" HAVE_CLOSEFROM) + if(HAVE_CLOSEFROM) - target_compile_definitions(alphafill PUBLIC HAVE_CLOSEFROM) + target_compile_definitions(alphafill PUBLIC HAVE_CLOSEFROM) endif() check_symbol_exists(sysconf "unistd.h" HAVE_SYSCONF) + if(HAVE_SYSCONF) - target_compile_definitions(alphafill PUBLIC HAVE_SYSCONF) + target_compile_definitions(alphafill PUBLIC HAVE_SYSCONF) endif() target_link_libraries(alphafill libmcfp::libmcfp cifpp::cifpp zeep::zeep) if(BUILD_WEB_APPLICATION) - target_compile_definitions(alphafill PUBLIC BUILD_WEB_APPLICATION) - add_dependencies(alphafill webpack) + target_compile_definitions(alphafill PUBLIC BUILD_WEB_APPLICATION) + add_dependencies(alphafill webpack) - target_link_libraries(alphafill libpqxx::pqxx OpenSSL::SSL OpenSSL::Crypto) + target_link_libraries(alphafill libpqxx::pqxx OpenSSL::SSL OpenSSL::Crypto) endif() target_compile_definitions(alphafill - PUBLIC ALPHAFILL_DATA_DIR="${ALPHAFILL_DATA_DIR}") + PUBLIC ALPHAFILL_DATA_DIR="${ALPHAFILL_DATA_DIR}") if(USE_RSRC) - message("Using resources compiled with ${MRC_EXECUTABLE}") - add_compile_definitions(USE_RSRC WEBAPP_USES_RESOURCES) + message("Using resources compiled with ${MRC_EXECUTABLE}") + add_compile_definitions(USE_RSRC WEBAPP_USES_RESOURCES) - list(APPEND RESOURCES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_af.dic - ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic) + list(APPEND RESOURCES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_af.dic + ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic) - if(CIFPP_SHARE_DIR AND EXISTS "${CIFPP_SHARE_DIR}/mmcif_pdbx.dic") - list(APPEND RESOURCES ${CIFPP_SHARE_DIR}/mmcif_pdbx.dic) - endif() + if(CIFPP_SHARE_DIR AND EXISTS "${CIFPP_SHARE_DIR}/mmcif_pdbx.dic") + list(APPEND RESOURCES ${CIFPP_SHARE_DIR}/mmcif_pdbx.dic) + endif() - if(BUILD_WEB_APPLICATION) - list(APPEND RESOURCES ${PROJECT_SOURCE_DIR}/docroot/ - ${PROJECT_SOURCE_DIR}/db-schema.sql - ${PROJECT_SOURCE_DIR}/scripts/refine.mcr) - endif() + if(BUILD_WEB_APPLICATION) + list(APPEND RESOURCES ${PROJECT_SOURCE_DIR}/docroot/ + ${PROJECT_SOURCE_DIR}/db-schema.sql + ${PROJECT_SOURCE_DIR}/scripts/refine.mcr) + endif() - mrc_target_resources(alphafill ${RESOURCES}) + mrc_target_resources(alphafill ${RESOURCES}) endif() # installation install(TARGETS alphafill RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${PROJECT_SOURCE_DIR}/af-ligands.cif - DESTINATION ${ALPHAFILL_DATA_DIR}) + DESTINATION ${ALPHAFILL_DATA_DIR}) install( - CODE " + CODE " if(EXISTS \"${CMAKE_INSTALL_FULL_SYSCONFDIR}/alphafill.conf\") file(INSTALL ${CMAKE_BINARY_DIR}/alphafill.conf-dist DESTINATION ${CMAKE_INSTALL_FULL_SYSCONFDIR}) @@ -300,59 +305,59 @@ install( # If we do not use resources, install the resource data in a share directory if(NOT USE_RSRC) - install( - FILES ${CIFPP_SHARE_DIR}/mmcif_pdbx.dic - ${PROJECT_SOURCE_DIR}/rsrc/mmcif_af.dic - ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic - DESTINATION ${ALPHAFILL_DATA_DIR}) - - if(BUILD_WEB_APPLICATION) - install(FILES ${PROJECT_SOURCE_DIR}/db-schema.sql - ${PROJECT_SOURCE_DIR}/scripts/refine.mcr - DESTINATION ${ALPHAFILL_DATA_DIR}) - - install(DIRECTORY ${PROJECT_SOURCE_DIR}/docroot - DESTINATION ${ALPHAFILL_DATA_DIR}) - endif() + install( + FILES ${CIFPP_SHARE_DIR}/mmcif_pdbx.dic + ${PROJECT_SOURCE_DIR}/rsrc/mmcif_af.dic + ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic + DESTINATION ${ALPHAFILL_DATA_DIR}) + + if(BUILD_WEB_APPLICATION) + install(FILES ${PROJECT_SOURCE_DIR}/db-schema.sql + ${PROJECT_SOURCE_DIR}/scripts/refine.mcr + DESTINATION ${ALPHAFILL_DATA_DIR}) + + install(DIRECTORY ${PROJECT_SOURCE_DIR}/docroot + DESTINATION ${ALPHAFILL_DATA_DIR}) + endif() endif() if(BUILD_DOCUMENTATION) - add_subdirectory(docs) + add_subdirectory(docs) endif() if(BUILD_TESTING) - # simply run alphafill jobs on our test data start by generating a new config - # file - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/test/alphafill-test.conf.in - ${CMAKE_CURRENT_BINARY_DIR}/alphafill-test.conf @ONLY) - - # First create an index - add_test(NAME create_index_test COMMAND alphafill create-index - --config=alphafill-test.conf) - - add_test( - NAME run_process_test - COMMAND - alphafill process --config=alphafill-test.conf - ${CMAKE_CURRENT_SOURCE_DIR}/test/afdb-v4/P2/AF-P29373-F1-model_v4.cif.gz - AF-P29373-F1-filled.cif.gz - WORKING_DIRECTORY) - - add_executable(validate-test-files - ${PROJECT_SOURCE_DIR}/test/validate-test-files.cpp) - target_link_libraries(validate-test-files libmcfp::libmcfp cifpp::cifpp - zeep::zeep) - - add_test(NAME validate_files_test - COMMAND validate-test-files AF-P29373-F1-filled.cif.gz - AF-P29373-F1-filled.cif.json --pdb-id=1CBS --pdb-asym-id=A) - - # If cifpp was not found, it means it was not installed so we might not have a - # proper CIFPP_SHARE_DIR and the the tests will fail - if(NOT cifpp_FOUND) - set_property(TEST run_process_test - PROPERTY ENVIRONMENT "LIBCIFPP_DATA_DIR=${CIFPP_SHARE_DIR}") - set_property(TEST validate_files_test - PROPERTY ENVIRONMENT "LIBCIFPP_DATA_DIR=${CIFPP_SHARE_DIR}") - endif() + # simply run alphafill jobs on our test data start by generating a new config + # file + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/test/alphafill-test.conf.in + ${CMAKE_CURRENT_BINARY_DIR}/alphafill-test.conf @ONLY) + + # First create an index + add_test(NAME create_index_test COMMAND alphafill create-index + --config=alphafill-test.conf) + + add_test( + NAME run_process_test + COMMAND + alphafill process --config=alphafill-test.conf + ${CMAKE_CURRENT_SOURCE_DIR}/test/afdb-v4/P2/AF-P29373-F1-model_v4.cif.gz + AF-P29373-F1-filled.cif.gz + WORKING_DIRECTORY) + + add_executable(validate-test-files + ${PROJECT_SOURCE_DIR}/test/validate-test-files.cpp) + target_link_libraries(validate-test-files libmcfp::libmcfp cifpp::cifpp + zeep::zeep) + + add_test(NAME validate_files_test + COMMAND validate-test-files AF-P29373-F1-filled.cif.gz + AF-P29373-F1-filled.cif.json --pdb-id=1CBS --pdb-asym-id=A) + + # If cifpp was not found, it means it was not installed so we might not have a + # proper CIFPP_SHARE_DIR and the the tests will fail + if(NOT cifpp_FOUND) + set_property(TEST run_process_test + PROPERTY ENVIRONMENT "LIBCIFPP_DATA_DIR=${CIFPP_SHARE_DIR}") + set_property(TEST validate_files_test + PROPERTY ENVIRONMENT "LIBCIFPP_DATA_DIR=${CIFPP_SHARE_DIR}") + endif() endif() diff --git a/src/alphafill.cpp b/src/alphafill.cpp index 19cd305..bf4dda4 100644 --- a/src/alphafill.cpp +++ b/src/alphafill.cpp @@ -114,63 +114,91 @@ std::tuple isUniqueLigand(const cif::mm::structure &str auto minDistanceSq = minDistance * minDistance; - std::vector> atoms_a; + // short cut + if (lig.atoms().size() == 1) + { + for (auto &np : structure.non_polymers()) + { + if (np.get_compound_id() != id) + continue; + + // assert(lig.atoms().size() == 1); + // assert(np.atoms().size() == 1); - for (auto &a : lig.atoms()) - atoms_a.emplace_back(a.get_label_atom_id(), a.get_location()); - sort(atoms_a.begin(), atoms_a.end(), [](auto &a, auto &b) { return std::get<0>(a) < std::get<0>(b); }); + auto pa = lig.atoms().front().get_location(); + auto pb = np.atoms().front().get_location(); - for (auto &np : structure.non_polymers()) + if (distance_squared(pa, pb) < minDistanceSq) + { + if (lig.unique_atoms().size() > np.unique_atoms().size()) + result = { UniqueType::MoreAtoms, np.get_asym_id() }; + else + result = { UniqueType::Seen, np.get_asym_id() }; + + break; + } + } + } + else { - if (np.get_compound_id() != id) - continue; + std::vector> atoms_a; - std::vector> atoms_b; + for (auto &a : lig.atoms()) + atoms_a.emplace_back(a.get_label_atom_id(), a.get_location()); + sort(atoms_a.begin(), atoms_a.end(), [](auto &a, auto &b) { return std::get<0>(a) < std::get<0>(b); }); - for (auto &a : np.atoms()) - atoms_b.emplace_back(a.get_label_atom_id(), a.get_location()); - sort(atoms_b.begin(), atoms_b.end(), [](auto &a, auto &b) { return std::get<0>(a) < std::get<0>(b); }); + for (auto &np : structure.non_polymers()) + { + if (np.get_compound_id() != id) + continue; - std::vector pa, pb; + std::vector> atoms_b; - auto a_i = atoms_a.begin(); - auto b_i = atoms_b.begin(); + for (auto &a : np.atoms()) + atoms_b.emplace_back(a.get_label_atom_id(), a.get_location()); + sort(atoms_b.begin(), atoms_b.end(), [](auto &a, auto &b) { return std::get<0>(a) < std::get<0>(b); }); - while (a_i != atoms_a.end() and b_i != atoms_b.end()) - { - const auto &[id_a, p_a] = *a_i; - const auto &[id_b, p_b] = *b_i; + std::vector pa, pb; - if (id_a == id_b) + auto a_i = atoms_a.begin(); + auto b_i = atoms_b.begin(); + + while (a_i != atoms_a.end() and b_i != atoms_b.end()) { - pa.emplace_back(p_a); - pb.emplace_back(p_b); + const auto &[id_a, p_a] = *a_i; + const auto &[id_b, p_b] = *b_i; - ++a_i; - ++b_i; - continue; - } + if (id_a == id_b) + { + pa.emplace_back(p_a); + pb.emplace_back(p_b); - if (id_a < id_b) - ++a_i; - else - ++b_i; - } + ++a_i; + ++b_i; + continue; + } - if (pa.empty() or pb.empty()) - continue; + if (id_a < id_b) + ++a_i; + else + ++b_i; + } - auto ca = centroid(pa); - auto cb = centroid(pb); + if (pa.empty() or pb.empty()) + continue; - if (distance_squared(ca, cb) < minDistanceSq) - { - if (lig.unique_atoms().size() > np.unique_atoms().size()) - result = { UniqueType::MoreAtoms, np.get_asym_id() }; - else - result = { UniqueType::Seen, np.get_asym_id() }; + auto ca = centroid(pa); + auto cb = centroid(pb); - break; + if (distance_squared(ca, cb) < minDistanceSq) + { + if (lig.unique_atoms().size() > np.unique_atoms().size()) + result = { UniqueType::MoreAtoms, np.get_asym_id() }; + else + result = { UniqueType::Seen, np.get_asym_id() }; + + break; + } } } From e0a608746987f38050ec47d92901177f105eada8 Mon Sep 17 00:00:00 2001 From: "Maarten L. Hekkelman" Date: Thu, 14 Dec 2023 20:24:25 +0100 Subject: [PATCH 2/4] optimise isUnique a bit more --- src/alphafill.cpp | 51 +++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/src/alphafill.cpp b/src/alphafill.cpp index bf4dda4..113fb23 100644 --- a/src/alphafill.cpp +++ b/src/alphafill.cpp @@ -114,33 +114,46 @@ std::tuple isUniqueLigand(const cif::mm::structure &str auto minDistanceSq = minDistance * minDistance; - // short cut - if (lig.atoms().size() == 1) + // First, naive attempt. Take centroids and compare distance. + // This is the fast approach. + + std::vector pa; + bool has_common = false; + + for (auto &a : lig.atoms()) + pa.push_back(a.get_location()); + auto ca = cif::centroid(pa); + + for (auto &np : structure.non_polymers()) { - for (auto &np : structure.non_polymers()) - { - if (np.get_compound_id() != id) - continue; + if (np.get_compound_id() != id) + continue; - // assert(lig.atoms().size() == 1); - // assert(np.atoms().size() == 1); + has_common = true; - auto pa = lig.atoms().front().get_location(); - auto pb = np.atoms().front().get_location(); + std::vector pb; - if (distance_squared(pa, pb) < minDistanceSq) - { - if (lig.unique_atoms().size() > np.unique_atoms().size()) - result = { UniqueType::MoreAtoms, np.get_asym_id() }; - else - result = { UniqueType::Seen, np.get_asym_id() }; + for (auto &a : np.atoms()) + pb.push_back(a.get_location()); + auto cb = cif::centroid(pb); - break; - } + if (distance_squared(ca, cb) < minDistanceSq) + { + if (lig.unique_atoms().size() > np.unique_atoms().size()) + result = { UniqueType::MoreAtoms, np.get_asym_id() }; + else + result = { UniqueType::Seen, np.get_asym_id() }; + + break; } } - else + + if (has_common) { + // OK, not close, but be careful, sometimes whole chunks + // are missing and then the centroids are not close but + // the centroids of the common atoms may still be close. + std::vector> atoms_a; for (auto &a : lig.atoms()) From 7bee38a550e05cfee9dfe7a10cdb8e275efacb0f Mon Sep 17 00:00:00 2001 From: "Maarten L. Hekkelman" Date: Thu, 14 Dec 2023 20:30:27 +0100 Subject: [PATCH 3/4] removing transplant from json --- src/alphafill.cpp | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/alphafill.cpp b/src/alphafill.cpp index 113fb23..1126a82 100644 --- a/src/alphafill.cpp +++ b/src/alphafill.cpp @@ -850,20 +850,29 @@ zeep::json::element alphafill(cif::datablock &db, const std::vector auto &rep_res = af_structure.get_residue(replace_id); if (cif::VERBOSE > 0) std::cerr << "Residue " << res << " has more atoms than the first transplant " << rep_res << '\n'; - af_structure.remove_residue(rep_res); - - for (auto &hit : hits) + + try { - auto ti = std::find_if(hit["transplants"].begin(), hit["transplants"].end(), [id=replace_id](json &e) { - return e["asym_id"] == id; - }); - if (ti != hit["transplants"].end()) + af_structure.remove_residue(rep_res); + + for (auto &hit : hits) { - hit["transplants"].erase(ti); - break; + auto ti = std::find_if(hit["transplants"].begin(), hit["transplants"].end(), [id=replace_id](json &e) { + return e["asym_id"] == id; + }); + if (ti != hit["transplants"].end()) + { + hit["transplants"].erase(ti); + break; + } } } - + catch(const std::exception& e) + { + if (cif::VERBOSE > 0) + std::cerr << "Failed to remove residue with asym ID " << replace_id << ": " << e.what() << '\n'; + } + break; } From 86fd6da32d5d7738460e56ec1cbb4b4dda2a8657 Mon Sep 17 00:00:00 2001 From: "Maarten L. Hekkelman" Date: Mon, 15 Jan 2024 14:11:19 +0100 Subject: [PATCH 4/4] disable filling based on 3d-beacon requests --- src/server.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/server.cpp b/src/server.cpp index f9cff71..605d81f 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -899,8 +899,9 @@ zeep::json::element affd_rest_controller::get_aff_3d_beacon(std::string af_id, s if (not fs::exists(file)) { - auto &data_service = data_service::instance(); - data_service.queue_3d_beacon_request(id); + // Disabled due to too many requests + // auto &data_service = data_service::instance(); + // data_service.queue_3d_beacon_request(id); throw zeep::http::not_found; }