From c59a0b7d58400b7311eb30b3bfae7ac1e5d2325b Mon Sep 17 00:00:00 2001 From: Jonas Schaub <44881147+JonasSchaub@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:26:02 +0100 Subject: [PATCH] WIP, only does a bit of refactoring --- .../model/fragmentation/algorithm/RECAP.java | 99 +++++++++++-------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/src/main/java/de/unijena/cheminf/mortar/model/fragmentation/algorithm/RECAP.java b/src/main/java/de/unijena/cheminf/mortar/model/fragmentation/algorithm/RECAP.java index 725140ed..bb06446e 100644 --- a/src/main/java/de/unijena/cheminf/mortar/model/fragmentation/algorithm/RECAP.java +++ b/src/main/java/de/unijena/cheminf/mortar/model/fragmentation/algorithm/RECAP.java @@ -59,38 +59,45 @@ * @version 1.0.0.0 */ public class RECAP { - + /** + * TODO doc + * private: Limits access to within the enclosing class. + * static: Makes the class independent of the enclosing class's instances. + * final: Prevents the class from being extended. + */ private static final class CleavageRule { + //TODO move to State? + private final String smirksCode; - private final String SMIRKS_CODE; + private final String eductSmartsCode; - private final String EDUCT_SMARTS_CODE; + private final String productSmartsCode; - private final String PRODUCT_SMARTS_CODE; - - private final String NAME; + private final String name; private final SmirksTransform transformation; private final Pattern eductPattern; private CleavageRule(String smirksCode, String name) { + //TODO checks this(smirksCode.split(">>")[0], smirksCode.split(">>")[1], name); } private CleavageRule(String eductSmarts, String productSmarts, String name) { - this.EDUCT_SMARTS_CODE = eductSmarts; - this.PRODUCT_SMARTS_CODE = productSmarts; - this.NAME = name; - this.SMIRKS_CODE = String.format("%s>>%s", eductSmarts, productSmarts); + this.eductSmartsCode = eductSmarts; + this.productSmartsCode = productSmarts; + this.name = name; + this.smirksCode = String.format("%s>>%s", eductSmarts, productSmarts); //can throw IllegalStateException if code is invalid - this.transformation = Smirks.compile(this.SMIRKS_CODE); + this.transformation = Smirks.compile(this.smirksCode); + //TODO is this a problem? We want cycle detection and aromaticity detection to be done externally, explicitly this.transformation.setPrepare(false); this.eductPattern = SmartsPattern.create(eductSmarts); } private String getName() { - return this.NAME; + return this.name; } private SmirksTransform getTransformation() { @@ -98,11 +105,11 @@ private SmirksTransform getTransformation() { } private String getEductSmartsCode() { - return this.EDUCT_SMARTS_CODE; + return this.eductSmartsCode; } private String getProductSmartsCode() { - return this.PRODUCT_SMARTS_CODE; + return this.productSmartsCode; } private Pattern getEductPattern() { @@ -110,7 +117,7 @@ private Pattern getEductPattern() { } private String getSmirksCode() { - return this.SMIRKS_CODE; + return this.smirksCode; } } //TODO option for minimum fragment size (also described in RECAP paper) @@ -155,7 +162,12 @@ public HierarchyNode buildHierarchy(IAtomContainer mol, int minimumFragmentSize) return state.buildHierarchy(mol, minimumFragmentSize); } - public static class HierarchyNode { + /** + * public: Accessible by calling code since this is a return value. + * static: Makes the class independent of the enclosing class's instances. + * final: Prevents the class from being extended. + */ + public static final class HierarchyNode { private final IAtomContainer structure; @@ -236,6 +248,9 @@ private void collectAllDescendants(List childrenList, boolean onl /** * Encapsulating the state of the algorithm allows thread-safe calling. + * private: Limits access to within the enclosing class. + * static: Makes the class independent of the enclosing class's instances. + * final: Prevents the class from being extended. */ private static final class State { /** @@ -249,7 +264,7 @@ private static final class State { * and an amine -> note also that the atoms can potentially be in a * ring, just not the bonds */ - private final CleavageRule AMIDE = new CleavageRule("[C;!$(C([#7])[#7]):1](=!@[O:2])!@[#7;+0;!D1:3]", "*[C:1]=[O:2].*[#7:3]", "Amide"); + private final CleavageRule amide = new CleavageRule("[C;!$(C([#7])[#7]):1](=!@[O:2])!@[#7;+0;!D1:3]", "*[C:1]=[O:2].*[#7:3]", "Amide"); /** * 2 = Ester -> aliphatic C (index 1), connected via a non-ring double * bond to aliphatic O (index 2) as a side chain, connected via a @@ -259,7 +274,7 @@ private static final class State { * aldehyde (not a carboxylic acid) and an alcohol -> note also that the * atoms can potentially be in a ring, just not the bonds */ - private final CleavageRule ESTER = new CleavageRule("[C:1](=!@[O:2])!@[O;+0:3]", "*[C:1]=[O:2].[O:3]*", "Ester"); + private final CleavageRule ester = new CleavageRule("[C:1](=!@[O:2])!@[O;+0:3]", "*[C:1]=[O:2].[O:3]*", "Ester"); //TODO does this also work for tertiary amines? I guess it matches multiple times? /** * 3 = Amine -> aliphatic N with a neutral charge and a degree of NOT 1 @@ -272,7 +287,7 @@ private static final class State { * just not the bonds -> simpler alternative would be (without excluding * any sort of amides): [N;!D1](!@[*:1])!@[*:2]>>*[*:1].[*:2]* */ - private final CleavageRule AMINE = new CleavageRule("[N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*:1])-!@[*:2]", "*[*:1].[*:2]*", "Amine"); + private final CleavageRule amine = new CleavageRule("[N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*:1])-!@[*:2]", "*[*:1].[*:2]*", "Amine"); /** * 4 = Urea -> aliphatic or aromatic(!) N with a neutral charge and a * degree of 2 or 3 (index 1), connected via a non-ring bond to an @@ -285,7 +300,7 @@ private static final class State { * possible way to synthesize a urea functionality) note also that the * atoms can potentially be in a ring, just not the bonds */ - private final CleavageRule UREA = new CleavageRule("[#7;+0;D2,D3:1]!@C(!@=O)!@[#7;+0;D2,D3:2]", "*[#7:1].[#7:2]*", "Urea"); + private final CleavageRule urea = new CleavageRule("[#7;+0;D2,D3:1]!@C(!@=O)!@[#7;+0;D2,D3:2]", "*[#7:1].[#7:2]*", "Urea"); /** * 5 = Ether -> aliphatic or aromatic(!) C (index 1) connected via a * non-ring bond to an aliphatic O with a neutral charge, connected via @@ -295,7 +310,7 @@ private static final class State { * note also that the atoms can potentially be in a ring, just not the * bonds */ - private final CleavageRule ETHER = new CleavageRule("[#6:1]-!@[O;+0]-!@[#6:2]", "[#6:1]*.*[#6:2]", "Ether"); + private final CleavageRule ether = new CleavageRule("[#6:1]-!@[O;+0]-!@[#6:2]", "[#6:1]*.*[#6:2]", "Ether"); /** * 6 = Olefin -> an aliphatic C (index 1) connected via a non-ring * double bond to another aliphatic C (index 2) reacts to the two carbon @@ -304,12 +319,12 @@ private static final class State { * that the degree of the carbon atoms is not specified note also that * the atoms can potentially be in a ring, just not the bonds */ - private final CleavageRule OLEFIN = new CleavageRule("[C:1]=!@[C:2]", "[C:1]*.*[C:2]", "Olefin"); + private final CleavageRule olefin = new CleavageRule("[C:1]=!@[C:2]", "[C:1]*.*[C:2]", "Olefin"); //TODO what about this? I do not think it is covered by nr 3 (amine)! /** * 7 = Quaternary nitrogen */ - private final CleavageRule QUATERNARY_NITROGEN = new CleavageRule("", "", "Quaternary nitrogen"); + private final CleavageRule quaternaryNitrogen = new CleavageRule("", "", "Quaternary nitrogen"); /** * 8 = Aromatic nitrogen - aliphatic carbon -> an aromatic N with a * neutral charge (index 1) connected via a non-ring bond to an @@ -318,7 +333,7 @@ private static final class State { * synthesized note also that the atoms can potentially be in a ring * (the n must be), just not the bonds */ - private final CleavageRule AROMATIC_NITROGEN_TO_ALIPHATIC_CARBON = new CleavageRule("[n;+0:1]-!@[C:2]", "[n:1]*.[C:2]*", "Aromatic nitrogen to aliphatic carbon"); + private final CleavageRule aromaticNitrogenToAliphaticCarbon = new CleavageRule("[n;+0:1]-!@[C:2]", "[n:1]*.[C:2]*", "Aromatic nitrogen to aliphatic carbon"); /** * 9 = Lactam nitrogen - aliphatic carbon -> an aliphatic O (index 3) * connected via a double bond (ring or non-ring) to an aliphatic C @@ -329,14 +344,14 @@ private static final class State { * note also that no assumption is made as to how the structure was * synthesized */ - private final CleavageRule LACTAM_NITROGEN_TO_ALIPHATIC_CARBON = new CleavageRule("[O:3]=[C:4]-@[N;+0:1]-!@[C:2]", "[O:3]=[C:4]-[N:1]*.[C:2]*", "Lactam nitrogen to aliphatic carbon"); + private final CleavageRule lactamNitrogenToAliphaticCarbon = new CleavageRule("[O:3]=[C:4]-@[N;+0:1]-!@[C:2]", "[O:3]=[C:4]-[N:1]*.[C:2]*", "Lactam nitrogen to aliphatic carbon"); /** * 10 = Aromatic carbon - aromatic carbon -> aromatic C (index 1) * connected via a non-ring bond(!) to another aromatic C (index 2) * reacts to the bond in between being split note that no assumption is * made as to how the structure was synthesized */ - private final CleavageRule AROMATIC_CARBON_TO_AROMATIC_CARBON = new CleavageRule("[c:1]-!@[c:2]", "[c:1]*.*[c:2]", "Aromatic carbon to aromatic carbon"); + private final CleavageRule aromaticCarbonToAromaticCarbon = new CleavageRule("[c:1]-!@[c:2]", "[c:1]*.*[c:2]", "Aromatic carbon to aromatic carbon"); /** * 11 = Sulphonamide -> an aliphatic or aromatic N with a neutral charge * and a degree of 2 or 3 (index 1) connected via a non-ring bond to an @@ -346,7 +361,7 @@ private static final class State { * note that no assumption is made as to how the structure was * synthesized */ - private final CleavageRule SULPHONAMIDE = new CleavageRule("[#7;+0;D2,D3:1]-!@[S:2](=[O:3])=[O:4]", "[#7:1]*.*[S:2](=[O:3])=[O:4]", "Sulphonamide"); + private final CleavageRule sulphonamide = new CleavageRule("[#7;+0;D2,D3:1]-!@[S:2](=[O:3])=[O:4]", "[#7:1]*.*[S:2](=[O:3])=[O:4]", "Sulphonamide"); //TODO this is not part of the original RECAP, make it optional? /** * S1 = Cyclic amines -> an aliphatic or aromatic N in a ring, with a @@ -355,7 +370,7 @@ private static final class State { * the other atom connected to any atom note that no assumption is made * as to how the structure was synthesized */ - private final CleavageRule CYCLIC_AMINES = new CleavageRule("[#7;R;D3;+0:1]-!@[*:2]", "*[#7:1].[*:2]*", "Cyclic amines"); + private final CleavageRule cyclicAmines = new CleavageRule("[#7;R;D3;+0:1]-!@[*:2]", "*[#7:1].[*:2]*", "Cyclic amines"); //TODO this is not part of the original RECAP, make it optional? /** * S2 = Aromatic nitrogen - aromatic carbon -> aromatic N with a neutral @@ -364,25 +379,31 @@ private static final class State { * assumption is made as to how the structure was synthesized note also * that both atoms are in different rings */ - private final CleavageRule AROMATIC_NITROGEN_TO_AROMATIC_CARBON = new CleavageRule("[n;+0:1]-!@[c:2]", "[n:1]*.*[c:2]", "Aromatic nitrogen to aromatic carbon"); + private final CleavageRule aromaticNitrogenToAromaticCarbon = new CleavageRule("[n;+0:1]-!@[c:2]", "[n:1]*.*[c:2]", "Aromatic nitrogen to aromatic carbon"); /** * String array of SMIRKS reaction transform codes that describe the * cleavage rules. */ - private final CleavageRule[] CLEAVAGE_RULES = {this.AMIDE, this.ESTER, - this.AMINE, this.UREA, this.ETHER, this.OLEFIN, this.QUATERNARY_NITROGEN, - this.AROMATIC_NITROGEN_TO_ALIPHATIC_CARBON, this.LACTAM_NITROGEN_TO_ALIPHATIC_CARBON, - this.AROMATIC_CARBON_TO_AROMATIC_CARBON, this.SULPHONAMIDE, this.CYCLIC_AMINES, - this.AROMATIC_NITROGEN_TO_AROMATIC_CARBON - }; + private final CleavageRule[] cleavageRules = {this.amide, this.ester, + this.amine, this.urea, this.ether, this.olefin, this.quaternaryNitrogen, + this.aromaticNitrogenToAliphaticCarbon, this.lactamNitrogenToAliphaticCarbon, + this.aromaticCarbonToAromaticCarbon, this.sulphonamide, this.cyclicAmines, + this.aromaticNitrogenToAromaticCarbon}; + /** + * + * + * @param inputMol + * @param minimumFragmentSize + * @return + */ private HierarchyNode buildHierarchy(IAtomContainer inputMol, int minimumFragmentSize) { HierarchyNode inputMolNode = new HierarchyNode(inputMol); Queue queue = new LinkedList<>(); queue.add(inputMolNode); while (!queue.isEmpty()) { HierarchyNode currentNode = queue.poll(); - for (CleavageRule rule : this.CLEAVAGE_RULES) { + for (CleavageRule rule : this.cleavageRules) { if (rule.getEductPattern().matches(currentNode.getStructure())) { //mode unique returns as many products as there are splittable bonds, so one product for every bond split Iterable products = rule.getTransformation().apply(currentNode.getStructure(), Transform.Mode.Unique); @@ -420,7 +441,7 @@ private List applyTransformationsSinglePass(IAtomContainer mol, //TODO this still includes intermediates! List lastRoundFragments = new ArrayList<>(mol.getAtomCount() * 2); lastRoundFragments.add(mol); - for (CleavageRule rule : this.CLEAVAGE_RULES) { + for (CleavageRule rule : this.cleavageRules) { List newRoundFragments = new ArrayList<>(mol.getAtomCount() * 2); for (IAtomContainer fragment : lastRoundFragments) { if (rule.getEductPattern().matches(fragment)) { @@ -470,8 +491,8 @@ private List applyTransformationsWithAllIntermediates(IAtomConta Map finalFragments = new HashMap<>(mol.getAtomCount() * 2); SmilesGenerator smilesGenerator = new SmilesGenerator(SmiFlavor.Absolute | SmiFlavor.UseAromaticSymbols); //step 1 determine relevant transformation rules that have at least one match in the mol - List matchingRules = new ArrayList<>(this.CLEAVAGE_RULES.length); - for (CleavageRule rule : this.CLEAVAGE_RULES) { + List matchingRules = new ArrayList<>(this.cleavageRules.length); + for (CleavageRule rule : this.cleavageRules) { if (rule.getEductPattern().matches(mol)) { matchingRules.add(rule); }