Skip to content

Commit

Permalink
WIP, only does a bit of refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasSchaub committed Nov 25, 2024
1 parent 8917cd1 commit c59a0b7
Showing 1 changed file with 60 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,58 +59,65 @@
* @version 1.0.0.0
*/
public class RECAP {

/**
* Bundles everything that describes one cleavage rule: its name, the educt
* and product SMARTS codes, the SMIRKS code joined from the two, the
* compiled SMIRKS transformation, and a SMARTS pattern for the educt side.
* NOTE(review): this excerpt appears to show the fields, assignments, and
* return statements both under their former UPPER_CASE names and under their
* new camelCase names - confirm which set is live.
* TODO doc
* private: Limits access to within the enclosing class.
* static: Makes the class independent of the enclosing class's instances.
* final: Prevents the class from being extended.
*/
private static final class CleavageRule {
//TODO move to State?
// SMIRKS code of the rule, i.e. educt and product SMARTS codes joined by ">>".
private final String smirksCode;

private final String SMIRKS_CODE;
// SMARTS code of the educt (left-hand) side of the rule.
private final String eductSmartsCode;

private final String EDUCT_SMARTS_CODE;
// SMARTS code of the product (right-hand) side of the rule.
private final String productSmartsCode;

private final String PRODUCT_SMARTS_CODE;

// Name of the rule as passed to the constructor, e.g. "Amide".
private final String NAME;
private final String name;

// Transformation compiled from the SMIRKS code in the constructor.
private final SmirksTransform transformation;

// Pattern created from the educt SMARTS code in the constructor.
private final Pattern eductPattern;

/**
* Creates a rule from a complete SMIRKS code by splitting it at "&gt;&gt;"
* and passing the first part as educt SMARTS and the second part as product
* SMARTS to the three-argument constructor. No validation is done yet (see
* TODO below), so a code that does not yield two parts when split fails on
* the index access.
*
* @param smirksCode SMIRKS code of the form "educt&gt;&gt;product"
* @param name name of the rule
*/
private CleavageRule(String smirksCode, String name) {
//TODO checks
this(smirksCode.split(">>")[0], smirksCode.split(">>")[1], name);
}

/**
* Creates a rule from separate educt and product SMARTS codes. The SMIRKS
* code is assembled as "educt&gt;&gt;product" and compiled into the
* transformation, and a SMARTS pattern is created from the educt code.
*
* @param eductSmarts SMARTS code of the educt side
* @param productSmarts SMARTS code of the product side
* @param name name of the rule
*/
private CleavageRule(String eductSmarts, String productSmarts, String name) {
this.EDUCT_SMARTS_CODE = eductSmarts;
this.PRODUCT_SMARTS_CODE = productSmarts;
this.NAME = name;
this.SMIRKS_CODE = String.format("%s>>%s", eductSmarts, productSmarts);
this.eductSmartsCode = eductSmarts;
this.productSmartsCode = productSmarts;
this.name = name;
this.smirksCode = String.format("%s>>%s", eductSmarts, productSmarts);
//can throw IllegalStateException if code is invalid
this.transformation = Smirks.compile(this.SMIRKS_CODE);
this.transformation = Smirks.compile(this.smirksCode);
//TODO is this a problem? We want cycle detection and aromaticity detection to be done externally, explicitly
this.transformation.setPrepare(false);
this.eductPattern = SmartsPattern.create(eductSmarts);
}

/**
* @return the name of this rule
*/
private String getName() {
return this.NAME;
return this.name;
}

/**
* @return the transformation compiled from the SMIRKS code of this rule
*/
private SmirksTransform getTransformation() {
return this.transformation;
}

/**
* @return the SMARTS code of the educt side
*/
private String getEductSmartsCode() {
return this.EDUCT_SMARTS_CODE;
return this.eductSmartsCode;
}

/**
* @return the SMARTS code of the product side
*/
private String getProductSmartsCode() {
return this.PRODUCT_SMARTS_CODE;
return this.productSmartsCode;
}

/**
* @return the pattern created from the educt SMARTS code
*/
private Pattern getEductPattern() {
return this.eductPattern;
}

/**
* @return the SMIRKS code ("educt&gt;&gt;product") of this rule
*/
private String getSmirksCode() {
return this.SMIRKS_CODE;
return this.smirksCode;
}
}
//TODO option for minimum fragment size (also described in RECAP paper)
Expand Down Expand Up @@ -155,7 +162,12 @@ public HierarchyNode buildHierarchy(IAtomContainer mol, int minimumFragmentSize)
return state.buildHierarchy(mol, minimumFragmentSize);
}

public static class HierarchyNode {
/**
* public: Accessible by calling code since this is a return value.
* static: Makes the class independent of the enclosing class's instances.
* final: Prevents the class from being extended.
*/
public static final class HierarchyNode {

private final IAtomContainer structure;

Expand Down Expand Up @@ -236,6 +248,9 @@ private void collectAllDescendants(List<HierarchyNode> childrenList, boolean onl

/**
* Encapsulating the state of the algorithm allows thread-safe calling.
* private: Limits access to within the enclosing class.
* static: Makes the class independent of the enclosing class's instances.
* final: Prevents the class from being extended.
*/
private static final class State {
/**
Expand All @@ -249,7 +264,7 @@ private static final class State {
* and an amine -> note also that the atoms can potentially be in a
* ring, just not the bonds
*/
private final CleavageRule AMIDE = new CleavageRule("[C;!$(C([#7])[#7]):1](=!@[O:2])!@[#7;+0;!D1:3]", "*[C:1]=[O:2].*[#7:3]", "Amide");
private final CleavageRule amide = new CleavageRule("[C;!$(C([#7])[#7]):1](=!@[O:2])!@[#7;+0;!D1:3]", "*[C:1]=[O:2].*[#7:3]", "Amide");
/**
* 2 = Ester -> aliphatic C (index 1), connected via a non-ring double
* bond to aliphatic O (index 2) as a side chain, connected via a
Expand All @@ -259,7 +274,7 @@ private static final class State {
* aldehyde (not a carboxylic acid) and an alcohol -> note also that the
* atoms can potentially be in a ring, just not the bonds
*/
private final CleavageRule ESTER = new CleavageRule("[C:1](=!@[O:2])!@[O;+0:3]", "*[C:1]=[O:2].[O:3]*", "Ester");
private final CleavageRule ester = new CleavageRule("[C:1](=!@[O:2])!@[O;+0:3]", "*[C:1]=[O:2].[O:3]*", "Ester");
//TODO does this also work for tertiary amines? I guess it matches multiple times?
/**
* 3 = Amine -> aliphatic N with a neutral charge and a degree of NOT 1
Expand All @@ -272,7 +287,7 @@ private static final class State {
* just not the bonds -> simpler alternative would be (without excluding
* any sort of amides): [N;!D1](!@[*:1])!@[*:2]>>*[*:1].[*:2]*
*/
private final CleavageRule AMINE = new CleavageRule("[N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*:1])-!@[*:2]", "*[*:1].[*:2]*", "Amine");
private final CleavageRule amine = new CleavageRule("[N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*:1])-!@[*:2]", "*[*:1].[*:2]*", "Amine");
/**
* 4 = Urea -> aliphatic or aromatic(!) N with a neutral charge and a
* degree of 2 or 3 (index 1), connected via a non-ring bond to an
Expand All @@ -285,7 +300,7 @@ private static final class State {
* possible way to synthesize a urea functionality); note also that the
* atoms can potentially be in a ring, just not the bonds
*/
private final CleavageRule UREA = new CleavageRule("[#7;+0;D2,D3:1]!@C(!@=O)!@[#7;+0;D2,D3:2]", "*[#7:1].[#7:2]*", "Urea");
private final CleavageRule urea = new CleavageRule("[#7;+0;D2,D3:1]!@C(!@=O)!@[#7;+0;D2,D3:2]", "*[#7:1].[#7:2]*", "Urea");
/**
* 5 = Ether -> aliphatic or aromatic(!) C (index 1) connected via a
* non-ring bond to an aliphatic O with a neutral charge, connected via
Expand All @@ -295,7 +310,7 @@ private static final class State {
* note also that the atoms can potentially be in a ring, just not the
* bonds
*/
private final CleavageRule ETHER = new CleavageRule("[#6:1]-!@[O;+0]-!@[#6:2]", "[#6:1]*.*[#6:2]", "Ether");
private final CleavageRule ether = new CleavageRule("[#6:1]-!@[O;+0]-!@[#6:2]", "[#6:1]*.*[#6:2]", "Ether");
/**
* 6 = Olefin -> an aliphatic C (index 1) connected via a non-ring
* double bond to another aliphatic C (index 2) reacts to the two carbon
Expand All @@ -304,12 +319,12 @@ private static final class State {
* that the degree of the carbon atoms is not specified; note also that
* the atoms can potentially be in a ring, just not the bonds
*/
private final CleavageRule OLEFIN = new CleavageRule("[C:1]=!@[C:2]", "[C:1]*.*[C:2]", "Olefin");
private final CleavageRule olefin = new CleavageRule("[C:1]=!@[C:2]", "[C:1]*.*[C:2]", "Olefin");
//TODO what about this? I do not think it is covered by nr 3 (amine)!
/**
* 7 = Quaternary nitrogen
*/
private final CleavageRule QUATERNARY_NITROGEN = new CleavageRule("", "", "Quaternary nitrogen");
private final CleavageRule quaternaryNitrogen = new CleavageRule("", "", "Quaternary nitrogen");
/**
* 8 = Aromatic nitrogen - aliphatic carbon -> an aromatic N with a
* neutral charge (index 1) connected via a non-ring bond to an
Expand All @@ -318,7 +333,7 @@ private static final class State {
* synthesized; note also that the atoms can potentially be in a ring
* (the n must be), just not the bonds
*/
private final CleavageRule AROMATIC_NITROGEN_TO_ALIPHATIC_CARBON = new CleavageRule("[n;+0:1]-!@[C:2]", "[n:1]*.[C:2]*", "Aromatic nitrogen to aliphatic carbon");
private final CleavageRule aromaticNitrogenToAliphaticCarbon = new CleavageRule("[n;+0:1]-!@[C:2]", "[n:1]*.[C:2]*", "Aromatic nitrogen to aliphatic carbon");
/**
* 9 = Lactam nitrogen - aliphatic carbon -> an aliphatic O (index 3)
* connected via a double bond (ring or non-ring) to an aliphatic C
Expand All @@ -329,14 +344,14 @@ private static final class State {
* note also that no assumption is made as to how the structure was
* synthesized
*/
private final CleavageRule LACTAM_NITROGEN_TO_ALIPHATIC_CARBON = new CleavageRule("[O:3]=[C:4]-@[N;+0:1]-!@[C:2]", "[O:3]=[C:4]-[N:1]*.[C:2]*", "Lactam nitrogen to aliphatic carbon");
private final CleavageRule lactamNitrogenToAliphaticCarbon = new CleavageRule("[O:3]=[C:4]-@[N;+0:1]-!@[C:2]", "[O:3]=[C:4]-[N:1]*.[C:2]*", "Lactam nitrogen to aliphatic carbon");
/**
* 10 = Aromatic carbon - aromatic carbon -> aromatic C (index 1)
* connected via a non-ring bond(!) to another aromatic C (index 2)
* reacts to the bond in between being split; note that no assumption is
* made as to how the structure was synthesized
*/
private final CleavageRule AROMATIC_CARBON_TO_AROMATIC_CARBON = new CleavageRule("[c:1]-!@[c:2]", "[c:1]*.*[c:2]", "Aromatic carbon to aromatic carbon");
private final CleavageRule aromaticCarbonToAromaticCarbon = new CleavageRule("[c:1]-!@[c:2]", "[c:1]*.*[c:2]", "Aromatic carbon to aromatic carbon");
/**
* 11 = Sulphonamide -> an aliphatic or aromatic N with a neutral charge
* and a degree of 2 or 3 (index 1) connected via a non-ring bond to an
Expand All @@ -346,7 +361,7 @@ private static final class State {
* note that no assumption is made as to how the structure was
* synthesized
*/
private final CleavageRule SULPHONAMIDE = new CleavageRule("[#7;+0;D2,D3:1]-!@[S:2](=[O:3])=[O:4]", "[#7:1]*.*[S:2](=[O:3])=[O:4]", "Sulphonamide");
private final CleavageRule sulphonamide = new CleavageRule("[#7;+0;D2,D3:1]-!@[S:2](=[O:3])=[O:4]", "[#7:1]*.*[S:2](=[O:3])=[O:4]", "Sulphonamide");
//TODO this is not part of the original RECAP, make it optional?
/**
* S1 = Cyclic amines -> an aliphatic or aromatic N in a ring, with a
Expand All @@ -355,7 +370,7 @@ private static final class State {
* the other atom connected to any atom; note that no assumption is made
* as to how the structure was synthesized
*/
private final CleavageRule CYCLIC_AMINES = new CleavageRule("[#7;R;D3;+0:1]-!@[*:2]", "*[#7:1].[*:2]*", "Cyclic amines");
private final CleavageRule cyclicAmines = new CleavageRule("[#7;R;D3;+0:1]-!@[*:2]", "*[#7:1].[*:2]*", "Cyclic amines");
//TODO this is not part of the original RECAP, make it optional?
/**
* S2 = Aromatic nitrogen - aromatic carbon -> aromatic N with a neutral
Expand All @@ -364,25 +379,31 @@ private static final class State {
* assumption is made as to how the structure was synthesized; note also
* that both atoms are in different rings
*/
private final CleavageRule AROMATIC_NITROGEN_TO_AROMATIC_CARBON = new CleavageRule("[n;+0:1]-!@[c:2]", "[n:1]*.*[c:2]", "Aromatic nitrogen to aromatic carbon");
private final CleavageRule aromaticNitrogenToAromaticCarbon = new CleavageRule("[n;+0:1]-!@[c:2]", "[n:1]*.*[c:2]", "Aromatic nitrogen to aromatic carbon");
/**
* Array collecting all cleavage rules defined above; the fragmentation
* methods iterate over it to apply the rules.
*/
private final CleavageRule[] CLEAVAGE_RULES = {this.AMIDE, this.ESTER,
this.AMINE, this.UREA, this.ETHER, this.OLEFIN, this.QUATERNARY_NITROGEN,
this.AROMATIC_NITROGEN_TO_ALIPHATIC_CARBON, this.LACTAM_NITROGEN_TO_ALIPHATIC_CARBON,
this.AROMATIC_CARBON_TO_AROMATIC_CARBON, this.SULPHONAMIDE, this.CYCLIC_AMINES,
this.AROMATIC_NITROGEN_TO_AROMATIC_CARBON
};
private final CleavageRule[] cleavageRules = {this.amide, this.ester,
this.amine, this.urea, this.ether, this.olefin, this.quaternaryNitrogen,
this.aromaticNitrogenToAliphaticCarbon, this.lactamNitrogenToAliphaticCarbon,
this.aromaticCarbonToAromaticCarbon, this.sulphonamide, this.cyclicAmines,
this.aromaticNitrogenToAromaticCarbon};

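/**
* Builds the fragment hierarchy of the given molecule. The molecule is
* wrapped in a root node and nodes are then taken from a FIFO queue one
* by one; every cleavage rule whose educt pattern matches the structure
* of the current node is applied to it.
*
* @param inputMol the molecule to fragment; becomes the structure of the root node
* @param minimumFragmentSize minimum fragment size (TODO document unit and how it is enforced)
* @return TODO document (presumably the root node of the built hierarchy - confirm)
*/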
private HierarchyNode buildHierarchy(IAtomContainer inputMol, int minimumFragmentSize) {
HierarchyNode inputMolNode = new HierarchyNode(inputMol);
Queue<HierarchyNode> queue = new LinkedList<>();
queue.add(inputMolNode);
while (!queue.isEmpty()) {
HierarchyNode currentNode = queue.poll();
for (CleavageRule rule : this.CLEAVAGE_RULES) {
for (CleavageRule rule : this.cleavageRules) {
if (rule.getEductPattern().matches(currentNode.getStructure())) {
//mode unique returns as many products as there are splittable bonds, so one product for every bond split
Iterable<IAtomContainer> products = rule.getTransformation().apply(currentNode.getStructure(), Transform.Mode.Unique);
Expand Down Expand Up @@ -420,7 +441,7 @@ private List<IAtomContainer> applyTransformationsSinglePass(IAtomContainer mol,
//TODO this still includes intermediates!
List<IAtomContainer> lastRoundFragments = new ArrayList<>(mol.getAtomCount() * 2);
lastRoundFragments.add(mol);
for (CleavageRule rule : this.CLEAVAGE_RULES) {
for (CleavageRule rule : this.cleavageRules) {
List<IAtomContainer> newRoundFragments = new ArrayList<>(mol.getAtomCount() * 2);
for (IAtomContainer fragment : lastRoundFragments) {
if (rule.getEductPattern().matches(fragment)) {
Expand Down Expand Up @@ -470,8 +491,8 @@ private List<IAtomContainer> applyTransformationsWithAllIntermediates(IAtomConta
Map<String, IAtomContainer> finalFragments = new HashMap<>(mol.getAtomCount() * 2);
SmilesGenerator smilesGenerator = new SmilesGenerator(SmiFlavor.Absolute | SmiFlavor.UseAromaticSymbols);
//step 1 determine relevant transformation rules that have at least one match in the mol
List<CleavageRule> matchingRules = new ArrayList<>(this.CLEAVAGE_RULES.length);
for (CleavageRule rule : this.CLEAVAGE_RULES) {
List<CleavageRule> matchingRules = new ArrayList<>(this.cleavageRules.length);
for (CleavageRule rule : this.cleavageRules) {
if (rule.getEductPattern().matches(mol)) {
matchingRules.add(rule);
}
Expand Down

0 comments on commit c59a0b7

Please sign in to comment.