refactor: Detect molecules#2528
Conversation
353a3e6 to
cd27f8f
Compare
cd27f8f to
150dd4c
Compare
2b5a4f2 to
6e08831
Compare
a160f32 to
9303bb2
Compare
trisyoungs
left a comment
There was a problem hiding this comment.
This is excellent and a whole lot cleaner than the old one, but I think there are some looping / workflow errors to address!
There was a problem hiding this comment.
Does this still need a rebase? This was in #2530 ...
| // Inputs | ||
| addInput<Structure>("Structure", "Input structure", inputStructure_); | ||
| } | ||
|
|
There was a problem hiding this comment.
| /* | |
| * Definition | |
| */ | |
| std::string_view DetectMoleculesNode::type() const { return "DetectMolecules"; } | ||
|
|
||
| std::string_view DetectMoleculesNode::summary() const { return "Detect molecular species within a structure"; } | ||
|
|
There was a problem hiding this comment.
| /* | |
| * Processing | |
| */ | |
| for (int i = 0; i < structure.nAtoms(); i++) | ||
| fragments.emplace_back(fragment(i)); |
There was a problem hiding this comment.
Shouldn't this function start from the next un-selected atom? It needs something like a std::set to track which atoms have already been detected within instances (this is a bug that doesn't show up for the atomic NaCl or MgO cases!)
| detectedStructures_.clear(); | ||
|
|
||
| // Return all discovered molecular fragment index vectors | ||
| auto allFragmentIndices = findMolecularFragments(inputStructure_); |
There was a problem hiding this comment.
If you're pre-detecting all bound fragments here (which is a good idea)....
|
|
||
| std::set<const StructureAtom *> atomMask; | ||
|
|
||
| for (int i = 0; i < inputStructure_.nAtoms(); i++) |
There was a problem hiding this comment.
...why is this a loop over structure atoms and not fragments?
| // Set up the return value and bind its contents | ||
| NETADefinition bestNETA; | ||
| std::vector<StructureAtom *> rootAtoms; | ||
|
|
||
| // Maintain a set of atoms matched by any NETA description we generate | ||
| std::set<StructureAtom *> alreadyMatched; | ||
|
|
||
| // Skip this atom? | ||
| if (alreadyMatched.find(fragmentAtom) != alreadyMatched.end()) | ||
| continue; | ||
|
|
||
| // Create a NETA definition with this atom as the root | ||
| NETADefinition neta; | ||
| neta.create(static_cast<AtomBase *>(fragmentAtom), std::nullopt, | ||
| Flags<NETADefinition::NETACreationFlags>(NETADefinition::NETACreationFlags::ExplicitHydrogens, | ||
| NETADefinition::NETACreationFlags::IncludeRootElement)); | ||
|
|
||
| // Apply this match over the whole species | ||
| std::vector<StructureAtom *> currentRootAtoms; | ||
| for (auto fragAtomIndex : fragmentIndices) | ||
| { | ||
| const auto fragmentAtom = inputStructure_.atom(fragAtomIndex); | ||
| if (neta.matches(fragmentAtom)) | ||
| { | ||
| currentRootAtoms.push_back(fragmentAtom); | ||
| alreadyMatched.insert(fragmentAtom); | ||
| } | ||
| } | ||
|
|
||
| // Is this a better description? | ||
| auto better = false; | ||
| if (rootAtoms.empty() || currentRootAtoms.size() < rootAtoms.size()) | ||
| better = true; | ||
| else if (currentRootAtoms.size() == rootAtoms.size()) | ||
| { | ||
| // Replace the current match if there are more bonds on the current atom. | ||
| if (fragmentAtom->nBonds() > rootAtoms.front()->nBonds()) | ||
| better = true; | ||
| } | ||
|
|
||
| if (better) | ||
| { | ||
| bestNETA = neta; | ||
| rootAtoms = currentRootAtoms; | ||
| } |
There was a problem hiding this comment.
This whole NETA creation part needs to run on each atom within a fragment, the reason being to find the best, most concrete way of uniquely describing the bonding / atoms within it. The resulting NETA can then be applied to each fragment (of the same size) in turn to see if it matches.
| // Unfold all detected structures | ||
| for (auto &structure : detectedStructures_) | ||
| structure.unFold(); |
There was a problem hiding this comment.
This step needs to happen before you store the coordinates of the instances at the end of the loop above. Could happen immediately after the fragment detection at the top of the routine.
No description provided.