-
Notifications
You must be signed in to change notification settings - Fork 19
refactor: Detect molecules #2528
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop2
Are you sure you want to change the base?
Changes from all commits
9303bb2
280518f
2974058
5fec157
114de82
6a17457
cf5b19d
d5d2f90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,242 @@ | ||||||||||||||
| // SPDX-License-Identifier: GPL-3.0-or-later | ||||||||||||||
| // Copyright (c) 2026 Team Dissolve and contributors | ||||||||||||||
|
|
||||||||||||||
| #include "nodes/detectMolecules.h" | ||||||||||||||
| #include "classes/empiricalFormula.h" | ||||||||||||||
| #include "classes/fragment.h" | ||||||||||||||
| #include "classes/molecule.h" | ||||||||||||||
| #include "classes/species.h" | ||||||||||||||
| #include <algorithm> | ||||||||||||||
| #include <format> | ||||||||||||||
|
|
||||||||||||||
| DetectMoleculesNode::DetectMoleculesNode(Graph *parentGraph) : Node(parentGraph) | ||||||||||||||
| { | ||||||||||||||
| // Inputs | ||||||||||||||
| addInput<Structure>("Structure", "Input structure", inputStructure_); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||
| std::string_view DetectMoleculesNode::type() const { return "DetectMolecules"; } | ||||||||||||||
|
|
||||||||||||||
| std::string_view DetectMoleculesNode::summary() const { return "Detect molecular species within a structure"; } | ||||||||||||||
|
|
||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||
| std::vector<std::vector<int>> DetectMoleculesNode::findMolecularFragments(const Structure &structure) const | ||||||||||||||
| { | ||||||||||||||
| std::vector<std::vector<int>> fragments; | ||||||||||||||
|
|
||||||||||||||
| auto fragment = [structure](int i) { return Fragment<StructureAtom, Bond<StructureAtom>>::get(structure.atoms(), i); }; | ||||||||||||||
|
|
||||||||||||||
| for (int i = 0; i < structure.nAtoms(); i++) | ||||||||||||||
| fragments.emplace_back(fragment(i)); | ||||||||||||||
|
Comment on lines
+28
to
+29
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this function start from the next un-selected atom? Need something like a |
||||||||||||||
|
|
||||||||||||||
| return fragments; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Run main processing | ||||||||||||||
| NodeConstants::ProcessResult DetectMoleculesNode::process() | ||||||||||||||
| { | ||||||||||||||
| detectedStructures_.clear(); | ||||||||||||||
|
|
||||||||||||||
| // Return all discovered molecular fragment index vectors | ||||||||||||||
| auto allFragmentIndices = findMolecularFragments(inputStructure_); | ||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you're pre-detecting all bound fragments here (which is a good idea).... |
||||||||||||||
|
|
||||||||||||||
| // Try selecting within the species from the first atom - if this captures all atoms we have a bound framework... | ||||||||||||||
| if (allFragmentIndices[0].size() == inputStructure_.nAtoms()) | ||||||||||||||
| return error( | ||||||||||||||
| "Can't create molecular definitions since this unit cell appears to be a continuous framework/network. Consider " | ||||||||||||||
| "adjusting the bonding options in order to generate molecular fragments.\n"); | ||||||||||||||
|
|
||||||||||||||
| std::set<const StructureAtom *> atomMask; | ||||||||||||||
|
|
||||||||||||||
| for (int i = 0; i < inputStructure_.nAtoms(); i++) | ||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ...why is this a loop over structure atoms and not fragments? |
||||||||||||||
| { | ||||||||||||||
| const auto structureAtom = inputStructure_.atom(i); | ||||||||||||||
| if (atomMask.contains(structureAtom)) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| auto fragmentIndices = allFragmentIndices.at(i); | ||||||||||||||
|
|
||||||||||||||
| auto largeFragment = fragmentIndices.size() * 2 > inputStructure_.nAtoms(); | ||||||||||||||
|
|
||||||||||||||
| // Find instances of this fragment. | ||||||||||||||
| for (auto fragAtomIndex : fragmentIndices) | ||||||||||||||
| { | ||||||||||||||
| const auto fragmentAtom = inputStructure_.atom(fragAtomIndex); | ||||||||||||||
|
|
||||||||||||||
| // Create a provisional structure for the detected fragment | ||||||||||||||
| Structure detectedMolStructure; | ||||||||||||||
| detectedMolStructure.createBox(inputStructure_.box().axes()); | ||||||||||||||
| std::vector<std::vector<Vector3>> instances; | ||||||||||||||
|
|
||||||||||||||
| // Skip NETA definition creation for large fragments | ||||||||||||||
| if (largeFragment) | ||||||||||||||
| { | ||||||||||||||
| if (atomMask.contains(fragmentAtom)) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| auto &instance = instances.emplace_back(); | ||||||||||||||
| for (auto fragAtomIndex : fragmentIndices) | ||||||||||||||
| { | ||||||||||||||
| auto fragmentAtom = inputStructure_.atom(fragAtomIndex); | ||||||||||||||
| instance.push_back(fragmentAtom->r()); | ||||||||||||||
| atomMask.insert(fragmentAtom); | ||||||||||||||
|
|
||||||||||||||
| // Remove fragments with this size | ||||||||||||||
| std::erase_if(allFragmentIndices, [&fragmentIndices](const auto &fragment) | ||||||||||||||
| { return fragment.size() == fragmentIndices.size(); }); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| else | ||||||||||||||
| { | ||||||||||||||
| /* | ||||||||||||||
| * Best NETA definition | ||||||||||||||
| */ | ||||||||||||||
|
|
||||||||||||||
| // Set up the return value and bind its contents | ||||||||||||||
| NETADefinition bestNETA; | ||||||||||||||
| std::vector<StructureAtom *> rootAtoms; | ||||||||||||||
|
|
||||||||||||||
| // Maintain a set of atoms matched by any NETA description we generate | ||||||||||||||
| std::set<StructureAtom *> alreadyMatched; | ||||||||||||||
|
|
||||||||||||||
| // Skip this atom? | ||||||||||||||
| if (alreadyMatched.find(fragmentAtom) != alreadyMatched.end()) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| // Create a NETA definition with this atom as the root | ||||||||||||||
| NETADefinition neta; | ||||||||||||||
| neta.create(static_cast<AtomBase *>(fragmentAtom), std::nullopt, | ||||||||||||||
| Flags<NETADefinition::NETACreationFlags>(NETADefinition::NETACreationFlags::ExplicitHydrogens, | ||||||||||||||
| NETADefinition::NETACreationFlags::IncludeRootElement)); | ||||||||||||||
|
|
||||||||||||||
| // Apply this match over the whole species | ||||||||||||||
| std::vector<StructureAtom *> currentRootAtoms; | ||||||||||||||
| for (auto fragAtomIndex : fragmentIndices) | ||||||||||||||
| { | ||||||||||||||
| const auto fragmentAtom = inputStructure_.atom(fragAtomIndex); | ||||||||||||||
| if (neta.matches(fragmentAtom)) | ||||||||||||||
| { | ||||||||||||||
| currentRootAtoms.push_back(fragmentAtom); | ||||||||||||||
| alreadyMatched.insert(fragmentAtom); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Is this a better description? | ||||||||||||||
| auto better = false; | ||||||||||||||
| if (rootAtoms.empty() || currentRootAtoms.size() < rootAtoms.size()) | ||||||||||||||
| better = true; | ||||||||||||||
| else if (currentRootAtoms.size() == rootAtoms.size()) | ||||||||||||||
| { | ||||||||||||||
| // Replace the current match if there are more bonds on the current atom. | ||||||||||||||
| if (fragmentAtom->nBonds() > rootAtoms.front()->nBonds()) | ||||||||||||||
| better = true; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (better) | ||||||||||||||
| { | ||||||||||||||
| bestNETA = neta; | ||||||||||||||
| rootAtoms = currentRootAtoms; | ||||||||||||||
| } | ||||||||||||||
|
Comment on lines
+94
to
+138
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This whole NETA creation part needs to run on each atom within a fragment, reason being to find the best, most concrete way of describing uniquely the bonding / atoms within it. Then it can be applied to each fragment (of the same size) in turn to see if the NETA matches. |
||||||||||||||
|
|
||||||||||||||
| /* | ||||||||||||||
| * Get instances | ||||||||||||||
| */ | ||||||||||||||
|
|
||||||||||||||
| // Iterate over all structural atoms, matching their unit cell atoms by NETA | ||||||||||||||
| std::vector<std::set<const AtomBase *>> matchedUnitCellAtomSets; | ||||||||||||||
| for (const auto &structureAtom : inputStructure_.atoms()) | ||||||||||||||
| { | ||||||||||||||
| if (atomMask.contains(structureAtom.get())) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| auto matchedPath = neta.matchedPath(structureAtom.get()).set(); | ||||||||||||||
| if (!matchedPath.empty()) | ||||||||||||||
| { | ||||||||||||||
| auto set = matchedUnitCellAtomSets.emplace_back(matchedPath); | ||||||||||||||
| for (const auto &matchedAtom : set) | ||||||||||||||
| atomMask.insert(static_cast<const StructureAtom *>(matchedAtom)); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Loop over matched unit cell atoms, retrieving instances | ||||||||||||||
| for (const auto &matchedUnitCellAtoms : matchedUnitCellAtomSets) | ||||||||||||||
| { | ||||||||||||||
| if (matchedUnitCellAtoms.empty()) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| auto &instance = instances.emplace_back(); | ||||||||||||||
| for (const auto &matchedAtom : matchedUnitCellAtoms) | ||||||||||||||
| instance.push_back(matchedAtom->r()); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Detect structures that have instances | ||||||||||||||
| if (!instances.empty()) | ||||||||||||||
| detectedStructures_ | ||||||||||||||
| .emplace_back(copyStructureAtomsAndBonds(inputStructure_, detectedMolStructure, fragmentIndices)) | ||||||||||||||
| .instances() = instances; | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Unfold all detected structures | ||||||||||||||
| for (auto &structure : detectedStructures_) | ||||||||||||||
| structure.unFold(); | ||||||||||||||
|
Comment on lines
+180
to
+182
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This step needs to happen before you store the coordinates of the instances at the end of the loop above. Could happen immediately after the fragment detection at the top of the routine. |
||||||||||||||
|
|
||||||||||||||
| message("Detected {} distinct fragment structures:\n\n", detectedStructures_.size()); | ||||||||||||||
| message(" ID N Species Formula\n"); | ||||||||||||||
| auto count = 1; | ||||||||||||||
| for (const auto &structure : detectedStructures_) | ||||||||||||||
| message(" {:3d} {:4d} {}\n", count++, structure.instances().size(), | ||||||||||||||
| EmpiricalFormula::formula(structure.atoms(), [](const auto &i) { return i->Z(); })); | ||||||||||||||
| message(""); | ||||||||||||||
|
|
||||||||||||||
| /* | ||||||||||||||
| * Dynamic outputs | ||||||||||||||
| */ | ||||||||||||||
|
|
||||||||||||||
| // Register dynamic outputs | ||||||||||||||
| for (int i = 0; i < detectedStructures_.size(); i++) | ||||||||||||||
| { | ||||||||||||||
| auto val = detectedStructures_[i]; | ||||||||||||||
| auto paramName = std::string("DetectedMolecule" + std::format("-{}", i)); | ||||||||||||||
|
|
||||||||||||||
| // Check if output already exists - do not add if it does | ||||||||||||||
| if (outputs_.find(paramName) != outputs_.end()) | ||||||||||||||
| continue; | ||||||||||||||
|
|
||||||||||||||
| addOutput(paramName, "Detected molecular structure", detectedStructures_[i]); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return NodeConstants::ProcessResult::Success; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| /* | ||||||||||||||
| * Helpers | ||||||||||||||
| */ | ||||||||||||||
|
|
||||||||||||||
| // Copy atom and bond information from one structure to another | ||||||||||||||
| Structure &DetectMoleculesNode::copyStructureAtomsAndBonds(const Structure &source, Structure &target, | ||||||||||||||
| const std::vector<int> fragmentIndices) | ||||||||||||||
| { | ||||||||||||||
| // Copy fragment atoms, forming a map of the original indices to the new atom in the structure | ||||||||||||||
| std::map<int, StructureAtom *> originalIndexMap; | ||||||||||||||
| for (auto fragAtomIndex : fragmentIndices) | ||||||||||||||
| { | ||||||||||||||
| const auto fragmentAtom = source.atom(fragAtomIndex); | ||||||||||||||
| originalIndexMap[fragAtomIndex] = target.addAtom(fragmentAtom->Z(), fragmentAtom->r(), fragmentAtom->q()); | ||||||||||||||
| std::cout << std::format("New atom added to structure: {} {}\n", fragAtomIndex, Elements::symbol(fragmentAtom->Z())); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Copy bond information - since our fragment is by definition a bound fragment, we copy all bonds on each atom | ||||||||||||||
| for (auto fragAtomIndex : fragmentIndices) | ||||||||||||||
| { | ||||||||||||||
| const auto fragmentAtom = source.atom(fragAtomIndex); | ||||||||||||||
| for (auto bond : fragmentAtom->bonds()) | ||||||||||||||
| { | ||||||||||||||
| // Add a bond between the new atoms in the detected structure (as long as it doesn't already exist) | ||||||||||||||
| if (!target.hasBond(originalIndexMap[bond->i()->index()], originalIndexMap[bond->j()->index()])) | ||||||||||||||
| target.addBond(originalIndexMap[bond->i()->index()], originalIndexMap[bond->j()->index()]); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return target; | ||||||||||||||
| } | ||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| // SPDX-License-Identifier: GPL-3.0-or-later | ||
| // Copyright (c) 2026 Team Dissolve and contributors | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "classes/localMolecule.h" | ||
| #include "classes/molecule.h" | ||
| #include "classes/species.h" | ||
| #include "classes/structure.h" | ||
| #include "data/elements.h" | ||
| #include "math/vector3.h" | ||
| #include "nodes/node.h" | ||
| #include <algorithm> | ||
| #include <map> | ||
| #include <vector> | ||
|
|
||
| class Species; | ||
|
|
||
| // DetectMolecules Node | ||
| class DetectMoleculesNode : public Node | ||
| { | ||
| public: | ||
| DetectMoleculesNode(Graph *parentGraph); | ||
| ~DetectMoleculesNode() override = default; | ||
|
|
||
| public: | ||
| std::string_view type() const override; | ||
| std::string_view summary() const override; | ||
|
|
||
| /* | ||
| * Definition | ||
| */ | ||
| private: | ||
| // Input structure | ||
| Structure inputStructure_; | ||
| // Output structures | ||
| std::vector<Structure> detectedStructures_; | ||
|
|
||
| /* | ||
| * Helpers | ||
| */ | ||
| private: | ||
| // Copy atom and bond information from one structure to another | ||
| static Structure ©StructureAtomsAndBonds(const Structure &source, Structure &target, | ||
| const std::vector<int> fragmentIndices); | ||
| // Find molecular fragments | ||
| std::vector<std::vector<int>> findMolecularFragments(const Structure &structure) const; | ||
|
|
||
| /* | ||
| * Processing | ||
| */ | ||
| protected: | ||
| // Run main processing | ||
| NodeConstants::ProcessResult process() override; | ||
| }; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this still need a rebase? This was in #2530 ...