Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/classes/fragment.h

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this still need a rebase? This was in #2530 ...

Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@ template <class AtomClass, class BondClass> class Fragment
getIndicesRecursive(atoms, indices, j->index(), exclusions);
}
}
static void getIndicesRecursive(const std::vector<std::unique_ptr<AtomClass>> &atoms, std::vector<int> &indices, int index,
const std::vector<const BondClass *> &exclusions)
{
// Loop over bonds on indexed atom
indices.emplace_back(index);
const auto i = atoms.at(index).get();
for (const auto *bond : i->bonds())
{
// Is this either of the excluded bonds?
if (std::ranges::find(exclusions, bond) != exclusions.end())
continue;

// Get the partner atom in the bond and select it (if it is not selected already)
auto j = bond->partner(i);
if (std::find(indices.begin(), indices.end(), j->index()) == indices.end())
getIndicesRecursive(atoms, indices, j->index(), exclusions);
}
}

public:
// Return the fragment (vector of indices) containing the specified atom
Expand All @@ -38,4 +56,11 @@ template <class AtomClass, class BondClass> class Fragment
getIndicesRecursive(atoms, indices, startIndex, exclusions);
return indices;
}
static std::vector<int> get(const std::vector<std::unique_ptr<AtomClass>> &atoms, int startIndex,
const std::vector<const BondClass *> &exclusions = {})
{
std::vector<int> indices;
getIndicesRecursive(atoms, indices, startIndex, exclusions);
return indices;
}
};
9 changes: 9 additions & 0 deletions src/classes/structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,20 @@ Structure &Structure::operator=(const Structure &source)
{
clear();

// Copy atoms
for (auto &atom : source.atoms_)
{
auto &i = atoms_.emplace_back(std::make_unique<StructureAtom>());
i->copy(*atom);
}

// Copy bonds
for (auto &bond : source.bonds_)
addBond(bond->i()->index(), bond->j()->index());

// Copy instances
instances_ = source.instances_;

// Copy source box
createBox(source.box_.axisLengths(), source.box_.axisAngles(), source.box_.type() == Box::BoxType::None);

Expand Down Expand Up @@ -119,6 +124,10 @@ const StructureAtom *Structure::atom(int i) const { return atoms_[i].get(); }
const std::vector<std::unique_ptr<StructureAtom>> &Structure::atoms() const { return atoms_; }
std::vector<std::unique_ptr<StructureAtom>> &Structure::atoms() { return atoms_; }

// Return molecular species coordinates
const std::vector<std::vector<Vector3>> &Structure::instances() const { return instances_; }
std::vector<std::vector<Vector3>> &Structure::instances() { return instances_; }

/*
* Connectivity
*/
Expand Down
5 changes: 5 additions & 0 deletions src/classes/structure.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class Structure : public Serialisable<>
private:
// Atoms in the structure
std::vector<std::unique_ptr<StructureAtom>> atoms_;
// Positional instances of the root structure
std::vector<std::vector<Vector3>> instances_;

private:
// Renumber atoms so they are sequential in the vector
Expand All @@ -74,6 +76,9 @@ class Structure : public Serialisable<>
// Return atoms
const std::vector<std::unique_ptr<StructureAtom>> &atoms() const;
std::vector<std::unique_ptr<StructureAtom>> &atoms();
// Return positional instances of the root structure
const std::vector<std::vector<Vector3>> &instances() const;
std::vector<std::vector<Vector3>> &instances();

/*
* Connectivity
Expand Down
242 changes: 242 additions & 0 deletions src/nodes/detectMolecules.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright (c) 2026 Team Dissolve and contributors

#include "nodes/detectMolecules.h"
#include "classes/empiricalFormula.h"
#include "classes/fragment.h"
#include "classes/molecule.h"
#include "classes/species.h"
#include <algorithm>
#include <format>

DetectMoleculesNode::DetectMoleculesNode(Graph *parentGraph) : Node(parentGraph)
{
// Inputs
addInput<Structure>("Structure", "Input structure", inputStructure_);
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/*
* Definition
*/

std::string_view DetectMoleculesNode::type() const { return "DetectMolecules"; }

std::string_view DetectMoleculesNode::summary() const { return "Detect molecular species within a structure"; }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/*
* Processing
*/

std::vector<std::vector<int>> DetectMoleculesNode::findMolecularFragments(const Structure &structure) const
{
std::vector<std::vector<int>> fragments;

auto fragment = [structure](int i) { return Fragment<StructureAtom, Bond<StructureAtom>>::get(structure.atoms(), i); };

for (int i = 0; i < structure.nAtoms(); i++)
fragments.emplace_back(fragment(i));
Comment on lines +28 to +29

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this function start from the next un-selected atom? Need something like a std::set to track which atoms have been detected within instances (a bug that doesn't show up for the atomic NaCl or MgO cases!)


return fragments;
}

// Run main processing
NodeConstants::ProcessResult DetectMoleculesNode::process()
{
detectedStructures_.clear();

// Return all discovered molecular fragment index vectors
auto allFragmentIndices = findMolecularFragments(inputStructure_);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're pre-detecting all bound fragments here (which is a good idea)....


// Try selecting within the species from the first atom - if this captures all atoms we have a bound framework...
if (allFragmentIndices[0].size() == inputStructure_.nAtoms())
return error(
"Can't create molecular definitions since this unit cell appears to be a continuous framework/network. Consider "
"adjusting the bonding options in order to generate molecular fragments.\n");

std::set<const StructureAtom *> atomMask;

for (int i = 0; i < inputStructure_.nAtoms(); i++)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...why is this a loop over structure atoms and not fragments?

{
const auto structureAtom = inputStructure_.atom(i);
if (atomMask.contains(structureAtom))
continue;

auto fragmentIndices = allFragmentIndices.at(i);

auto largeFragment = fragmentIndices.size() * 2 > inputStructure_.nAtoms();

// Find instances of this fragment.
for (auto fragAtomIndex : fragmentIndices)
{
const auto fragmentAtom = inputStructure_.atom(fragAtomIndex);

// Create a provisional structure for the detected fragment
Structure detectedMolStructure;
detectedMolStructure.createBox(inputStructure_.box().axes());
std::vector<std::vector<Vector3>> instances;

// Skip NETA definition creation for large fragments
if (largeFragment)
{
if (atomMask.contains(fragmentAtom))
continue;

auto &instance = instances.emplace_back();
for (auto fragAtomIndex : fragmentIndices)
{
auto fragmentAtom = inputStructure_.atom(fragAtomIndex);
instance.push_back(fragmentAtom->r());
atomMask.insert(fragmentAtom);

// Remove fragments with this size
std::erase_if(allFragmentIndices, [&fragmentIndices](const auto &fragment)
{ return fragment.size() == fragmentIndices.size(); });
}
}
else
{
/*
* Best NETA definition
*/

// Set up the return value and bind its contents
NETADefinition bestNETA;
std::vector<StructureAtom *> rootAtoms;

// Maintain a set of atoms matched by any NETA description we generate
std::set<StructureAtom *> alreadyMatched;

// Skip this atom?
if (alreadyMatched.find(fragmentAtom) != alreadyMatched.end())
continue;

// Create a NETA definition with this atom as the root
NETADefinition neta;
neta.create(static_cast<AtomBase *>(fragmentAtom), std::nullopt,
Flags<NETADefinition::NETACreationFlags>(NETADefinition::NETACreationFlags::ExplicitHydrogens,
NETADefinition::NETACreationFlags::IncludeRootElement));

// Apply this match over the whole species
std::vector<StructureAtom *> currentRootAtoms;
for (auto fragAtomIndex : fragmentIndices)
{
const auto fragmentAtom = inputStructure_.atom(fragAtomIndex);
if (neta.matches(fragmentAtom))
{
currentRootAtoms.push_back(fragmentAtom);
alreadyMatched.insert(fragmentAtom);
}
}

// Is this a better description?
auto better = false;
if (rootAtoms.empty() || currentRootAtoms.size() < rootAtoms.size())
better = true;
else if (currentRootAtoms.size() == rootAtoms.size())
{
// Replace the current match if there are more bonds on the current atom.
if (fragmentAtom->nBonds() > rootAtoms.front()->nBonds())
better = true;
}

if (better)
{
bestNETA = neta;
rootAtoms = currentRootAtoms;
}
Comment on lines +94 to +138

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole NETA creation part needs to run on each atom within a fragment, reason being to find the best, most concrete way of describing uniquely the bonding / atoms within it. Then it can be applied to each fragment (of the same size) in turn to see if the NETA matches.


/*
* Get instances
*/

// Iterate over all structural atoms, matching their unit cell atoms by NETA
std::vector<std::set<const AtomBase *>> matchedUnitCellAtomSets;
for (const auto &structureAtom : inputStructure_.atoms())
{
if (atomMask.contains(structureAtom.get()))
continue;

auto matchedPath = neta.matchedPath(structureAtom.get()).set();
if (!matchedPath.empty())
{
auto set = matchedUnitCellAtomSets.emplace_back(matchedPath);
for (const auto &matchedAtom : set)
atomMask.insert(static_cast<const StructureAtom *>(matchedAtom));
}
}

// Loop over matched unit cell atoms, retrieving instances
for (const auto &matchedUnitCellAtoms : matchedUnitCellAtomSets)
{
if (matchedUnitCellAtoms.empty())
continue;

auto &instance = instances.emplace_back();
for (const auto &matchedAtom : matchedUnitCellAtoms)
instance.push_back(matchedAtom->r());
}
}

// Detect structures that have instances
if (!instances.empty())
detectedStructures_
.emplace_back(copyStructureAtomsAndBonds(inputStructure_, detectedMolStructure, fragmentIndices))
.instances() = instances;
}
}

// Unfold all detected structures
for (auto &structure : detectedStructures_)
structure.unFold();
Comment on lines +180 to +182

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This step needs to happen before you store the coordinates of the instances at the end of the loop above. Could happen immediately after the fragment detection at the top of the routine.


message("Detected {} distinct fragment structures:\n\n", detectedStructures_.size());
message(" ID N Species Formula\n");
auto count = 1;
for (const auto &structure : detectedStructures_)
message(" {:3d} {:4d} {}\n", count++, structure.instances().size(),
EmpiricalFormula::formula(structure.atoms(), [](const auto &i) { return i->Z(); }));
message("");

/*
* Dynamic outputs
*/

// Register dynamic outputs
for (int i = 0; i < detectedStructures_.size(); i++)
{
auto val = detectedStructures_[i];
auto paramName = std::string("DetectedMolecule" + std::format("-{}", i));

// Check if output already exists - do not add if it does
if (outputs_.find(paramName) != outputs_.end())
continue;

addOutput(paramName, "Detected molecular structure", detectedStructures_[i]);
}

return NodeConstants::ProcessResult::Success;
}

/*
* Helpers
*/

// Copy atom and bond information from one structure to another
Structure &DetectMoleculesNode::copyStructureAtomsAndBonds(const Structure &source, Structure &target,
const std::vector<int> fragmentIndices)
{
// Copy fragment atoms, forming a map of the original indices to the new atom in the structure
std::map<int, StructureAtom *> originalIndexMap;
for (auto fragAtomIndex : fragmentIndices)
{
const auto fragmentAtom = source.atom(fragAtomIndex);
originalIndexMap[fragAtomIndex] = target.addAtom(fragmentAtom->Z(), fragmentAtom->r(), fragmentAtom->q());
std::cout << std::format("New atom added to structure: {} {}\n", fragAtomIndex, Elements::symbol(fragmentAtom->Z()));
}

// Copy bond information - since our fragment is by definition a bound fragment, we copy all bonds on each atom
for (auto fragAtomIndex : fragmentIndices)
{
const auto fragmentAtom = source.atom(fragAtomIndex);
for (auto bond : fragmentAtom->bonds())
{
// Add a bond between the new atoms in the detected structure (as long as it doesn't already exist)
if (!target.hasBond(originalIndexMap[bond->i()->index()], originalIndexMap[bond->j()->index()]))
target.addBond(originalIndexMap[bond->i()->index()], originalIndexMap[bond->j()->index()]);
}
}

return target;
}
55 changes: 55 additions & 0 deletions src/nodes/detectMolecules.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright (c) 2026 Team Dissolve and contributors

#pragma once

#include "classes/localMolecule.h"
#include "classes/molecule.h"
#include "classes/species.h"
#include "classes/structure.h"
#include "data/elements.h"
#include "math/vector3.h"
#include "nodes/node.h"
#include <algorithm>
#include <map>
#include <vector>

class Species;

// DetectMolecules Node
class DetectMoleculesNode : public Node
{
public:
DetectMoleculesNode(Graph *parentGraph);
~DetectMoleculesNode() override = default;

public:
std::string_view type() const override;
std::string_view summary() const override;

/*
* Definition
*/
private:
// Input structure
Structure inputStructure_;
// Output structures
std::vector<Structure> detectedStructures_;

/*
* Helpers
*/
private:
// Copy atom and bond information from one structure to another
static Structure &copyStructureAtomsAndBonds(const Structure &source, Structure &target,
const std::vector<int> fragmentIndices);
// Find molecular fragments
std::vector<std::vector<int>> findMolecularFragments(const Structure &structure) const;

/*
* Processing
*/
protected:
// Run main processing
NodeConstants::ProcessResult process() override;
};
Loading
Loading