diff --git a/idpflex/bayes.py b/idpflex/bayes.py
index 2f1104e..57e2c22 100644
--- a/idpflex/bayes.py
+++ b/idpflex/bayes.py
@@ -1,171 +1,316 @@
# from qef.models import TabulatedFunctionModel
-from lmfit.models import (Model, ConstantModel, index_of)
-from scipy.interpolate import interp1d
+import warnings
+import operator
+import numpy as np
+from lmfit.models import (Model, ConstantModel)
+from lmfit import CompositeModel
+from functools import reduce
class TabulatedFunctionModel(Model):
- r"""A fit model that uses a table of (x, y) values to interpolate
+ r"""A fit model that uses a table of (x, y) values to interpolate.
- Uses :class:`~scipy.interpolate.interp1d`
+ Uses an individual property's `interpolator` for the interpolation.
+ Control of the interpolator can be set using the property's
+ `create_interpolator` method.
Fitting parameters:
- - integrated intensity ``amplitude`` :math:`A`
+ - integrated intensity ``slope`` :math:`A`
- position of the peak ``center`` :math:`E_0`
- - nominal relaxation time ``tau`` :math:`\tau`
- - stretching exponent ``beta`` :math:`\beta`
Parameters
----------
- xdata : :class:`~numpy:numpy.ndarray`
- X-values to construct the interpolator
- ydata : :class:`~numpy:numpy.ndarray`
- Y-values to construct the interpolator
- interpolator_kind : str
- Interpolator that :class:`~scipy.interpolate.interp1d` should use
- """
+ prop : :class:`~idpflex.properties.ScalarProperty` or :class:`~idpflex.properties.ProfileProperty`
+ Property used to create interpolator and model
+ """ # noqa: E501
- def __init__(self, xdata, ydata, interpolator_kind='linear',
- prefix='', missing=None, name=None,
- **kwargs):
- kwargs.update({'prefix': prefix, 'missing': missing})
- self._interpolator = interp1d(xdata, ydata, kind=interpolator_kind)
+ def __init__(self, prop, prefix='', missing=None, name=None, **kwargs):
+ kwargs.update({'prefix': prefix, 'missing': missing, 'name': name})
- def tabulate(x, amplitude, center):
- return amplitude * self._interpolator(x - center)
+ def tabulate(x, slope, center, intercept, prop=None):
+ return slope*prop.interpolator(x - center) + intercept
- super(TabulatedFunctionModel, self).__init__(tabulate, **kwargs)
- self.set_param_hint('amplitude', min=0, value=1)
+ super().__init__(tabulate, prop=prop, **kwargs)
+ self.set_param_hint('slope', min=0, value=1)
self.set_param_hint('center', value=0)
+ self.set_param_hint('intercept', value=0)
+
+
+class TabulatedCompositeModel(Model):
+ r"""A fit model that uses a table of (x, y) values to interpolate.
- def guess(self, y, x=None, **kwargs):
- r"""Estimate fitting parameters from input data
-
- Parameters
- ----------
- y : :class:`~numpy:numpy.ndarray`
- Values to fit to, e.g., SANS or SAXS intensity values
- x : :class:`~numpy:numpy.ndarray`
- independent variable, e.g., momentum transfer
-
- Returns
- -------
- :class:`~lmfit.parameter.Parameters`
- Parameters with estimated initial values.
- """
- amplitude = 1.0
- center = 0.0
- if x is not None:
- center = x[index_of(y, max(y))] # assumed peak within domain x
- amplitude = max(y)
- return self.make_params(amplitude=amplitude, center=center)
-
-
-def model_at_node(node, property_name):
- r"""Generate fit model as a tabulated function with a scaling parameter,
- plus a flat background
+ Uses an individual property's `interpolator` for the interpolation.
+ Control of the interpolator can be set using the property's
+ `create_interpolator` method.
+
+ Fitting parameters:
+ - integrated intensity ``slope`` :math:`A`
+ - position of the peak ``center`` :math:`E_0`
Parameters
----------
- node : :class:`~idpflex.cnextend.ClusterNodeX`
- One node of the hierarchical :class:`~idpflex.cnextend.Tree`
- property_name : str
- Name of the property to create the model for
+ prop : :class:`~idpflex.properties.ScalarProperty` or :class:`~idpflex.properties.ProfileProperty`
+ Property used to create interpolator and model
+ """ # noqa: E501
- Returns
- -------
- :class:`~lmfit.model.CompositeModel`
- A model composed of a :class:`~idpflex.bayes.TabulatedFunctionModel`
- and a :class:`~lmfit.models.ConstantModel`
+ def __init__(self, prop, prefix='', missing=None, name=None, **kwargs):
+ kwargs.update({'prefix': prefix, 'missing': missing, 'name': name})
+
+ def tabulate(x, slope, center, intercept, prop=None):
+ if not set(x).issuperset(prop.feature_domain):
+ raise ValueError('The domain of the experiment does not align '
+ 'with the domain of the profile being fitted.'
+ ' Interpolate before creating the model.')
+ return slope*prop.interpolator(prop.x - center) + intercept
+
+ super().__init__(tabulate, prop=prop, **kwargs)
+ self.set_param_hint('slope', min=0, value=1)
+ self.set_param_hint('center', value=0)
+ self.set_param_hint('intercept', value=0)
+
+
+class LinearModel(Model):
+ r"""A fit model that fits :math:`m*prop.y + b = exp`.
+
+ Fitting parameters:
+ - slope ``slope``
+ - intercept ``intercept``
+
+ Parameters
+ ----------
+ prop : :class:`~idpflex.properties.ScalarProperty` or :class:`~idpflex.properties.ProfileProperty`
+ Property used to create interpolator and model
""" # noqa: E501
- p = node[property_name]
- mod = TabulatedFunctionModel(p.x, p.y) + ConstantModel()
- mod.set_param_hint('center', vary=False)
- return mod
+ def __init__(self, prop=None, **kwargs):
+ def line(x, slope, intercept, prop=None):
+ if not set(x).issuperset(prop.feature_domain):
+ raise ValueError('The domain of the experiment does not align '
+ 'with the domain of the profile being fitted.'
+ ' Interpolate before creating the model.')
+ return slope*prop.y + intercept
+ super().__init__(line, prop=prop, **kwargs)
+ self.set_param_hint('slope', value=1, min=0)
+ self.set_param_hint('intercept', value=0)
-def model_at_depth(tree, depth, property_name):
- r"""Generate a fit model at a particular tree depth
+
+def create_at_depth(tree, depth, names=None, use_tabulated=False,
+ **subset_kws):
+ r"""Create a model at a particular tree depth from the root node.
Parameters
----------
tree : :class:`~idpflex.cnextend.Tree`
Hierarchical tree
- depth: int
- depth level, starting from the tree's root (depth=0)
- property_name : str
- Name of the property to create the model for
+ depth : int
+ Fit at this depth
+ names : list or str, optional
+ kwarg to pass on when creating subset of property dict
+ use_tabulated: bool
+        Decide to use a tabulated (interpolated) model or a linear model
+        without interpolation. Useful to "center" data.
+ subset_kws : additional args for subset filtering, optional
+ kwargs to pass on when creating subset of property dict example
+ includes `property_type`
Returns
-------
- :class:`~lmfit.model.CompositeModel`
- A model composed of a :class:`~idpflex.bayes.TabulatedFunctionModel`
- for each node plus a :class:`~lmfit.models.ConstantModel` accounting
- for a flat background
- """ # noqa: E501
- mod = ConstantModel()
- for node in tree.nodes_at_depth(depth):
- p = node[property_name]
- m = TabulatedFunctionModel(p.x, p.y, prefix='n{}_'.format(node.id))
- m.set_param_hint('center', vary=False)
- m.set_param_hint('amplitude', value=1.0 / (1 + depth))
- mod += m
- return mod
-
-
-def fit_at_depth(tree, experiment, property_name, depth):
- r"""Fit at a particular tree depth from the root node
-
- Fit experiment against the property stored in the nodes. The fit model
- is generated by :func:`~idpflex.bayes.model_at_depth`
-
- Parameters
- ----------
- tree : :class:`~idpflex.cnextend.Tree`
- Hierarchical tree
- experiment : :class:`~idpflex.properties.ProfileProperty`
- A property containing the experimental info.
- property_name: str
- The name of the simulated property to compare against experiment
- max_depth : int
- Fit at each depth up to (and including) max_depth
-
- Returns
- -------
- :class:`~lmfit.model.ModelResult`
- Results of the fit
- """
- mod = model_at_depth(tree, depth, property_name)
- params = mod.make_params()
- return mod.fit(experiment.y,
- x=experiment.x,
- weights=1.0 / experiment.e,
- params=params)
-
-
-def fit_to_depth(tree, experiment, property_name, max_depth=5):
- r"""Fit at each tree depth from the root node up to a maximum depth
-
- Fit experiment against the property stored in the nodes. The fit model
- is generated by :func:`~idpflex.bayes.model_at_depth`
+    :class:`~lmfit.CompositeModel`
+ Model for the depth
+ """
+ pgs = [node.property_group.subset(names or node.property_group.keys(),
+ **subset_kws)
+ for node in tree.nodes_at_depth(depth)]
+ return create_models(pgs, use_tabulated=use_tabulated)
+
+
+def create_to_depth(tree, max_depth, names=None,
+ use_tabulated=False, **subset_kws):
+ r"""Create models to a particular tree depth from the root node.
Parameters
----------
tree : :class:`~idpflex.cnextend.Tree`
Hierarchical tree
- experiment : :class:`~idpflex.properties.ProfileProperty`
- A property containing the experimental info.
- property_name: str
- The name of the simulated property to compare against experiment
max_depth : int
Fit at each depth up to (and including) max_depth
+ names : list or str, optional
+ kwarg to pass on when creating subset of property dict
+ use_tabulated: bool
+        Decide to use a tabulated (interpolated) model or a linear model
+        without interpolation. Useful to "center" data.
+ subset_kws : additional args for subset filtering, optional
+ kwargs to pass on when creating subset of property dict example
+ includes `property_type`
Returns
-------
- :py:class:`list`
- A list of :class:`~lmfit.model.ModelResult` items containing the
- fit at each level of the tree up to and including `max_depth`
+    list of :class:`~lmfit.CompositeModel`
+ Models for each depth
"""
+ return [create_at_depth(tree, i, names=names, use_tabulated=use_tabulated,
+ **subset_kws)
+ for i in range(max_depth + 1)]
+
+
+def fit_model(model, experiment, params=None, weights=None, method='leastsq'):
+ """Apply a fit to a particular model.
+
+ Parameters
+ ----------
+    model: :class:`~lmfit.model.Model`
+ Model to be fit
+ experiment: :class:`~idpflex.properties.PropertyDict`
+ Set of experimental properties to be fit.
+ params: Parameters
+ Parameters of the model to be used. Can default to model.make_params()
+ weights: numpy.ndarray, optional
+ Array of weights to be used for fitting
+ method: str, optional
+ Choice of which fitting method to use with lmfit. Defaults to 'leastsq'
+ but can choose methods such as 'differential_evolution' to find global
+ minimizations for the parameters.
+
+ Returns
+ -------
+ :class:`~lmfit.model.ModelResult`
+ The fit of the model
+ """
+ if params is None:
+ params = model.make_params()
+ result = model.fit(experiment.feature_vector, weights=weights,
+ x=experiment.feature_domain, params=params,
+ method=method)
+ return result
+
+
+def fit_models(models, experiment, params_list=None, weights=None,
+ method='leastsq'):
+ """Apply fitting to a list of models."""
+ if params_list is None:
+ params_list = [m.make_params() for m in models]
+ return [fit_model(model, experiment, params=params,
+ weights=weights, method=method)
+ for model, params in zip(models, params_list)]
+
+
+def create_model(property_group, use_tabulated=False):
+ """Create a composite model from a PropertyDict and a set of models.
+
+ Parameters
+ ----------
+ property_group: :class:`~idpflex.properties.PropertyDict`
+ The set of properties used to create a composite model.
+ use_tabulated: bool
+        Decide to use a tabulated (interpolated) model or a linear model
+        without interpolation. Useful to "center" data.
+
+ Returns
+ -------
+ :class:`~lmfit.CompositeModel`
+        The composite model created by applying the model to the corresponding
+ property and concatenating the results.
+ """ # noqa: E501
+ # Create new model instances or copy model instances
+ if use_tabulated:
+ model_objs = [TabulatedCompositeModel(prop=p)
+ for p in property_group.values()]
+ else:
+ model_objs = [LinearModel(prop=p) for p in property_group.values()]
+ # Prefix all models with the associated property name
+ # Set the model's function prop arg to use the property
+ for i, p in enumerate(property_group.values()):
+ model_objs[i].opts['prop'] = p
+ model_objs[i].prefix = p.name + '_'
+ # Reduce models to a single composite model
+ return reduce(lambda joined_model, m:
+ CompositeModel(joined_model, m, lambda l, r:
+ np.concatenate([np.atleast_1d(l),
+ np.atleast_1d(r)])),
+ model_objs)
+
+
+def create_models(property_groups, use_tabulated=False):
+ """Create a composite model from a list of PropertyDict and a set of models.
+
+ For each structure there will be a probability parameter of the form
+ struct{i}_p that indicates the relative amount of the structure used to
+    make the fit. There is an accompanying struct{i}_proportion_c which
+    duplicates this information but is not limited to [0, 1].
+
+ For each property, there will be internally used parameters of the form
+ struct{i}_{propertyname}_{paramname}.
+ These are reported as {propertyname}_{paramname} (which may be rescaled).
+
+ NOTE:
+ To adjust parameter bounds, values, etc. after model creation, be sure to
+ adjust the parameters prefixed with struct{i} and not just
+ {propertyname}_{paramname}.
+
+ Parameters
+ ----------
+ property_groups: list of :class:`~idpflex.properties.PropertyDict`
+ The set of properties used to create a composite model.
+ use_tabulated: bool
+        Decide to use a tabulated (interpolated) model or a linear model
+        without interpolation. Useful to "center" data.
+
+ Returns
+ -------
+ :class:`~lmfit.CompositeModel`
+        The composite model created by applying the model to the corresponding
+ property and concatenating the results.
+ """
+ try:
+ property_groups = list(property_groups)
+ except TypeError:
+ property_groups = [property_groups]
+
+ if len(property_groups) == 1:
+ return create_model(property_groups[0], use_tabulated=use_tabulated)
+
+ def create_submodel(i, property_group):
+ submodel = create_model(property_group, use_tabulated=use_tabulated)
+ submodel = ConstantModel(prefix='proportion_')*submodel
+ for component in submodel.components:
+ component.prefix = f'struct{i}_' + component.prefix
+ # manually changing prefix eliminates default param hints
+ for pname in property_groups[0]:
+ submodel.set_param_hint(f'struct{i}_{pname}_slope', value=1, min=0)
+ submodel.set_param_hint(f'struct{i}_{pname}_intercept', value=0)
+ return submodel
+
+ model = reduce(operator.add, (create_submodel(i, pg)
+ for i, pg in enumerate(property_groups)))
+
+ if len(property_groups[0]) == 1:
+ warnings.warn('Not enough properties for model to distinguish between'
+ ' slope and proportion of structure. Setting internal'
+ ' slope to not vary during fitting.')
+ for pname in filter(lambda p: 'slope' in p,
+ model.param_names):
+ model.set_param_hint(pname, vary=False, value=1, min=0)
+
+    # for each structure calculate a probability using the proportions
+ proportion_names = [p for p in model.param_names
+ if p.endswith('proportion_c')]
+ total_eq = '(' + '+'.join(proportion_names) + ')'
+ # model.set_param_hint('total', expr=total_eq)
+ for p in proportion_names:
+ model.set_param_hint(p, min=0, value=1)
+ struct_prefix = p.partition('_')[0]
+ model.set_param_hint(f'{struct_prefix}_p', expr=f'{p}/{total_eq}')
+
+ # For each property, ensure structures share slope/constant values
+ for param in filter(
+ lambda param: (any(pname in param for pname in property_groups[0])
+ and not param.startswith('struct0_')),
+ model.param_names):
+ pbase = param.partition('_')[-1] # param name without struct prefix
+ model.set_param_hint(param, expr=f'struct0_{pbase}')
+ if 'center' in pbase:
+ model.set_param_hint(pbase, expr=f'struct0_{pbase}')
+ else:
+ model.set_param_hint(pbase, expr=f'struct0_{pbase}/{total_eq}')
- # Fit each level of the tree
- return [fit_at_depth(tree, experiment, property_name, depth) for
- depth in range(max_depth + 1)]
+ return model
diff --git a/idpflex/cnextend.py b/idpflex/cnextend.py
index 9880536..1f1f86a 100644
--- a/idpflex/cnextend.py
+++ b/idpflex/cnextend.py
@@ -17,7 +17,7 @@ class ClusterNodeX(hierarchy.ClusterNode):
def __init__(self, *args, **kwargs):
# Using of *super* is unfeasible because *ClusterNode* does not
# inherit from *object*.
- # super(ClusterNodeX, self).__init__(*args, **kwargs)
+ # super().__init__(*args, **kwargs)
hierarchy.ClusterNode.__init__(self, *args, **kwargs)
self.parent = None # parent node
self._tree = None
diff --git a/idpflex/properties.py b/idpflex/properties.py
index 129c098..b5a449d 100644
--- a/idpflex/properties.py
+++ b/idpflex/properties.py
@@ -1,10 +1,13 @@
import os
+import warnings
import subprocess
import tempfile
import fnmatch
import functools
import numpy as np
+import scipy
import numbers
+import copy
from collections import OrderedDict
import matplotlib as mpl
from matplotlib import pyplot as plt
@@ -17,7 +20,8 @@
class PropertyDict(object):
- r"""
+ r"""A container of properties.
+
A container of properties mimicking some of the behavior of
a standard python dictionary, plus methods
representing features of the properties when taken as a group.
@@ -34,6 +38,13 @@ def __init__(self, properties=None):
self._properties.update({p.name: p for p in properties})
def __iter__(self):
+ r"""
+ Mimic a dictionary.
+
+ Returns
+ -------
+ list of keys of `_properties`
+ """
return iter(self._properties.keys())
def __getitem__(self, name):
@@ -49,7 +60,7 @@ def __getitem__(self, name):
-------
property object, or `None` if no property is found with *name*
"""
- self._properties.get(name, None)
+ return self._properties.get(name, None)
def __setitem__(self, name, value):
r"""
@@ -63,9 +74,17 @@ def __setitem__(self, name, value):
"""
self._properties[name] = value
+ def __len__(self):
+ r"""Mimic a dict length.
+
+ Returns
+ -------
+ The number of properties in the dict.
+ """
+ return len(self._properties)
+
def get(self, name, default=None):
- r"""
- Mimic get method of a dictionary
+ r"""Mimic get method of a dictionary.
Parameters
----------
@@ -83,7 +102,7 @@ def get(self, name, default=None):
def keys(self):
r"""
- Mimic keys method of a dictionary
+ Mimic keys method of a dictionary.
Returns
-------
@@ -93,7 +112,7 @@ def keys(self):
def items(self):
r"""
- Mimic items method of a dictionary
+ Mimic items method of a dictionary.
Returns
-------
@@ -103,7 +122,7 @@ def items(self):
def values(self):
r"""
- Mimic values method of a dictionary
+ Mimic values method of a dictionary.
Returns
-------
@@ -111,57 +130,95 @@ def values(self):
"""
return self._properties.values()
- def feature_vector(self, names=None):
+ @property
+ def feature_vector(self):
r"""
- Feature vector for the specified sequence of names.
+ Feature vector for the property dict.
The feature vector is a concatenation of the feature vectors for
each of the properties and the concatenation follows the order of
- names.
+ insertion.
- If names is None, return all features in the property dict in the
- order of insertion.
+ Returns
+ -------
+ numpy.ndarray
+ """
+ return np.concatenate([prop.feature_vector
+ for prop in self.values()])
- Parameters
- ----------
- names: list
- List of property names
+ @property
+ def feature_domain(self):
+ r"""
+ Feature domain for the property dict.
+
+ The feature domain is a concatenation of the feature domains for
+ each of the properties and the concatenation follows the order of
+ insertion.
Returns
-------
numpy.ndarray
"""
- if names is None:
- return np.concatenate([prop.feature_vector
- for prop in self.values()])
- return np.concatenate([self._properties[n].feature_vector
- for n in names])
+ return np.concatenate([prop.feature_domain
+ for prop in self.values()])
- def feature_weights(self, names=None):
+ @property
+ def feature_weights(self):
r"""
- Feature vector weights for the specified sequence of names.
+ Feature vector weights for the property group.
The feature vector weights is a concatenation of the feature vectors
weights for each of the properties and the concatenation follows the
- order of names.
-
- If names is None, return all features in the property dict in the
order of insertion.
+ Returns
+ -------
+ numpy.ndarray
+ """
+ return np.concatenate([prop.feature_weights
+ for prop in self.values()])
+
+ def subset(self, names, property_type=None, to_keep_filter=lambda p: True):
+ r"""
+ Property dict for the specified sequence of names.
+
+ The subset is a PropertyDict of the properties with the same order as
+ names. The properties must satisfy the provided conditions, that is
+ they must be one of the types in property_type tuple (or single type
+    instance) and when passed to `to_keep_filter` must return truthy.
+
Parameters
----------
- names: list
- List of property names
+ names: list or str
+ List of property names. If single string treat as if a list of
+ one item.
+ property_type: tuple or property type, optional
+ Tuple of property types to keep. Default of None allows any type.
+ Anded with to_keep_filter.
+ to_keep_filter: callable, optional
+ A callable that should take a property object and return True if
+ the property should be included in the subset. Default allows all
+ properties.
+ Anded with property_types.
Returns
-------
- numpy.ndarray
+ PropertyDict
"""
- if names is None:
- return np.concatenate([prop.feature_weights
- for prop in self.values()])
- return np.concatenate([self._properties[n].feature_weights
- for n in names])
+        # Change to list to apply the constraints of property type and filter
+ # even if only a single name
+ if isinstance(names, str):
+ names = [names]
+
+ # Define an inner function for the purpose of informative stack trace
+ # and to add handling of property types to the filter
+ def _to_keep_filter(prop):
+ if property_type is None:
+ return to_keep_filter(prop)
+ return to_keep_filter(prop) and isinstance(prop, property_type)
+
+ return PropertyDict([self[name] for name in names
+ if _to_keep_filter(self[name])])
def register_as_node_property(cls, nxye):
@@ -198,7 +255,7 @@ def register_as_node_property(cls, nxye):
('errors', 'intensity errors'))
"""
def property_item(attr_name, docstring):
- r"""Factory of the node property items *name*, *x*, *y*, and *e*
+ r"""Factory of the node property items *name*, *x*, *y*, and *e*.
Parameters
----------
@@ -231,7 +288,7 @@ def setter(instance, value):
def decorate_as_node_property(nxye):
- r"""Decorator that endows a class with the node property protocol
+ r"""Decorator that endows a class with the node property protocol.
For details, see :func:`~idpflex.properties.register_as_node_property`
@@ -281,14 +338,21 @@ def set_scalar(self, y):
@property
def feature_vector(self):
+ """Return the y value as the feature vector of the property."""
return np.array([self.y, ])
+ @property
+ def feature_domain(self):
+ """Return the x value as the input domain of the property."""
+ return np.array([self.x])
+
@property
def feature_weights(self):
+ """Return 1 as the weight of the property components."""
return np.array([1])
def histogram(self, bins=10, errors=False, **kwargs):
- r"""Histogram of values for the leaf nodes
+ r"""Histogram of values for the leaf nodes.
Parameters
----------
@@ -350,7 +414,7 @@ class AsphericityMixin(object):
from the gyration radius tensor"""
def from_universe(self, a_universe, selection=None, index=0):
- r"""Calculate asphericity from an MDAnalysis universe instance
+ r"""Calculate asphericity from an MDAnalysis universe instance.
:math:`\frac{(L_1-L_2)^2+(L_1-L_3)^2+L_2-L_3)^2}{2(L_1+L_2+L_3)^2}`
@@ -386,7 +450,7 @@ def from_universe(self, a_universe, selection=None, index=0):
return self
def from_pdb(self, filename, selection=None):
- r"""Calculate asphericity from a PDB file
+ r"""Calculate asphericity from a PDB file.
:math:`\frac{(L_1-L_2)^2+(L_1-L_3)^2+L_2-L_3)^2}{2(L_1+L_2+L_3)^2}`
@@ -438,7 +502,7 @@ def __init__(self, *args, **kwargs):
@property
def asphericity(self):
- r"""Property to read and set the asphericity"""
+ r"""Property to read and set the asphericity."""
return self.y
@asphericity.setter
@@ -451,7 +515,7 @@ class SaSaMixin(object):
solvent accessible surface area"""
def from_mdtraj(self, a_traj, probe_radius=1.4, **kwargs):
- r"""Calculate solvent accessible surface for frames in a trajectory
+ r"""Calculate solvent accessible surface for frames in a trajectory.
SASA units are Angstroms squared
@@ -476,7 +540,7 @@ def from_mdtraj(self, a_traj, probe_radius=1.4, **kwargs):
return self
def from_pdb(self, filename, selection=None, probe_radius=1.4, **kwargs):
- r"""Calculate solvent accessible surface area (SASA) from a PDB file
+ r"""Calculate solvent accessible surface area (SASA) from a PDB file.
If the PBD contains more than one structure, calculation is performed
only for the first one.
@@ -561,7 +625,7 @@ def __init__(self, *args, **kwargs):
@property
def sasa(self):
- r"""Property to read and write the SASA value"""
+ r"""Property to read and write the SASA value."""
return self.y
@sasa.setter
@@ -574,7 +638,7 @@ class EndToEndMixin(object):
the end-to-end distance for a protein"""
def from_universe(self, a_universe, selection='name CA', index=0):
- r"""Calculate radius of gyration from an MDAnalysis Universe instance
+ r"""Calculate radius of gyration from an MDAnalysis Universe instance.
Does not apply periodic boundary conditions
@@ -601,7 +665,7 @@ def from_universe(self, a_universe, selection='name CA', index=0):
return self
def from_pdb(self, filename, selection='name CA'):
- r"""Calculate end-to-end distance from a PDB file
+ r"""Calculate end-to-end distance from a PDB file.
Does not apply periodic boundary conditions
@@ -625,7 +689,7 @@ def from_pdb(self, filename, selection='name CA'):
class EndToEnd(ScalarProperty, EndToEndMixin):
- r"""Implementation of a node property to store the end-to-end distance
+ r"""Implementation of a node property to store the end-to-end distance.
See :class:`~idpflex.properties.ScalarProperty` for initialization
"""
@@ -639,7 +703,7 @@ def __init__(self, *args, **kwargs):
@property
def end_to_end(self):
- r"""Property to read and set the end-to-end distance"""
+ r"""Property to read and set the end-to-end distance."""
return self.y
@end_to_end.setter
@@ -653,7 +717,7 @@ class RadiusOfGyrationMixin(object):
"""
def from_universe(self, a_universe, selection=None, index=0):
- r"""Calculate radius of gyration from an MDAnalysis Universe instance
+ r"""Calculate radius of gyration from an MDAnalysis Universe instance.
Parameters
----------
@@ -678,7 +742,7 @@ def from_universe(self, a_universe, selection=None, index=0):
return self
def from_pdb(self, filename, selection=None):
- r"""Calculate Rg from a PDB file
+ r"""Calculate Rg from a PDB file.
Parameters
----------
@@ -713,7 +777,7 @@ def __init__(self, *args, **kwargs):
@property
def rg(self):
- r"""Property to read and write the radius of gyration value"""
+ r"""Property to read and write the radius of gyration value."""
return self.y
@rg.setter
@@ -726,8 +790,7 @@ def rg(self, value):
('cmap', '(:class:`~numpy:numpy.ndarray`) contact map between residues'), # noqa: E501
('errors', '(:class:`~numpy:numpy.ndarray`) undeterminacies in the contact map'))) # noqa: E501
class ResidueContactMap(object):
- r"""Contact map between residues of the conformation using different
- definitions of contact.
+ r"""Contact map between residues of the conformation using different definitions of contact.
Parameters
----------
@@ -757,7 +820,7 @@ def __init__(self, name=None, selection=None, cmap=None, errors=None,
self.cutoff = cutoff
def from_universe(self, a_universe, cutoff, selection=None, index=0):
- r"""Calculate residue contact map from an MDAnalysis Universe instance
+ r"""Calculate residue contact map from an MDAnalysis Universe instance.
Parameters
----------
@@ -804,7 +867,7 @@ def from_universe(self, a_universe, cutoff, selection=None, index=0):
return self
def from_pdb(self, filename, cutoff, selection=None):
- r"""Calculate residue contact map from a PDB file
+ r"""Calculate residue contact map from a PDB file.
Parameters
----------
@@ -826,11 +889,11 @@ def from_pdb(self, filename, cutoff, selection=None):
return self.from_universe(mda.Universe(filename), cutoff, selection)
def plot(self):
- r"""Plot the residue contact map of the node"""
+ r"""Plot the residue contact map of the node."""
resids = [str(i) for i in list(set(self.selection.resids))]
def format_fn(tick_val, tick_pos):
- r"""Translates matrix index to residue number"""
+ r"""Translate matrix index to residue number."""
if int(tick_val) < len(resids):
return resids[int(tick_val)]
else:
@@ -858,7 +921,7 @@ def format_fn(tick_val, tick_pos):
('profile', '(:class:`~numpy:numpy.ndarray`) secondary structure assignment'), # noqa: E501
('errors', '(:class:`~numpy:numpy.ndarray`) assignment undeterminacy'))) # noqa: E501
class SecondaryStructureProperty(object):
- r"""Node property for secondary structure determined by DSSP
+ r"""Node property for secondary structure determined by DSSP.
Every residue is assigned a vector of length 8. Indexes corresponds to
different secondary structure assignment:
@@ -894,6 +957,7 @@ class SecondaryStructureProperty(object):
N x 8 matrix denoting undeterminacies for each type of assigned
secondary residue in every residue
""" # noqa: E501
+
#: Description of single-letter codes for secondary structure
elements = OrderedDict([('H', 'Alpha helix'),
('B', 'Isolated beta-bridge'),
@@ -941,7 +1005,7 @@ def __init__(self, name=None, aa=None, profile=None, errors=None):
self.node = None
def from_dssp_sequence(self, codes):
- r"""Load secondary structure profile from a single string of DSSP codes
+ r"""Load secondary structure profile from a string of DSSP codes.
Attributes *aa* and *errors* are not modified, only **profile**.
@@ -964,7 +1028,7 @@ def from_dssp_sequence(self, codes):
return self
def from_dssp(self, file_name):
- r"""Load secondary structure profile from a `dssp file `_
+ r"""Load secondary structure profile from a `dssp file `_.
Parameters
----------
@@ -991,7 +1055,7 @@ def from_dssp(self, file_name):
return self
def from_dssp_pdb(self, file_name, command='mkdssp', silent=True):
- r"""Calculate secondary structure with DSSP
+ r"""Calculate secondary structure with DSSP.
Parameters
----------
@@ -1051,8 +1115,7 @@ def collapsed(self):
return self.profile.argmax(axis=1)
def disparity(self, other):
- r"""Secondary Structure disparity of other profile to self, akin to
- :math:`\chi^2`
+ r"""Secondary Structure disparity of other profile to self, akin to :math:`\chi^2`.
:math:`\frac{1}{N(n-1)} \sum_{i=1}^{N}\sum_{j=1}^{n} (\frac{p_{ij}-q_ {ij}}{e})^2`
@@ -1080,7 +1143,7 @@ def disparity(self, other):
return np.sum(np.square(dp/e)) / (n * (self.n_codes - 1))
def plot(self, kind='percents'):
- r"""Plot the secondary structure of the node holding the property
+ r"""Plot the secondary structure of the node holding the property.
Parameters
----------
@@ -1171,47 +1234,157 @@ class ProfileProperty(object):
Intensity values
errors : :class:`~numpy:numpy.ndarray`
Errors in the intensity values
+ node : :class:`~idpflex.cnextend.ClusterNodeX`
+ Node to which this property belongs
+ interp_kws : dict
+ Keyword args to pass to scipy interp1d for interpolating profile data.
+ Defaults to using extrapolation.
+ error_interp_kws : dict
+ Keyword args to pass to scipy interp1d for interpolating error data.
+ Defaults to using extrapolation.
"""
default_name = 'profile'
- def __init__(self, name=None, qvalues=None, profile=None, errors=None):
+ def __init__(self, name=None, qvalues=None, profile=None, errors=None,
+ node=None, interp_kws=None, error_interp_kws=None):
self.name = name
self.qvalues = qvalues
self.profile = profile
self.errors = errors
- self.node = None
+ self.node = node
+ self._interpolator = None
+ self.interp_kws = interp_kws or {'fill_value': 'extrapolate'}
+ self._error_interpolator = None
+ self.error_interp_kws = error_interp_kws or {'fill_value':
+ 'extrapolate'}
@property
def feature_vector(self):
r"""
- Each `qvalue` is interpreted as an independent feature,
- and the related value in `profile` is a particular
- "measured" value of that feature.
+ Each `qvalue` is interpreted as an independent feature, and the related value in `profile` is a particular "measured" value of that feature.
Returns
-------
numpy.ndarray
- """
+ """ # noqa: E501
return self.profile
+ @property
+ def feature_domain(self):
+ r"""
+ Return `qvalue` corresponding to each of the values in the feature vector.
+
+ Returns
+ -------
+ numpy.ndarray
+ """ # noqa: E501
+ return self.qvalues
+
@property
def feature_weights(self):
r"""
- Weights to be used when calculating the square of the euclidean
- distance between two feature vectors
+ Weights to be used when calculating the square of the euclidean distance between two feature vectors.
Returns
-------
numpy.ndarray
+ """ # noqa: E501
+        if self.errors is None or np.allclose(np.zeros(len(self.profile)), self.errors):
+ return np.ones(len(self.profile)) / np.sqrt(len(self.profile))
+ ws = self.profile / self.errors
+ return ws / np.linalg.norm(ws)
+
+ @property
+ def interpolator(self):
+ r"""
+ Return the property's interpolator for the profile data.
+
+ If not previously created, will create an interpolator using
+ `self.interp_kws`. These default to a linear interpolator with
+ extrapolation.
+
+ Returns
+ -------
+ function
"""
- return np.ones(len(self.profile)) / np.sqrt(len(self.profile))
+ if self._interpolator is None:
+ warnings.warn('Property did not have interpolator.'
+ ' Creating interpolator using interp_kws.')
+ self._interpolator = scipy.interpolate.interp1d(
+ self.qvalues, self.profile, **self.interp_kws)
+ return self._interpolator
+
+ @property
+ def error_interpolator(self):
+ r"""
+ Return the property's interpolator for the error data.
+
+ If not previously created, will create an interpolator using
+ `self.error_interp_kws`. These default to a linear interpolator
+ with extrapolation.
+
+ Returns
+ -------
+ function
+ """
+ if self._error_interpolator is None:
+ warnings.warn('Property did not have error interpolator. Creating'
+ ' interpolator using error_interp_kws.')
+ self._error_interpolator = scipy.interpolate.interp1d(
+ self.qvalues, self.errors, **self.error_interp_kws)
+ return self._error_interpolator
+
+ def interpolate(self, qvalues, inplace=False):
+ r"""Return interpolated data in new object or in place."""
+ # New instance of a property potentially using the subclass' init
+ result = self if inplace else copy.deepcopy(self)
+ result.profile = result.interpolator(qvalues)
+ result.errors = result.error_interpolator(qvalues)
+ result.qvalues = qvalues.copy()
+ # When modifying values, should likely reset the interpolator
+ # Will only reset self if inplace
+ warnings.warn('Reseting interpolators due to modifying data.')
+ result._interpolator = None
+ result._error_interpolator = None
+ return result
+
+ def filter(self, to_drop=None, inplace=False):
+ """Filter data using the `to_drop`, otherwise filter out 'bad' data.
+
+ Will remove the portion of the profile, qvalues, and errors that align
+ with true values in the `to_drop`. By default, the `to_drop` is true
+ when any of profile, qvalues, and errors are infinite or the errors
+ are zero.
+
+ Parameters
+ ----------
+        to_drop: numpy.ndarray of type bool
+            Boolean mask; True entries are dropped from profile, qvalues, and errors.
+ inplace: bool
+ If inplace, modify profile data instead of creating new object.
+
+ Returns
+ -------
+ A new property with the chosen subset of data.
+ """
+ if to_drop is None:
+ to_drop = ~(np.isfinite(self.profile) & np.isfinite(self.qvalues)
+ & np.isfinite(self.errors) & (self.errors != 0))
+ result = self if inplace else copy.deepcopy(self)
+ result.profile = result.profile[~to_drop]
+ result.errors = result.errors[~to_drop]
+ result.qvalues = result.qvalues[~to_drop]
+ # When modifying values, should likely reset the interpolator
+ # Will only reset self if inplace
+ warnings.warn('Reseting interpolators due to modifying data.')
+ result._interpolator = None
+ result._error_interpolator = None
+ return result
class SansLoaderMixin(object):
- r"""Mixin class providing a set of methods to load SANS data into a
- profile property
- """
+ r"""Mixin class providing a set of methods to load SANS data into a profile property.""" # noqa: E501
def from_sassena(self, handle, profile_key='fqt', index=0):
"""Load SANS profile from sassena output.
@@ -1244,7 +1417,7 @@ def from_sassena(self, handle, profile_key='fqt', index=0):
return self
def from_cryson_int(self, file_name):
- r"""Load profile from a `cryson \*.int `_ file
+ r"""Load profile from a `cryson \*.int `_ file.
Parameters
----------
@@ -1282,7 +1455,7 @@ def from_cryson_fit(self, file_name):
def from_cryson_pdb(self, file_name, command='cryson',
args='-lm 20 -sm 0.6 -ns 500 -un 1 -eh -dro 0.075',
silent=True):
- r"""Calculate profile with cryson from a PDB file
+ r"""Calculate profile with cryson from a PDB file.
Parameters
----------
@@ -1365,8 +1538,7 @@ def to_ascii(self, file_name):
class SansProperty(ProfileProperty, SansLoaderMixin):
- r"""Implementation of a node property for SANS data
- """
+ r"""Implementation of a node property for SANS data."""
default_name = 'sans'
@@ -1377,12 +1549,10 @@ def __init__(self, *args, **kwargs):
class SaxsLoaderMixin(object):
- r"""Mixin class providing a set of methods to load X-ray data into a
- profile property
- """
+ r"""Mixin class providing a set of methods to load X-ray data into a profile property.""" # noqa: E501
def from_crysol_int(self, file_name):
- r"""Load profile from a `crysol \*.int `_ file
+ r"""Load profile from a `crysol \*.int `_ file.
Parameters
----------
@@ -1420,7 +1590,7 @@ def from_crysol_fit(self, file_name):
def from_crysol_pdb(self, file_name, command='crysol',
args='-lm 20 -sm 0.6 -ns 500 -un 1 -eh -dro 0.075',
silent=True):
- r"""Calculate profile with crysol from a PDB file
+ r"""Calculate profile with crysol from a PDB file.
Parameters
----------
@@ -1503,8 +1673,7 @@ def to_ascii(self, file_name):
class SaxsProperty(ProfileProperty, SaxsLoaderMixin):
- r"""Implementation of a node property for SAXS data
- """
+ r"""Implementation of a node property for SAXS data."""
default_name = 'saxs'
@@ -1516,8 +1685,9 @@ def __init__(self, *args, **kwargs):
def propagator_weighted_sum(values, tree,
weights=lambda left_node, right_node: (1.0, 1.0)):
- r"""Calculate the property of a node as the sum of its two siblings'
- property values. Propagation applies only to non-leaf nodes.
+ r"""Calculate the property of a node as the sum of its two siblings' property values.
+
+ Propagation applies only to non-leaf nodes.
Parameters
----------
@@ -1529,7 +1699,7 @@ def propagator_weighted_sum(values, tree,
Callable of two arguments (left-node and right-node) returning
a tuple of left and right weights. Default callable returns (1.0, 1.0)
always.
- """
+ """ # noqa: E501
# Insert a property for each leaf
if len(values) != tree.nleafs:
msg = "len(values)={} but there are {} leafs".format(len(values),
@@ -1556,7 +1726,7 @@ def propagator_weighted_sum(values, tree,
def weights_by_size(left_node, right_node):
- r"""Calculate the relative size of two nodes
+ r"""Calculate the relative size of two nodes.
Parameters
----------
diff --git a/tests/conftest.py b/tests/conftest.py
index 3f42d90..2e53590 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,6 +16,8 @@
this_module_path = sys.modules[__name__].__file__
data_dir = os.path.join(os.path.dirname(this_module_path), 'data')
+random_state = np.random.RandomState(23)
+
@idprop.decorate_as_node_property((('name', 'name of the property'),
('domain_bar', 'property domain'),
@@ -56,7 +58,7 @@ def benchmark():
n_leafs = 22379
# Instantiate scalar properties for the leaf nodes, then propagate
# up the tree
- sc = np.random.normal(loc=100.0, size=n_leafs)
+ sc = random_state.normal(loc=100.0, size=n_leafs)
sc_p = [idprop.ScalarProperty(name='sc', y=s) for s in sc]
idprop.propagator_size_weighted_sum(sc_p, t)
return {'z': z,
@@ -139,7 +141,7 @@ def sans_benchmark(request):
# Create a node tree.
# m is a 1D compressed matrix of distances between leafs
- m = np.random.random(int(n_leafs * (n_leafs - 1) / 2))
+ m = random_state.random_sample(int(n_leafs * (n_leafs - 1) / 2))
z = linkage(m)
tree = cnextend.Tree(z)
@@ -201,12 +203,12 @@ def sans_fit(sans_benchmark):
coefficients = dict()
nodes = tree.nodes_at_depth(depth)
n_nodes = 1 + depth # depth=0 corresponds to the root node (nclusters=1)
- q_values = (tree.root[name].x[:-1] + tree.root[name].x[1:]) / 2 # midpoint
+ q_values = tree.root[name].x
profile = np.zeros(len(q_values))
for i in range(n_nodes):
coefficients[nodes[i].id] = coeffs[i]
p = nodes[i][name]
- profile += coeffs[i] * (p.y[:-1] + p.y[1:]) / 2
+ profile += coeffs[i] * p.y
background = 0.05 * max(profile) # flat background
profile += background
experiment_property = idprop.SansProperty(name=name,
diff --git a/tests/test_bayes.py b/tests/test_bayes.py
index 066ce6a..5565665 100644
--- a/tests/test_bayes.py
+++ b/tests/test_bayes.py
@@ -1,51 +1,165 @@
import numpy as np
-from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_array_almost_equal, assert_array_equal
import pytest
+import warnings
from idpflex import bayes
+from idpflex import properties
-def test_model_at_node(sans_fit):
+def test_TabulatedCompositeModel_interpolation(sans_fit):
tree = sans_fit['tree']
- mod = bayes.model_at_node(tree.root, sans_fit['property_name'])
+ tree.root.property_group = tree.root.property_group.subset(
+ tree.root.property_group.keys(),
+ property_type=properties.ProfileProperty)
+ mod = bayes.create_at_depth(tree, 0, use_tabulated=True)
prop = tree.root[sans_fit['property_name']]
- q_values = (prop.x[:-1] + prop.x[1:]) / 2 # midpoints
params = mod.make_params()
- y = mod.eval(params, x=q_values)
- assert_array_almost_equal(y, (prop.y[:-1] + prop.y[1:]) / 2, decimal=1)
+ # test interpolation
+ model = bayes.TabulatedFunctionModel(prop=prop)
+ qvals = prop.qvalues[:-1] + np.diff(prop.qvalues)/2
+ assert_array_almost_equal(model.eval(model.make_params(), x=qvals),
+ prop.interpolator(qvals), decimal=1)
+ # test centering
+ params[f'{prop.name}_center'].set(vary=True, value=2)
+ prop_shifted = prop.interpolate(prop.qvalues + 2)
+ fit = mod.fit(prop_shifted.y, x=prop.qvalues, params=params)
+ assert_array_almost_equal(fit.best_fit, prop_shifted.y, decimal=1)
+ assert abs(fit.params[f'{prop.name}_center'] - 2) <= 0.2
-def test_model_at_depth(sans_fit):
+def test_fit_at_depth(sans_fit):
tree = sans_fit['tree']
- name = sans_fit['property_name']
- depth = sans_fit['depth']
- # Evaluate the model
- mod = bayes.model_at_depth(tree, depth, name)
+ exp = sans_fit['experiment_property']
+ exp_pd = properties.PropertyDict([exp])
+ mod = bayes.create_at_depth(tree, sans_fit['depth'])
params = mod.make_params()
- for id, coeff in sans_fit['coefficients'].items():
- params['n{}_amplitude'.format(id)].set(value=coeff)
- params['c'].set(value=sans_fit['background'])
- p_exp = sans_fit['experiment_property']
- assert_array_almost_equal(mod.eval(params, x=p_exp.x),
- p_exp.y, decimal=1)
+ fit = bayes.fit_model(mod, exp_pd, params=params)
+ assert fit.weights is None
+ fit2 = bayes.fit_model(mod, exp_pd, weights=1/exp.e)
+ assert fit2.chisqr < 1e-10
+ assert_array_equal(fit2.weights, 1/exp.e)
+ # make assertions about parameters
+ assert all([f'struct{i}_proportion_c' in fit.params
+ for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth']))])
+ assert all([fit.params[f'struct{i}_proportion_c'].min == 0
+ for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth']))])
+ assert all([f'struct{i}_{pname}_slope' in fit.params
+ for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth']))
+ for pname in exp_pd])
+ assert all([fit.params[f'struct{i}_{pname}_slope'].min == 0
+ for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth']))
+ for pname in exp_pd])
+ assert all([f'struct{i}_{pname}_intercept' in fit.params
+ for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth']))
+ for pname in exp_pd])
+ assert abs(sum(p for p in fit.params.values()
+ if p.name.endswith('_p')) - 1) <= 1e-3
-def test_fit_at_depth(sans_fit):
+def test_global_minimization(sans_fit):
tree = sans_fit['tree']
- name = sans_fit['property_name']
- p_exp = sans_fit['experiment_property']
- fit = bayes.fit_at_depth(tree, p_exp, name, sans_fit['depth'])
- assert fit.chisqr < 1e-10
+ exp = sans_fit['experiment_property']
+ exp_pd = properties.PropertyDict([exp])
+ mod = bayes.create_at_depth(tree, sans_fit['depth'])
+ params = mod.make_params()
+ for param in params.values():
+ param.set(min=0, max=1e9)
+ fit = bayes.fit_model(mod, exp_pd, params=params, weights=1/exp.e)
+ fit2 = bayes.fit_model(mod, exp_pd, params=params, weights=1/exp.e,
+ method='basin_hopping')
+ assert abs(sum(p for p in fit2.params.values()
+ if p.name.endswith('_p')) - 1) <= 1e-3
+ try:
+ # Expect global fit to be better than typical fit
+        assert fit2.redchi <= fit.redchi
+ except AssertionError:
+ warnings.warn('Global minimization did not do better than reference'
+ ' fit. Attempting a refit to avoid test failure.'
+ f' Difference {fit.redchi - fit2.redchi:.3} where global'
+ f' {fit2.redchi:.3} and reference {fit.redchi:.3}.',
+ RuntimeWarning)
+ # Try refitting and looser tolerance
+ fit3 = bayes.fit_model(mod, exp_pd, params=params,
+ weights=exp_pd.feature_weights,
+ method='differential_evolution')
+        assert fit3.redchi <= fit.redchi
def test_fit_to_depth(sans_fit):
tree = sans_fit['tree']
- name = sans_fit['property_name']
- p_exp = sans_fit['experiment_property']
- fits = bayes.fit_to_depth(tree, p_exp, name, max_depth=7)
- chi2 = np.asarray([fit.chisqr for fit in fits])
+ exp = sans_fit['experiment_property']
+ exp_pd = properties.PropertyDict([exp])
+ mods = bayes.create_to_depth(tree, max_depth=7)
+ params_list = [m.make_params() for m in mods]
+ fits = bayes.fit_models(mods, exp_pd, weights=1/exp.e,
+ params_list=params_list)
+    # With a single component there are no probability parameters to fit
+ assert all([not p.endswith('_p') for p in fits[0].params])
+ chi2 = np.array([fit.chisqr for fit in fits])
assert np.argmax(chi2 < 1e-10) == sans_fit['depth']
+def test_fit(sans_fit):
+ tree = sans_fit['tree']
+ exp = sans_fit['experiment_property']
+ scalar = properties.ScalarProperty(name='foo', x=1, y=1, e=1)
+ values = [properties.ScalarProperty(name='foo', x=1, y=i, e=1)
+ for i in range(len(tree.leafs))]
+ properties.propagator_size_weighted_sum(values, tree)
+ exp_pd = properties.PropertyDict([exp, scalar])
+ ctd = bayes.create_to_depth
+ models = ctd(tree, max_depth=7)
+ params_list = [m.make_params() for m in models]
+ weights = 1/np.concatenate([exp.e, scalar.feature_weights])
+ fits = bayes.fit_models(models, exp_pd, weights=weights,
+ params_list=params_list)
+ chi2 = [fit.chisqr for fit in fits]
+ assert chi2[sans_fit['depth']] <= 1e-10
+ # Test filtering by name
+ cad = bayes.create_at_depth
+ exp_pd2 = properties.PropertyDict([exp])
+ models2 = cad(tree, depth=sans_fit['depth'], names=exp_pd2.keys())
+ params_list2 = models2.make_params()
+ assert all(f'{k}_slope' in params_list2 for k in exp_pd2.keys())
+ # Test filtering by property type
+ models3 = cad(tree, depth=sans_fit['depth'],
+ property_type=properties.ProfileProperty)
+ params_list3 = models3.make_params()
+ assert all(f'{p.name}_slope' in params_list3 for p in exp_pd2.values()
+ if isinstance(p, properties.ProfileProperty))
+ # Test filtering by filtering function
+ models4 = cad(tree, depth=sans_fit['depth'],
+ to_keep_filter=lambda p: p.name == 'foo')
+ params_list4 = models4.make_params()
+ assert 'foo_slope' in params_list4
+
+
+def test_fit_bad_input(sans_fit):
+ tree = sans_fit['tree']
+ exp = sans_fit['experiment_property']
+ name = sans_fit['property_name']
+ scalar = properties.ScalarProperty(name='foo', x=1, y=1, e=1)
+ # Different x value to prompt error
+ values = [properties.ScalarProperty(name='foo', x=10, y=i, e=1)
+ for i in range(len(tree.leafs))]
+ properties.propagator_size_weighted_sum(values, tree)
+ exp_pd = properties.PropertyDict([exp, scalar])
+ ctd = bayes.create_to_depth
+ with pytest.raises(ValueError):
+ models = ctd(tree, max_depth=7, use_tabulated=True)
+ exp_shift = exp.interpolate(exp.x + 2)
+ exp_pd_shifted = properties.PropertyDict([exp_shift, scalar])
+ bayes.fit_models(models, exp_pd_shifted)
+ with pytest.raises(ValueError):
+ models = ctd(tree, max_depth=7)
+ bayes.fit_models(models, exp_pd)
+ with pytest.raises(ValueError):
+ left_child = tree.root.get_left()
+ left_child.property_group = left_child.property_group.subset([name])
+ mods = ctd(tree, max_depth=7)
+ bayes.fit_models(mods, exp_pd)
+
+
if __name__ == '__main__':
pytest.main()
diff --git a/tests/test_cnextend.py b/tests/test_cnextend.py
index 030ae20..05e7b45 100644
--- a/tests/test_cnextend.py
+++ b/tests/test_cnextend.py
@@ -27,9 +27,9 @@ def test_property_group_features(self):
n[prop.name] = prop
prop2 = ScalarProperty(name='some_prop2', y=2)
n[prop2.name] = prop2
- fv = n.property_group.feature_vector()
+ fv = n.property_group.feature_vector
assert_array_equal(fv, np.array([4, 2]))
- ws = n.property_group.feature_weights()
+ ws = n.property_group.feature_weights
assert_array_equal(ws, np.array([1, 1]))
def test_leafs(self, benchmark):
diff --git a/tests/test_properties.py b/tests/test_properties.py
index 0b710ba..7d6a452 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -3,13 +3,91 @@
import pytest
import tempfile
import shutil
+import scipy
from idpflex import properties as ps
from idpflex.properties import SecondaryStructureProperty as SSP
+from numpy.testing import assert_array_equal, assert_almost_equal
+
+
+class TestPropertyDict(object):
+ def test_mimic_dict(self):
+ props = {'profile': ps.ProfileProperty(name='profile',
+ profile=np.arange(10),
+ qvalues=np.arange(10)*5,
+ errors=np.arange(10)*.01),
+ 'scalar': ps.ScalarProperty(name='scalar', x=0, y=1, e=2)}
+ scalar2 = ps.ScalarProperty(name='scalar2', x=1, y=2, e=3)
+ propdict = ps.PropertyDict(properties=props.values())
+ assert [k for k in propdict] == [k for k in props]
+ assert propdict['profile'] == props['profile']
+ propdict['scalar2'] = scalar2
+ assert propdict['scalar2'] == scalar2
+ propdict = propdict.subset(names=props.keys())
+ assert len(propdict) == len(props)
+ assert propdict.get('not_real', default=5) == 5
+ assert list(propdict.keys()) == list(props.keys())
+ assert list(propdict.values()) == list(props.values())
+ assert list(propdict.items()) == list(props.items())
+ propdict2 = propdict.subset(names=list(props.keys())[0])
+ assert len(propdict2) == 1
+
+ def test_subset(self):
+ props = {'profile': ps.ProfileProperty(name='profile',
+ profile=np.arange(10),
+ qvalues=np.arange(10)*5,
+ errors=np.arange(10)*.01),
+ 'scalar': ps.ScalarProperty(name='scalar', x=0, y=1, e=2),
+ 'scalar2': ps.ScalarProperty(name='scalar2', x=1, y=2, e=3),
+ }
+ propdict = ps.PropertyDict(properties=props.values())
+ propdict = propdict.subset(names=props.keys())
+ assert len(propdict) == len(props)
+ propdict2 = propdict.subset(names=props.keys(),
+ property_type=(ps.ProfileProperty,
+ ps.ScalarProperty))
+ assert len(propdict2) == len(props)
+ propdict3 = propdict.subset(names=props.keys(),
+ property_type=ps.ProfileProperty)
+ assert len(propdict3) == 1
+ assert 'profile' in propdict3
+ propdict4 = propdict.subset(names=props.keys(),
+ to_keep_filter=lambda prop:
+ all(prop.feature_vector == 2))
+ assert len(propdict4) == 1
+ assert 'scalar2' in propdict4
+ propdict5 = propdict.subset(names=props.keys(),
+ property_type=(ps.ScalarProperty),
+ to_keep_filter=lambda prop:
+ all(prop.feature_vector == 2))
+ assert len(propdict5) == 1
+ assert 'scalar2' in propdict5
+ propdict6 = propdict.subset(names=props.keys(),
+ property_type=ps.ProfileProperty,
+ to_keep_filter=lambda prop:
+ all(prop.feature_vector == 2))
+ assert len(propdict6) == 0
+
+ def test_feature_vector_domain_weights(self):
+ x = np.arange(10)
+ props = {'profile': ps.ProfileProperty(name='profile',
+ profile=x,
+ qvalues=x*5,
+ errors=x*.01 + 0.001),
+ 'scalar': ps.ScalarProperty(name='scalar', x=0, y=1, e=2)}
+ propdict = ps.PropertyDict(properties=props.values())
+ assert_array_equal(propdict.feature_vector,
+ np.concatenate([p.feature_vector
+ for p in props.values()]))
+ assert_array_equal(propdict.feature_domain,
+ np.concatenate([p.feature_domain
+ for p in props.values()]))
+ assert_array_equal(propdict.feature_weights,
+ np.concatenate([p.feature_weights
+ for p in props.values()]))
class TestRegisterDecorateProperties(object):
-
def test_register_as_node_property(self):
class SomeProperty(object):
def __init__(self):
@@ -69,6 +147,15 @@ def test_plot_histogram(self, benchmark):
ax = root_prop.plot(kind='histogram', errors=True, bins=1)
assert ax.patches[0]._height == benchmark['nleafs']
+ def test_feature_vector_domain_and_weights(self):
+ prop = ps.ScalarProperty(name='foo', x=0, y=1, e=2)
+ assert prop.x == 0
+ assert prop.y == 1
+ assert prop.e == 2
+ assert_array_equal(prop.feature_vector, np.array([prop.y]))
+ assert_array_equal(prop.feature_domain, np.array([prop.x]))
+ assert_array_equal(prop.feature_weights, np.array([1]))
+
class TestAsphericity(object):
@@ -244,6 +331,98 @@ def test_instance_decorated_as_node_property(self):
assert np.array_equal(profile_prop.y, 10*v)
assert np.array_equal(profile_prop.e, 0.1*v)
+ def test_feature_vector_domain_and_weights(self):
+ v = np.arange(9)
+ profile_prop = ps.ProfileProperty(name='foo', qvalues=v, profile=10*v,
+ errors=0.1*v + 0.001)
+ assert_array_equal(profile_prop.feature_vector, profile_prop.profile)
+ assert_array_equal(profile_prop.feature_domain, profile_prop.qvalues)
+ ws = profile_prop.profile/profile_prop.errors
+ ws = ws/np.linalg.norm(ws)
+ assert_array_equal(profile_prop.feature_weights, ws)
+ assert_almost_equal(np.linalg.norm(profile_prop.feature_weights), 1)
+ # mimic reading from a crysol/cryson int
+ prof_prop2 = ps.ProfileProperty(name='foo', qvalues=v, profile=10*v,
+ errors=np.zeros(len(v)))
+ ws2 = np.ones(len(prof_prop2.profile))/len(prof_prop2.profile)**.5
+ assert_array_equal(prof_prop2.feature_weights, ws2)
+ assert_almost_equal(np.linalg.norm(prof_prop2.feature_weights), 1)
+
+ def test_interpolation(self):
+ x1 = np.random.rand(10)
+        # Guarantee values outside of the range to test extrapolation
+ x2 = x1 + abs(np.random.rand(10))
+ y1 = x1**2
+ y2 = scipy.interpolate.interp1d(x1, y1, fill_value='extrapolate')(x2)
+ e = scipy.interpolate.interp1d(x1, .1*y1, fill_value='extrapolate')(x2)
+ prop = ps.ProfileProperty(name='foo', qvalues=x1, profile=y1,
+ errors=0.1*y1)
+ assert_array_equal(y2, prop.interpolator(x2))
+ new_prop = prop.interpolate(x2, inplace=True)
+ assert_array_equal(y2, new_prop.profile)
+ assert_array_equal(e, new_prop.errors)
+ assert_array_equal(x2, new_prop.qvalues)
+ assert new_prop is prop
+ assert prop.interp_kws['fill_value'] == 'extrapolate'
+ assert prop.error_interp_kws['fill_value'] == 'extrapolate'
+ assert prop._interpolator is None
+ assert prop._error_interpolator is None
+ assert new_prop._interpolator is None
+ assert new_prop._error_interpolator is None
+ sans_prop = ps.SansProperty(name='sans_foo', qvalues=x1, profile=y1,
+ errors=0.1*y1, node='SomeNode')
+ # call interpolator before interpolate to make sure sans already has
+ # an interpolator object to check resetting
+ sans_prop.interpolator
+ sans_prop.error_interpolator
+ new_sans_prop = sans_prop.interpolate(x2, inplace=False)
+ assert isinstance(new_sans_prop, ps.SansProperty)
+ assert sans_prop is not new_sans_prop
+ assert sans_prop._interpolator is not None
+ assert sans_prop._error_interpolator is not None
+ assert new_prop._interpolator is None
+ assert new_prop._error_interpolator is None
+ assert new_sans_prop.node == 'SomeNode'
+ assert_array_equal(y2, new_sans_prop.profile)
+ assert_array_equal(e, new_sans_prop.errors)
+ assert_array_equal(x2, new_sans_prop.qvalues)
+
+ def test_filter(self):
+ x1 = np.random.rand(10)
+        # Introduce a NaN value to exercise filtering of non-finite data
+ x1[3] = np.nan
+ y1 = x1**2
+ e1 = 0.1*y1
+ to_drop = ~(np.isfinite(x1) & np.isfinite(y1) & np.isfinite(e1)
+ & (e1 != 0))
+ prop = ps.ProfileProperty(name='foo', qvalues=x1, profile=y1,
+ errors=e1)
+ y2 = y1[~to_drop]
+ x2 = x1[~to_drop]
+ e2 = e1[~to_drop]
+ new_prop = prop.filter(inplace=True)
+ assert_array_equal(y2, new_prop.profile)
+ assert_array_equal(e2, new_prop.errors)
+ assert_array_equal(x2, new_prop.qvalues)
+ assert new_prop is prop
+ sans_prop = ps.SansProperty(name='sans_foo', qvalues=x1, profile=y1,
+ errors=e1, node='SomeNode')
+ # inplace is False
+ to_drop2 = ~(y1 < 0.5)
+ y3 = y1[~to_drop2]
+ x3 = x1[~to_drop2]
+ e3 = e1[~to_drop2]
+ new_sans_prop = sans_prop.filter(to_drop=to_drop2)
+ assert isinstance(new_sans_prop, ps.SansProperty)
+ assert sans_prop is not new_sans_prop
+ assert new_sans_prop.node == 'SomeNode'
+ assert all(new_sans_prop.profile < .5)
+ assert len(new_sans_prop.profile == new_sans_prop.errors)
+ assert len(new_sans_prop.profile == new_sans_prop.qvalues)
+ assert_array_equal(y3, new_sans_prop.profile)
+ assert_array_equal(e3, new_sans_prop.errors)
+ assert_array_equal(x3, new_sans_prop.qvalues)
+
class TestSansProperty(object):