diff --git a/idpflex/bayes.py b/idpflex/bayes.py index 2f1104e..57e2c22 100644 --- a/idpflex/bayes.py +++ b/idpflex/bayes.py @@ -1,171 +1,316 @@ # from qef.models import TabulatedFunctionModel -from lmfit.models import (Model, ConstantModel, index_of) -from scipy.interpolate import interp1d +import warnings +import operator +import numpy as np +from lmfit.models import (Model, ConstantModel) +from lmfit import CompositeModel +from functools import reduce class TabulatedFunctionModel(Model): - r"""A fit model that uses a table of (x, y) values to interpolate + r"""A fit model that uses a table of (x, y) values to interpolate. - Uses :class:`~scipy.interpolate.interp1d` + Uses an individual property's `interpolator` for the interpolation. + Control of the interpolator can be set using the property's + `create_interpolator` method. Fitting parameters: - - integrated intensity ``amplitude`` :math:`A` + - integrated intensity ``slope`` :math:`A` - position of the peak ``center`` :math:`E_0` - - nominal relaxation time ``tau`` :math:`\tau` - - stretching exponent ``beta`` :math:`\beta` Parameters ---------- - xdata : :class:`~numpy:numpy.ndarray` - X-values to construct the interpolator - ydata : :class:`~numpy:numpy.ndarray` - Y-values to construct the interpolator - interpolator_kind : str - Interpolator that :class:`~scipy.interpolate.interp1d` should use - """ + prop : :class:`~idpflex.properties.ScalarProperty` or :class:`~idpflex.properties.ProfileProperty` + Property used to create interpolator and model + """ # noqa: E501 - def __init__(self, xdata, ydata, interpolator_kind='linear', - prefix='', missing=None, name=None, - **kwargs): - kwargs.update({'prefix': prefix, 'missing': missing}) - self._interpolator = interp1d(xdata, ydata, kind=interpolator_kind) + def __init__(self, prop, prefix='', missing=None, name=None, **kwargs): + kwargs.update({'prefix': prefix, 'missing': missing, 'name': name}) - def tabulate(x, amplitude, center): - return amplitude * 
self._interpolator(x - center) + def tabulate(x, slope, center, intercept, prop=None): + return slope*prop.interpolator(x - center) + intercept - super(TabulatedFunctionModel, self).__init__(tabulate, **kwargs) - self.set_param_hint('amplitude', min=0, value=1) + super().__init__(tabulate, prop=prop, **kwargs) + self.set_param_hint('slope', min=0, value=1) self.set_param_hint('center', value=0) + self.set_param_hint('intercept', value=0) + + +class TabulatedCompositeModel(Model): + r"""A fit model that uses a table of (x, y) values to interpolate. - def guess(self, y, x=None, **kwargs): - r"""Estimate fitting parameters from input data - - Parameters - ---------- - y : :class:`~numpy:numpy.ndarray` - Values to fit to, e.g., SANS or SAXS intensity values - x : :class:`~numpy:numpy.ndarray` - independent variable, e.g., momentum transfer - - Returns - ------- - :class:`~lmfit.parameter.Parameters` - Parameters with estimated initial values. - """ - amplitude = 1.0 - center = 0.0 - if x is not None: - center = x[index_of(y, max(y))] # assumed peak within domain x - amplitude = max(y) - return self.make_params(amplitude=amplitude, center=center) - - -def model_at_node(node, property_name): - r"""Generate fit model as a tabulated function with a scaling parameter, - plus a flat background + Uses an individual property's `interpolator` for the interpolation. + Control of the interpolator can be set using the property's + `create_interpolator` method. 
+ + Fitting parameters: + - integrated intensity ``slope`` :math:`A` + - position of the peak ``center`` :math:`E_0` Parameters ---------- - node : :class:`~idpflex.cnextend.ClusterNodeX` - One node of the hierarchical :class:`~idpflex.cnextend.Tree` - property_name : str - Name of the property to create the model for + prop : :class:`~idpflex.properties.ScalarProperty` or :class:`~idpflex.properties.ProfileProperty` + Property used to create interpolator and model + """ # noqa: E501 - Returns - ------- - :class:`~lmfit.model.CompositeModel` - A model composed of a :class:`~idpflex.bayes.TabulatedFunctionModel` - and a :class:`~lmfit.models.ConstantModel` + def __init__(self, prop, prefix='', missing=None, name=None, **kwargs): + kwargs.update({'prefix': prefix, 'missing': missing, 'name': name}) + + def tabulate(x, slope, center, intercept, prop=None): + if not set(x).issuperset(prop.feature_domain): + raise ValueError('The domain of the experiment does not align ' + 'with the domain of the profile being fitted.' + ' Interpolate before creating the model.') + return slope*prop.interpolator(prop.x - center) + intercept + + super().__init__(tabulate, prop=prop, **kwargs) + self.set_param_hint('slope', min=0, value=1) + self.set_param_hint('center', value=0) + self.set_param_hint('intercept', value=0) + + +class LinearModel(Model): + r"""A fit model that fits :math:`m*prop.y + b = exp`. 
+ + Fitting parameters: + - slope ``slope`` + - intercept ``intercept`` + + Parameters + ---------- + prop : :class:`~idpflex.properties.ScalarProperty` or :class:`~idpflex.properties.ProfileProperty` + Property used to create interpolator and model """ # noqa: E501 - p = node[property_name] - mod = TabulatedFunctionModel(p.x, p.y) + ConstantModel() - mod.set_param_hint('center', vary=False) - return mod + def __init__(self, prop=None, **kwargs): + def line(x, slope, intercept, prop=None): + if not set(x).issuperset(prop.feature_domain): + raise ValueError('The domain of the experiment does not align ' + 'with the domain of the profile being fitted.' + ' Interpolate before creating the model.') + return slope*prop.y + intercept + super().__init__(line, prop=prop, **kwargs) + self.set_param_hint('slope', value=1, min=0) + self.set_param_hint('intercept', value=0) -def model_at_depth(tree, depth, property_name): - r"""Generate a fit model at a particular tree depth + +def create_at_depth(tree, depth, names=None, use_tabulated=False, + **subset_kws): + r"""Create a model at a particular tree depth from the root node. Parameters ---------- tree : :class:`~idpflex.cnextend.Tree` Hierarchical tree - depth: int - depth level, starting from the tree's root (depth=0) - property_name : str - Name of the property to create the model for + depth : int + Fit at this depth + names : list or str, optional + kwarg to pass on when creating subset of property dict + use_tabulated: bool + Decide to use an tabulated (interpolated) model or a linear model with + out interpolation. Useful to "center" data. 
+ subset_kws : additional args for subset filtering, optional + kwargs to pass on when creating subset of property dict example + includes `property_type` Returns ------- - :class:`~lmfit.model.CompositeModel` - A model composed of a :class:`~idpflex.bayes.TabulatedFunctionModel` - for each node plus a :class:`~lmfit.models.ConstantModel` accounting - for a flat background - """ # noqa: E501 - mod = ConstantModel() - for node in tree.nodes_at_depth(depth): - p = node[property_name] - m = TabulatedFunctionModel(p.x, p.y, prefix='n{}_'.format(node.id)) - m.set_param_hint('center', vary=False) - m.set_param_hint('amplitude', value=1.0 / (1 + depth)) - mod += m - return mod - - -def fit_at_depth(tree, experiment, property_name, depth): - r"""Fit at a particular tree depth from the root node - - Fit experiment against the property stored in the nodes. The fit model - is generated by :func:`~idpflex.bayes.model_at_depth` - - Parameters - ---------- - tree : :class:`~idpflex.cnextend.Tree` - Hierarchical tree - experiment : :class:`~idpflex.properties.ProfileProperty` - A property containing the experimental info. - property_name: str - The name of the simulated property to compare against experiment - max_depth : int - Fit at each depth up to (and including) max_depth - - Returns - ------- - :class:`~lmfit.model.ModelResult` - Results of the fit - """ - mod = model_at_depth(tree, depth, property_name) - params = mod.make_params() - return mod.fit(experiment.y, - x=experiment.x, - weights=1.0 / experiment.e, - params=params) - - -def fit_to_depth(tree, experiment, property_name, max_depth=5): - r"""Fit at each tree depth from the root node up to a maximum depth - - Fit experiment against the property stored in the nodes. 
The fit model - is generated by :func:`~idpflex.bayes.model_at_depth` + :class:`~lmfit.model.ModelResult` + Model for the depth + """ + pgs = [node.property_group.subset(names or node.property_group.keys(), + **subset_kws) + for node in tree.nodes_at_depth(depth)] + return create_models(pgs, use_tabulated=use_tabulated) + + +def create_to_depth(tree, max_depth, names=None, + use_tabulated=False, **subset_kws): + r"""Create models to a particular tree depth from the root node. Parameters ---------- tree : :class:`~idpflex.cnextend.Tree` Hierarchical tree - experiment : :class:`~idpflex.properties.ProfileProperty` - A property containing the experimental info. - property_name: str - The name of the simulated property to compare against experiment max_depth : int Fit at each depth up to (and including) max_depth + names : list or str, optional + kwarg to pass on when creating subset of property dict + use_tabulated: bool + Decide to use an tabulated (interpolated) model or a linear model with + out interpolation. Useful to "center" data. + subset_kws : additional args for subset filtering, optional + kwargs to pass on when creating subset of property dict example + includes `property_type` Returns ------- - :py:class:`list` - A list of :class:`~lmfit.model.ModelResult` items containing the - fit at each level of the tree up to and including `max_depth` + list of :class:`~lmfit.model.ModelResult` + Models for each depth """ + return [create_at_depth(tree, i, names=names, use_tabulated=use_tabulated, + **subset_kws) + for i in range(max_depth + 1)] + + +def fit_model(model, experiment, params=None, weights=None, method='leastsq'): + """Apply a fit to a particular model. + + Parameters + ---------- + model: :class:`~lmfit.model.ModelResult` + Model to be fit + experiment: :class:`~idpflex.properties.PropertyDict` + Set of experimental properties to be fit. + params: Parameters + Parameters of the model to be used. 
Can default to model.make_params() + weights: numpy.ndarray, optional + Array of weights to be used for fitting + method: str, optional + Choice of which fitting method to use with lmfit. Defaults to 'leastsq' + but can choose methods such as 'differential_evolution' to find global + minimizations for the parameters. + + Returns + ------- + :class:`~lmfit.model.ModelResult` + The fit of the model + """ + if params is None: + params = model.make_params() + result = model.fit(experiment.feature_vector, weights=weights, + x=experiment.feature_domain, params=params, + method=method) + return result + + +def fit_models(models, experiment, params_list=None, weights=None, + method='leastsq'): + """Apply fitting to a list of models.""" + if params_list is None: + params_list = [m.make_params() for m in models] + return [fit_model(model, experiment, params=params, + weights=weights, method=method) + for model, params in zip(models, params_list)] + + +def create_model(property_group, use_tabulated=False): + """Create a composite model from a PropertyDict and a set of models. + + Parameters + ---------- + property_group: :class:`~idpflex.properties.PropertyDict` + The set of properties used to create a composite model. + use_tabulated: bool + Decide to use an tabulated (interpolated) model or a linear model with + out interpolation. Useful to "center" data. + + Returns + ------- + :class:`~lmfit.CompositeModel` + The composite model created by applying the model to the corresponging + property and concatenating the results. 
+ """ # noqa: E501 + # Create new model instances or copy model instances + if use_tabulated: + model_objs = [TabulatedCompositeModel(prop=p) + for p in property_group.values()] + else: + model_objs = [LinearModel(prop=p) for p in property_group.values()] + # Prefix all models with the associated property name + # Set the model's function prop arg to use the property + for i, p in enumerate(property_group.values()): + model_objs[i].opts['prop'] = p + model_objs[i].prefix = p.name + '_' + # Reduce models to a single composite model + return reduce(lambda joined_model, m: + CompositeModel(joined_model, m, lambda l, r: + np.concatenate([np.atleast_1d(l), + np.atleast_1d(r)])), + model_objs) + + +def create_models(property_groups, use_tabulated=False): + """Create a composite model from a list of PropertyDict and a set of models. + + For each structure there will be a probability parameter of the form + struct{i}_p that indicates the relative amount of the structure used to + make the fit. There is an accompanying struct{i}_proportion_c which is + duplicates this information but is not limited to [0, 1]. + + For each property, there will be internally used parameters of the form + struct{i}_{propertyname}_{paramname}. + These are reported as {propertyname}_{paramname} (which may be rescaled). + + NOTE: + To adjust parameter bounds, values, etc. after model creation, be sure to + adjust the parameters prefixed with struct{i} and not just + {propertyname}_{paramname}. + + Parameters + ---------- + property_groups: list of :class:`~idpflex.properties.PropertyDict` + The set of properties used to create a composite model. + use_tabulated: bool + Decide to use an tabulated (interpolated) model or a linear model with + out interpolation. Useful to "center" data. + + Returns + ------- + :class:`~lmfit.CompositeModel` + The composite model created by applying the model to the corresponging + property and concatenating the results. 
+ """ + try: + property_groups = list(property_groups) + except TypeError: + property_groups = [property_groups] + + if len(property_groups) == 1: + return create_model(property_groups[0], use_tabulated=use_tabulated) + + def create_submodel(i, property_group): + submodel = create_model(property_group, use_tabulated=use_tabulated) + submodel = ConstantModel(prefix='proportion_')*submodel + for component in submodel.components: + component.prefix = f'struct{i}_' + component.prefix + # manually changing prefix eliminates default param hints + for pname in property_groups[0]: + submodel.set_param_hint(f'struct{i}_{pname}_slope', value=1, min=0) + submodel.set_param_hint(f'struct{i}_{pname}_intercept', value=0) + return submodel + + model = reduce(operator.add, (create_submodel(i, pg) + for i, pg in enumerate(property_groups))) + + if len(property_groups[0]) == 1: + warnings.warn('Not enough properties for model to distinguish between' + ' slope and proportion of structure. Setting internal' + ' slope to not vary during fitting.') + for pname in filter(lambda p: 'slope' in p, + model.param_names): + model.set_param_hint(pname, vary=False, value=1, min=0) + + # for each structure calculate a probability using the propotions + proportion_names = [p for p in model.param_names + if p.endswith('proportion_c')] + total_eq = '(' + '+'.join(proportion_names) + ')' + # model.set_param_hint('total', expr=total_eq) + for p in proportion_names: + model.set_param_hint(p, min=0, value=1) + struct_prefix = p.partition('_')[0] + model.set_param_hint(f'{struct_prefix}_p', expr=f'{p}/{total_eq}') + + # For each property, ensure structures share slope/constant values + for param in filter( + lambda param: (any(pname in param for pname in property_groups[0]) + and not param.startswith('struct0_')), + model.param_names): + pbase = param.partition('_')[-1] # param name without struct prefix + model.set_param_hint(param, expr=f'struct0_{pbase}') + if 'center' in pbase: + 
model.set_param_hint(pbase, expr=f'struct0_{pbase}') + else: + model.set_param_hint(pbase, expr=f'struct0_{pbase}/{total_eq}') - # Fit each level of the tree - return [fit_at_depth(tree, experiment, property_name, depth) for - depth in range(max_depth + 1)] + return model diff --git a/idpflex/cnextend.py b/idpflex/cnextend.py index 9880536..1f1f86a 100644 --- a/idpflex/cnextend.py +++ b/idpflex/cnextend.py @@ -17,7 +17,7 @@ class ClusterNodeX(hierarchy.ClusterNode): def __init__(self, *args, **kwargs): # Using of *super* is unfeasible because *ClusterNode* does not # inherit from *object*. - # super(ClusterNodeX, self).__init__(*args, **kwargs) + # super().__init__(*args, **kwargs) hierarchy.ClusterNode.__init__(self, *args, **kwargs) self.parent = None # parent node self._tree = None diff --git a/idpflex/properties.py b/idpflex/properties.py index 129c098..b5a449d 100644 --- a/idpflex/properties.py +++ b/idpflex/properties.py @@ -1,10 +1,13 @@ import os +import warnings import subprocess import tempfile import fnmatch import functools import numpy as np +import scipy import numbers +import copy from collections import OrderedDict import matplotlib as mpl from matplotlib import pyplot as plt @@ -17,7 +20,8 @@ class PropertyDict(object): - r""" + r"""A container of properties. + A container of properties mimicking some of the behavior of a standard python dictionary, plus methods representing features of the properties when taken as a group. @@ -34,6 +38,13 @@ def __init__(self, properties=None): self._properties.update({p.name: p for p in properties}) def __iter__(self): + r""" + Mimic a dictionary. 
+ + Returns + ------- + list of keys of `_properties` + """ return iter(self._properties.keys()) def __getitem__(self, name): @@ -49,7 +60,7 @@ def __getitem__(self, name): ------- property object, or `None` if no property is found with *name* """ - self._properties.get(name, None) + return self._properties.get(name, None) def __setitem__(self, name, value): r""" @@ -63,9 +74,17 @@ def __setitem__(self, name, value): """ self._properties[name] = value + def __len__(self): + r"""Mimic a dict length. + + Returns + ------- + The number of properties in the dict. + """ + return len(self._properties) + def get(self, name, default=None): - r""" - Mimic get method of a dictionary + r"""Mimic get method of a dictionary. Parameters ---------- @@ -83,7 +102,7 @@ def get(self, name, default=None): def keys(self): r""" - Mimic keys method of a dictionary + Mimic keys method of a dictionary. Returns ------- @@ -93,7 +112,7 @@ def keys(self): def items(self): r""" - Mimic items method of a dictionary + Mimic items method of a dictionary. Returns ------- @@ -103,7 +122,7 @@ def items(self): def values(self): r""" - Mimic values method of a dictionary + Mimic values method of a dictionary. Returns ------- @@ -111,57 +130,95 @@ def values(self): """ return self._properties.values() - def feature_vector(self, names=None): + @property + def feature_vector(self): r""" - Feature vector for the specified sequence of names. + Feature vector for the property dict. The feature vector is a concatenation of the feature vectors for each of the properties and the concatenation follows the order of - names. + insertion. - If names is None, return all features in the property dict in the - order of insertion. + Returns + ------- + numpy.ndarray + """ + return np.concatenate([prop.feature_vector + for prop in self.values()]) - Parameters - ---------- - names: list - List of property names + @property + def feature_domain(self): + r""" + Feature domain for the property dict. 
+ + The feature domain is a concatenation of the feature domains for + each of the properties and the concatenation follows the order of + insertion. Returns ------- numpy.ndarray """ - if names is None: - return np.concatenate([prop.feature_vector - for prop in self.values()]) - return np.concatenate([self._properties[n].feature_vector - for n in names]) + return np.concatenate([prop.feature_domain + for prop in self.values()]) - def feature_weights(self, names=None): + @property + def feature_weights(self): r""" - Feature vector weights for the specified sequence of names. + Feature vector weights for the property group. The feature vector weights is a concatenation of the feature vectors weights for each of the properties and the concatenation follows the - order of names. - - If names is None, return all features in the property dict in the order of insertion. + Returns + ------- + numpy.ndarray + """ + return np.concatenate([prop.feature_weights + for prop in self.values()]) + + def subset(self, names, property_type=None, to_keep_filter=lambda p: True): + r""" + Property dict for the specified sequence of names. + + The subset is a PropertyDict of the properties with the same order as + names. The properties must satisfy the provided conditions, that is + they must be one of the types in property_type tuple (or single type + instance) and when passed to `to_keep_filter` must return trueish. + Parameters ---------- - names: list - List of property names + names: list or str + List of property names. If single string treat as if a list of + one item. + property_type: tuple or property type, optional + Tuple of property types to keep. Default of None allows any type. + Anded with to_keep_filter. + to_keep_filter: callable, optional + A callable that should take a property object and return True if + the property should be included in the subset. Default allows all + properties. + Anded with property_types. 
Returns ------- - numpy.ndarray + PropertyDict """ - if names is None: - return np.concatenate([prop.feature_weights - for prop in self.values()]) - return np.concatenate([self._properties[n].feature_weights - for n in names]) + # Change to list to apply the constaints of property type and filter + # even if only a single name + if isinstance(names, str): + names = [names] + + # Define an inner function for the purpose of informative stack trace + # and to add handling of property types to the filter + def _to_keep_filter(prop): + if property_type is None: + return to_keep_filter(prop) + return to_keep_filter(prop) and isinstance(prop, property_type) + + return PropertyDict([self[name] for name in names + if _to_keep_filter(self[name])]) def register_as_node_property(cls, nxye): @@ -198,7 +255,7 @@ def register_as_node_property(cls, nxye): ('errors', 'intensity errors')) """ def property_item(attr_name, docstring): - r"""Factory of the node property items *name*, *x*, *y*, and *e* + r"""Factory of the node property items *name*, *x*, *y*, and *e*. Parameters ---------- @@ -231,7 +288,7 @@ def setter(instance, value): def decorate_as_node_property(nxye): - r"""Decorator that endows a class with the node property protocol + r"""Decorator that endows a class with the node property protocol. For details, see :func:`~idpflex.properties.register_as_node_property` @@ -281,14 +338,21 @@ def set_scalar(self, y): @property def feature_vector(self): + """Return the y value as the feature vector of the property.""" return np.array([self.y, ]) + @property + def feature_domain(self): + """Return the x value as the input domain of the property.""" + return np.array([self.x]) + @property def feature_weights(self): + """Return 1 as the weight of the property components.""" return np.array([1]) def histogram(self, bins=10, errors=False, **kwargs): - r"""Histogram of values for the leaf nodes + r"""Histogram of values for the leaf nodes. 
Parameters ---------- @@ -350,7 +414,7 @@ class AsphericityMixin(object): from the gyration radius tensor""" def from_universe(self, a_universe, selection=None, index=0): - r"""Calculate asphericity from an MDAnalysis universe instance + r"""Calculate asphericity from an MDAnalysis universe instance. :math:`\frac{(L_1-L_2)^2+(L_1-L_3)^2+L_2-L_3)^2}{2(L_1+L_2+L_3)^2}` @@ -386,7 +450,7 @@ def from_universe(self, a_universe, selection=None, index=0): return self def from_pdb(self, filename, selection=None): - r"""Calculate asphericity from a PDB file + r"""Calculate asphericity from a PDB file. :math:`\frac{(L_1-L_2)^2+(L_1-L_3)^2+L_2-L_3)^2}{2(L_1+L_2+L_3)^2}` @@ -438,7 +502,7 @@ def __init__(self, *args, **kwargs): @property def asphericity(self): - r"""Property to read and set the asphericity""" + r"""Property to read and set the asphericity.""" return self.y @asphericity.setter @@ -451,7 +515,7 @@ class SaSaMixin(object): solvent accessible surface area""" def from_mdtraj(self, a_traj, probe_radius=1.4, **kwargs): - r"""Calculate solvent accessible surface for frames in a trajectory + r"""Calculate solvent accessible surface for frames in a trajectory. SASA units are Angstroms squared @@ -476,7 +540,7 @@ def from_mdtraj(self, a_traj, probe_radius=1.4, **kwargs): return self def from_pdb(self, filename, selection=None, probe_radius=1.4, **kwargs): - r"""Calculate solvent accessible surface area (SASA) from a PDB file + r"""Calculate solvent accessible surface area (SASA) from a PDB file. If the PBD contains more than one structure, calculation is performed only for the first one. 
@@ -561,7 +625,7 @@ def __init__(self, *args, **kwargs): @property def sasa(self): - r"""Property to read and write the SASA value""" + r"""Property to read and write the SASA value.""" return self.y @sasa.setter @@ -574,7 +638,7 @@ class EndToEndMixin(object): the end-to-end distance for a protein""" def from_universe(self, a_universe, selection='name CA', index=0): - r"""Calculate radius of gyration from an MDAnalysis Universe instance + r"""Calculate radius of gyration from an MDAnalysis Universe instance. Does not apply periodic boundary conditions @@ -601,7 +665,7 @@ def from_universe(self, a_universe, selection='name CA', index=0): return self def from_pdb(self, filename, selection='name CA'): - r"""Calculate end-to-end distance from a PDB file + r"""Calculate end-to-end distance from a PDB file. Does not apply periodic boundary conditions @@ -625,7 +689,7 @@ def from_pdb(self, filename, selection='name CA'): class EndToEnd(ScalarProperty, EndToEndMixin): - r"""Implementation of a node property to store the end-to-end distance + r"""Implementation of a node property to store the end-to-end distance. See :class:`~idpflex.properties.ScalarProperty` for initialization """ @@ -639,7 +703,7 @@ def __init__(self, *args, **kwargs): @property def end_to_end(self): - r"""Property to read and set the end-to-end distance""" + r"""Property to read and set the end-to-end distance.""" return self.y @end_to_end.setter @@ -653,7 +717,7 @@ class RadiusOfGyrationMixin(object): """ def from_universe(self, a_universe, selection=None, index=0): - r"""Calculate radius of gyration from an MDAnalysis Universe instance + r"""Calculate radius of gyration from an MDAnalysis Universe instance. Parameters ---------- @@ -678,7 +742,7 @@ def from_universe(self, a_universe, selection=None, index=0): return self def from_pdb(self, filename, selection=None): - r"""Calculate Rg from a PDB file + r"""Calculate Rg from a PDB file. 
Parameters ---------- @@ -713,7 +777,7 @@ def __init__(self, *args, **kwargs): @property def rg(self): - r"""Property to read and write the radius of gyration value""" + r"""Property to read and write the radius of gyration value.""" return self.y @rg.setter @@ -726,8 +790,7 @@ def rg(self, value): ('cmap', '(:class:`~numpy:numpy.ndarray`) contact map between residues'), # noqa: E501 ('errors', '(:class:`~numpy:numpy.ndarray`) undeterminacies in the contact map'))) # noqa: E501 class ResidueContactMap(object): - r"""Contact map between residues of the conformation using different - definitions of contact. + r"""Contact map between residues of the conformation using different definitions of contact. Parameters ---------- @@ -757,7 +820,7 @@ def __init__(self, name=None, selection=None, cmap=None, errors=None, self.cutoff = cutoff def from_universe(self, a_universe, cutoff, selection=None, index=0): - r"""Calculate residue contact map from an MDAnalysis Universe instance + r"""Calculate residue contact map from an MDAnalysis Universe instance. Parameters ---------- @@ -804,7 +867,7 @@ def from_universe(self, a_universe, cutoff, selection=None, index=0): return self def from_pdb(self, filename, cutoff, selection=None): - r"""Calculate residue contact map from a PDB file + r"""Calculate residue contact map from a PDB file. 
Parameters ---------- @@ -826,11 +889,11 @@ def from_pdb(self, filename, cutoff, selection=None): return self.from_universe(mda.Universe(filename), cutoff, selection) def plot(self): - r"""Plot the residue contact map of the node""" + r"""Plot the residue contact map of the node.""" resids = [str(i) for i in list(set(self.selection.resids))] def format_fn(tick_val, tick_pos): - r"""Translates matrix index to residue number""" + r"""Translate matrix index to residue number.""" if int(tick_val) < len(resids): return resids[int(tick_val)] else: @@ -858,7 +921,7 @@ def format_fn(tick_val, tick_pos): ('profile', '(:class:`~numpy:numpy.ndarray`) secondary structure assignment'), # noqa: E501 ('errors', '(:class:`~numpy:numpy.ndarray`) assignment undeterminacy'))) # noqa: E501 class SecondaryStructureProperty(object): - r"""Node property for secondary structure determined by DSSP + r"""Node property for secondary structure determined by DSSP. Every residue is assigned a vector of length 8. Indexes corresponds to different secondary structure assignment: @@ -894,6 +957,7 @@ class SecondaryStructureProperty(object): N x 8 matrix denoting undeterminacies for each type of assigned secondary residue in every residue """ # noqa: E501 + #: Description of single-letter codes for secondary structure elements = OrderedDict([('H', 'Alpha helix'), ('B', 'Isolated beta-bridge'), @@ -941,7 +1005,7 @@ def __init__(self, name=None, aa=None, profile=None, errors=None): self.node = None def from_dssp_sequence(self, codes): - r"""Load secondary structure profile from a single string of DSSP codes + r"""Load secondary structure profile from a string of DSSP codes. Attributes *aa* and *errors* are not modified, only **profile**. @@ -964,7 +1028,7 @@ def from_dssp_sequence(self, codes): return self def from_dssp(self, file_name): - r"""Load secondary structure profile from a `dssp file `_ + r"""Load secondary structure profile from a `dssp file `_. 
Parameters ---------- @@ -991,7 +1055,7 @@ def from_dssp(self, file_name): return self def from_dssp_pdb(self, file_name, command='mkdssp', silent=True): - r"""Calculate secondary structure with DSSP + r"""Calculate secondary structure with DSSP. Parameters ---------- @@ -1051,8 +1115,7 @@ def collapsed(self): return self.profile.argmax(axis=1) def disparity(self, other): - r"""Secondary Structure disparity of other profile to self, akin to - :math:`\chi^2` + r"""Secondary Structure disparity of other profile to self, akin to :math:`\chi^2`. :math:`\frac{1}{N(n-1)} \sum_{i=1}^{N}\sum_{j=1}^{n} (\frac{p_{ij}-q_ {ij}}{e})^2` @@ -1080,7 +1143,7 @@ def disparity(self, other): return np.sum(np.square(dp/e)) / (n * (self.n_codes - 1)) def plot(self, kind='percents'): - r"""Plot the secondary structure of the node holding the property + r"""Plot the secondary structure of the node holding the property. Parameters ---------- @@ -1171,47 +1234,157 @@ class ProfileProperty(object): Intensity values errors : :class:`~numpy:numpy.ndarray` Errors in the intensity values + node : :class:`~idpflex.cnextend.ClusterNodeX` + Node to which this property belongs + interp_kws : dict + Keyword args to pass to scipy interp1d for interpolating profile data. + Defaults to using extrapolation. + error_interp_kws : dict + Keyword args to pass to scipy interp1d for interpolating error data. + Defaults to using extrapolation. 
""" default_name = 'profile' - def __init__(self, name=None, qvalues=None, profile=None, errors=None): + def __init__(self, name=None, qvalues=None, profile=None, errors=None, + node=None, interp_kws=None, error_interp_kws=None): self.name = name self.qvalues = qvalues self.profile = profile self.errors = errors - self.node = None + self.node = node + self._interpolator = None + self.interp_kws = interp_kws or {'fill_value': 'extrapolate'} + self._error_interpolator = None + self.error_interp_kws = error_interp_kws or {'fill_value': + 'extrapolate'} @property def feature_vector(self): r""" - Each `qvalue` is interpreted as an independent feature, - and the related value in `profile` is a particular - "measured" value of that feature. + Each `qvalue` is interpreted as an independent feature, and the related value in `profile` is a particular "measured" value of that feature. Returns ------- numpy.ndarray - """ + """ # noqa: E501 return self.profile + @property + def feature_domain(self): + r""" + Return `qvalue` corresponding to each of the values in the feature vector. + + Returns + ------- + numpy.ndarray + """ # noqa: E501 + return self.qvalues + @property def feature_weights(self): r""" - Weights to be used when calculating the square of the euclidean - distance between two feature vectors + Weights to be used when calculating the square of the euclidean distance between two feature vectors. Returns ------- numpy.ndarray + """ # noqa: E501 + if self.e is None or np.allclose(np.zeros(len(self.y)), self.e): + return np.ones(len(self.profile)) / np.sqrt(len(self.profile)) + ws = self.profile / self.errors + return ws / np.linalg.norm(ws) + + @property + def interpolator(self): + r""" + Return the property's interpolator for the profile data. + + If not previously created, will create an interpolator using + `self.interp_kws`. These default to a linear interpolator with + extrapolation. 
+ + Returns + ------- + function """ - return np.ones(len(self.profile)) / np.sqrt(len(self.profile)) + if self._interpolator is None: + warnings.warn('Property did not have interpolator.' + ' Creating interpolator using interp_kws.') + self._interpolator = scipy.interpolate.interp1d( + self.qvalues, self.profile, **self.interp_kws) + return self._interpolator + + @property + def error_interpolator(self): + r""" + Return the property's interpolator for the error data. + + If not previously created, will create an interpolator using + `self.error_interp_kws`. These default to a linear interpolator + with extrapolation. + + Returns + ------- + function + """ + if self._error_interpolator is None: + warnings.warn('Property did not have error interpolator. Creating' + ' interpolator using error_interp_kws.') + self._error_interpolator = scipy.interpolate.interp1d( + self.qvalues, self.errors, **self.error_interp_kws) + return self._error_interpolator + + def interpolate(self, qvalues, inplace=False): + r"""Return interpolated data in new object or in place.""" + # New instance of a property potentially using the subclass' init + result = self if inplace else copy.deepcopy(self) + result.profile = result.interpolator(qvalues) + result.errors = result.error_interpolator(qvalues) + result.qvalues = qvalues.copy() + # When modifying values, should likely reset the interpolator + # Will only reset self if inplace + warnings.warn('Reseting interpolators due to modifying data.') + result._interpolator = None + result._error_interpolator = None + return result + + def filter(self, to_drop=None, inplace=False): + """Filter data using the `to_drop`, otherwise filter out 'bad' data. + + Will remove the portion of the profile, qvalues, and errors that align + with true values in the `to_drop`. By default, the `to_drop` is true + when any of profile, qvalues, and errors are infinite or the errors + are zero. 
+ + Parameters + ---------- + to_drop: numpy.ndarray of type bool + The to_drop to apply to the components of the profile. + inplace: bool + If inplace, modify profile data instead of creating new object. + + Returns + ------- + A new property with the chosen subset of data. + """ + if to_drop is None: + to_drop = ~(np.isfinite(self.profile) & np.isfinite(self.qvalues) + & np.isfinite(self.errors) & (self.errors != 0)) + result = self if inplace else copy.deepcopy(self) + result.profile = result.profile[~to_drop] + result.errors = result.errors[~to_drop] + result.qvalues = result.qvalues[~to_drop] + # When modifying values, should likely reset the interpolator + # Will only reset self if inplace + warnings.warn('Reseting interpolators due to modifying data.') + result._interpolator = None + result._error_interpolator = None + return result class SansLoaderMixin(object): - r"""Mixin class providing a set of methods to load SANS data into a - profile property - """ + r"""Mixin class providing a set of methods to load SANS data into a profile property.""" # noqa: E501 def from_sassena(self, handle, profile_key='fqt', index=0): """Load SANS profile from sassena output. @@ -1244,7 +1417,7 @@ def from_sassena(self, handle, profile_key='fqt', index=0): return self def from_cryson_int(self, file_name): - r"""Load profile from a `cryson \*.int `_ file + r"""Load profile from a `cryson \*.int `_ file. Parameters ---------- @@ -1282,7 +1455,7 @@ def from_cryson_fit(self, file_name): def from_cryson_pdb(self, file_name, command='cryson', args='-lm 20 -sm 0.6 -ns 500 -un 1 -eh -dro 0.075', silent=True): - r"""Calculate profile with cryson from a PDB file + r"""Calculate profile with cryson from a PDB file. 
Parameters ---------- @@ -1365,8 +1538,7 @@ def to_ascii(self, file_name): class SansProperty(ProfileProperty, SansLoaderMixin): - r"""Implementation of a node property for SANS data - """ + r"""Implementation of a node property for SANS data.""" default_name = 'sans' @@ -1377,12 +1549,10 @@ def __init__(self, *args, **kwargs): class SaxsLoaderMixin(object): - r"""Mixin class providing a set of methods to load X-ray data into a - profile property - """ + r"""Mixin class providing a set of methods to load X-ray data into a profile property.""" # noqa: E501 def from_crysol_int(self, file_name): - r"""Load profile from a `crysol \*.int `_ file + r"""Load profile from a `crysol \*.int `_ file. Parameters ---------- @@ -1420,7 +1590,7 @@ def from_crysol_fit(self, file_name): def from_crysol_pdb(self, file_name, command='crysol', args='-lm 20 -sm 0.6 -ns 500 -un 1 -eh -dro 0.075', silent=True): - r"""Calculate profile with crysol from a PDB file + r"""Calculate profile with crysol from a PDB file. Parameters ---------- @@ -1503,8 +1673,7 @@ def to_ascii(self, file_name): class SaxsProperty(ProfileProperty, SaxsLoaderMixin): - r"""Implementation of a node property for SAXS data - """ + r"""Implementation of a node property for SAXS data.""" default_name = 'saxs' @@ -1516,8 +1685,9 @@ def __init__(self, *args, **kwargs): def propagator_weighted_sum(values, tree, weights=lambda left_node, right_node: (1.0, 1.0)): - r"""Calculate the property of a node as the sum of its two siblings' - property values. Propagation applies only to non-leaf nodes. + r"""Calculate the property of a node as the sum of its two siblings' property values. + + Propagation applies only to non-leaf nodes. Parameters ---------- @@ -1529,7 +1699,7 @@ def propagator_weighted_sum(values, tree, Callable of two arguments (left-node and right-node) returning a tuple of left and right weights. Default callable returns (1.0, 1.0) always. 
- """ + """ # noqa: E501 # Insert a property for each leaf if len(values) != tree.nleafs: msg = "len(values)={} but there are {} leafs".format(len(values), @@ -1556,7 +1726,7 @@ def propagator_weighted_sum(values, tree, def weights_by_size(left_node, right_node): - r"""Calculate the relative size of two nodes + r"""Calculate the relative size of two nodes. Parameters ---------- diff --git a/tests/conftest.py b/tests/conftest.py index 3f42d90..2e53590 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,8 @@ this_module_path = sys.modules[__name__].__file__ data_dir = os.path.join(os.path.dirname(this_module_path), 'data') +random_state = np.random.RandomState(23) + @idprop.decorate_as_node_property((('name', 'name of the property'), ('domain_bar', 'property domain'), @@ -56,7 +58,7 @@ def benchmark(): n_leafs = 22379 # Instantiate scalar properties for the leaf nodes, then propagate # up the tree - sc = np.random.normal(loc=100.0, size=n_leafs) + sc = random_state.normal(loc=100.0, size=n_leafs) sc_p = [idprop.ScalarProperty(name='sc', y=s) for s in sc] idprop.propagator_size_weighted_sum(sc_p, t) return {'z': z, @@ -139,7 +141,7 @@ def sans_benchmark(request): # Create a node tree. 
# m is a 1D compressed matrix of distances between leafs - m = np.random.random(int(n_leafs * (n_leafs - 1) / 2)) + m = random_state.random_sample(int(n_leafs * (n_leafs - 1) / 2)) z = linkage(m) tree = cnextend.Tree(z) @@ -201,12 +203,12 @@ def sans_fit(sans_benchmark): coefficients = dict() nodes = tree.nodes_at_depth(depth) n_nodes = 1 + depth # depth=0 corresponds to the root node (nclusters=1) - q_values = (tree.root[name].x[:-1] + tree.root[name].x[1:]) / 2 # midpoint + q_values = tree.root[name].x profile = np.zeros(len(q_values)) for i in range(n_nodes): coefficients[nodes[i].id] = coeffs[i] p = nodes[i][name] - profile += coeffs[i] * (p.y[:-1] + p.y[1:]) / 2 + profile += coeffs[i] * p.y background = 0.05 * max(profile) # flat background profile += background experiment_property = idprop.SansProperty(name=name, diff --git a/tests/test_bayes.py b/tests/test_bayes.py index 066ce6a..5565665 100644 --- a/tests/test_bayes.py +++ b/tests/test_bayes.py @@ -1,51 +1,165 @@ import numpy as np -from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_array_almost_equal, assert_array_equal import pytest +import warnings from idpflex import bayes +from idpflex import properties -def test_model_at_node(sans_fit): +def test_TabulatedCompositeModel_interpolation(sans_fit): tree = sans_fit['tree'] - mod = bayes.model_at_node(tree.root, sans_fit['property_name']) + tree.root.property_group = tree.root.property_group.subset( + tree.root.property_group.keys(), + property_type=properties.ProfileProperty) + mod = bayes.create_at_depth(tree, 0, use_tabulated=True) prop = tree.root[sans_fit['property_name']] - q_values = (prop.x[:-1] + prop.x[1:]) / 2 # midpoints params = mod.make_params() - y = mod.eval(params, x=q_values) - assert_array_almost_equal(y, (prop.y[:-1] + prop.y[1:]) / 2, decimal=1) + # test interpolation + model = bayes.TabulatedFunctionModel(prop=prop) + qvals = prop.qvalues[:-1] + np.diff(prop.qvalues)/2 + 
assert_array_almost_equal(model.eval(model.make_params(), x=qvals), + prop.interpolator(qvals), decimal=1) + # test centering + params[f'{prop.name}_center'].set(vary=True, value=2) + prop_shifted = prop.interpolate(prop.qvalues + 2) + fit = mod.fit(prop_shifted.y, x=prop.qvalues, params=params) + assert_array_almost_equal(fit.best_fit, prop_shifted.y, decimal=1) + assert abs(fit.params[f'{prop.name}_center'] - 2) <= 0.2 -def test_model_at_depth(sans_fit): +def test_fit_at_depth(sans_fit): tree = sans_fit['tree'] - name = sans_fit['property_name'] - depth = sans_fit['depth'] - # Evaluate the model - mod = bayes.model_at_depth(tree, depth, name) + exp = sans_fit['experiment_property'] + exp_pd = properties.PropertyDict([exp]) + mod = bayes.create_at_depth(tree, sans_fit['depth']) params = mod.make_params() - for id, coeff in sans_fit['coefficients'].items(): - params['n{}_amplitude'.format(id)].set(value=coeff) - params['c'].set(value=sans_fit['background']) - p_exp = sans_fit['experiment_property'] - assert_array_almost_equal(mod.eval(params, x=p_exp.x), - p_exp.y, decimal=1) + fit = bayes.fit_model(mod, exp_pd, params=params) + assert fit.weights is None + fit2 = bayes.fit_model(mod, exp_pd, weights=1/exp.e) + assert fit2.chisqr < 1e-10 + assert_array_equal(fit2.weights, 1/exp.e) + # make assertions about parameters + assert all([f'struct{i}_proportion_c' in fit.params + for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth']))]) + assert all([fit.params[f'struct{i}_proportion_c'].min == 0 + for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth']))]) + assert all([f'struct{i}_{pname}_slope' in fit.params + for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth'])) + for pname in exp_pd]) + assert all([fit.params[f'struct{i}_{pname}_slope'].min == 0 + for i, _ in enumerate(tree.nodes_at_depth(sans_fit['depth'])) + for pname in exp_pd]) + assert all([f'struct{i}_{pname}_intercept' in fit.params + for i, _ in 
enumerate(tree.nodes_at_depth(sans_fit['depth'])) + for pname in exp_pd]) + assert abs(sum(p for p in fit.params.values() + if p.name.endswith('_p')) - 1) <= 1e-3 -def test_fit_at_depth(sans_fit): +def test_global_minimization(sans_fit): tree = sans_fit['tree'] - name = sans_fit['property_name'] - p_exp = sans_fit['experiment_property'] - fit = bayes.fit_at_depth(tree, p_exp, name, sans_fit['depth']) - assert fit.chisqr < 1e-10 + exp = sans_fit['experiment_property'] + exp_pd = properties.PropertyDict([exp]) + mod = bayes.create_at_depth(tree, sans_fit['depth']) + params = mod.make_params() + for param in params.values(): + param.set(min=0, max=1e9) + fit = bayes.fit_model(mod, exp_pd, params=params, weights=1/exp.e) + fit2 = bayes.fit_model(mod, exp_pd, params=params, weights=1/exp.e, + method='basin_hopping') + assert abs(sum(p for p in fit2.params.values() + if p.name.endswith('_p')) - 1) <= 1e-3 + try: + # Expect global fit to be better than typical fit + assert fit2.redchi <= fit.redchi + except AssertionError: + warnings.warn('Global minimization did not do better than reference' + ' fit. Attempting a refit to avoid test failure.'
+ f' Difference {fit.redchi - fit2.redchi:.3} where global' + f' {fit2.redchi:.3} and reference {fit.redchi:.3}.', + RuntimeWarning) + # Try refitting and looser tolerance + fit3 = bayes.fit_model(mod, exp_pd, params=params, + weights=exp_pd.feature_weights, + method='differential_evolution') + assert fit3.redchi <= fit.redchi def test_fit_to_depth(sans_fit): tree = sans_fit['tree'] - name = sans_fit['property_name'] - p_exp = sans_fit['experiment_property'] - fits = bayes.fit_to_depth(tree, p_exp, name, max_depth=7) - chi2 = np.asarray([fit.chisqr for fit in fits]) + exp = sans_fit['experiment_property'] + exp_pd = properties.PropertyDict([exp]) + mods = bayes.create_to_depth(tree, max_depth=7) + params_list = [m.make_params() for m in mods] + fits = bayes.fit_models(mods, exp_pd, weights=1/exp.e, + params_list=params_list) + # With a single component (depth 0) there is no proportion ('_p') param + assert all([not p.endswith('_p') for p in fits[0].params]) + chi2 = np.array([fit.chisqr for fit in fits]) assert np.argmax(chi2 < 1e-10) == sans_fit['depth'] +def test_fit(sans_fit): + tree = sans_fit['tree'] + exp = sans_fit['experiment_property'] + scalar = properties.ScalarProperty(name='foo', x=1, y=1, e=1) + values = [properties.ScalarProperty(name='foo', x=1, y=i, e=1) + for i in range(len(tree.leafs))] + properties.propagator_size_weighted_sum(values, tree) + exp_pd = properties.PropertyDict([exp, scalar]) + ctd = bayes.create_to_depth + models = ctd(tree, max_depth=7) + params_list = [m.make_params() for m in models] + weights = 1/np.concatenate([exp.e, scalar.feature_weights]) + fits = bayes.fit_models(models, exp_pd, weights=weights, + params_list=params_list) + chi2 = [fit.chisqr for fit in fits] + assert chi2[sans_fit['depth']] <= 1e-10 + # Test filtering by name + cad = bayes.create_at_depth + exp_pd2 = properties.PropertyDict([exp]) + models2 = cad(tree, depth=sans_fit['depth'], names=exp_pd2.keys()) + params_list2 = models2.make_params() + assert all(f'{k}_slope' in 
params_list2 for k in exp_pd2.keys()) + # Test filtering by property type + models3 = cad(tree, depth=sans_fit['depth'], + property_type=properties.ProfileProperty) + params_list3 = models3.make_params() + assert all(f'{p.name}_slope' in params_list3 for p in exp_pd2.values() + if isinstance(p, properties.ProfileProperty)) + # Test filtering by filtering function + models4 = cad(tree, depth=sans_fit['depth'], + to_keep_filter=lambda p: p.name == 'foo') + params_list4 = models4.make_params() + assert 'foo_slope' in params_list4 + + +def test_fit_bad_input(sans_fit): + tree = sans_fit['tree'] + exp = sans_fit['experiment_property'] + name = sans_fit['property_name'] + scalar = properties.ScalarProperty(name='foo', x=1, y=1, e=1) + # Different x value to prompt error + values = [properties.ScalarProperty(name='foo', x=10, y=i, e=1) + for i in range(len(tree.leafs))] + properties.propagator_size_weighted_sum(values, tree) + exp_pd = properties.PropertyDict([exp, scalar]) + ctd = bayes.create_to_depth + with pytest.raises(ValueError): + models = ctd(tree, max_depth=7, use_tabulated=True) + exp_shift = exp.interpolate(exp.x + 2) + exp_pd_shifted = properties.PropertyDict([exp_shift, scalar]) + bayes.fit_models(models, exp_pd_shifted) + with pytest.raises(ValueError): + models = ctd(tree, max_depth=7) + bayes.fit_models(models, exp_pd) + with pytest.raises(ValueError): + left_child = tree.root.get_left() + left_child.property_group = left_child.property_group.subset([name]) + mods = ctd(tree, max_depth=7) + bayes.fit_models(mods, exp_pd) + + if __name__ == '__main__': pytest.main() diff --git a/tests/test_cnextend.py b/tests/test_cnextend.py index 030ae20..05e7b45 100644 --- a/tests/test_cnextend.py +++ b/tests/test_cnextend.py @@ -27,9 +27,9 @@ def test_property_group_features(self): n[prop.name] = prop prop2 = ScalarProperty(name='some_prop2', y=2) n[prop2.name] = prop2 - fv = n.property_group.feature_vector() + fv = n.property_group.feature_vector 
assert_array_equal(fv, np.array([4, 2])) - ws = n.property_group.feature_weights() + ws = n.property_group.feature_weights assert_array_equal(ws, np.array([1, 1])) def test_leafs(self, benchmark): diff --git a/tests/test_properties.py b/tests/test_properties.py index 0b710ba..7d6a452 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -3,13 +3,91 @@ import pytest import tempfile import shutil +import scipy from idpflex import properties as ps from idpflex.properties import SecondaryStructureProperty as SSP +from numpy.testing import assert_array_equal, assert_almost_equal + + +class TestPropertyDict(object): + def test_mimic_dict(self): + props = {'profile': ps.ProfileProperty(name='profile', + profile=np.arange(10), + qvalues=np.arange(10)*5, + errors=np.arange(10)*.01), + 'scalar': ps.ScalarProperty(name='scalar', x=0, y=1, e=2)} + scalar2 = ps.ScalarProperty(name='scalar2', x=1, y=2, e=3) + propdict = ps.PropertyDict(properties=props.values()) + assert [k for k in propdict] == [k for k in props] + assert propdict['profile'] == props['profile'] + propdict['scalar2'] = scalar2 + assert propdict['scalar2'] == scalar2 + propdict = propdict.subset(names=props.keys()) + assert len(propdict) == len(props) + assert propdict.get('not_real', default=5) == 5 + assert list(propdict.keys()) == list(props.keys()) + assert list(propdict.values()) == list(props.values()) + assert list(propdict.items()) == list(props.items()) + propdict2 = propdict.subset(names=list(props.keys())[0]) + assert len(propdict2) == 1 + + def test_subset(self): + props = {'profile': ps.ProfileProperty(name='profile', + profile=np.arange(10), + qvalues=np.arange(10)*5, + errors=np.arange(10)*.01), + 'scalar': ps.ScalarProperty(name='scalar', x=0, y=1, e=2), + 'scalar2': ps.ScalarProperty(name='scalar2', x=1, y=2, e=3), + } + propdict = ps.PropertyDict(properties=props.values()) + propdict = propdict.subset(names=props.keys()) + assert len(propdict) == len(props) + propdict2 = 
propdict.subset(names=props.keys(), + property_type=(ps.ProfileProperty, + ps.ScalarProperty)) + assert len(propdict2) == len(props) + propdict3 = propdict.subset(names=props.keys(), + property_type=ps.ProfileProperty) + assert len(propdict3) == 1 + assert 'profile' in propdict3 + propdict4 = propdict.subset(names=props.keys(), + to_keep_filter=lambda prop: + all(prop.feature_vector == 2)) + assert len(propdict4) == 1 + assert 'scalar2' in propdict4 + propdict5 = propdict.subset(names=props.keys(), + property_type=(ps.ScalarProperty), + to_keep_filter=lambda prop: + all(prop.feature_vector == 2)) + assert len(propdict5) == 1 + assert 'scalar2' in propdict5 + propdict6 = propdict.subset(names=props.keys(), + property_type=ps.ProfileProperty, + to_keep_filter=lambda prop: + all(prop.feature_vector == 2)) + assert len(propdict6) == 0 + + def test_feature_vector_domain_weights(self): + x = np.arange(10) + props = {'profile': ps.ProfileProperty(name='profile', + profile=x, + qvalues=x*5, + errors=x*.01 + 0.001), + 'scalar': ps.ScalarProperty(name='scalar', x=0, y=1, e=2)} + propdict = ps.PropertyDict(properties=props.values()) + assert_array_equal(propdict.feature_vector, + np.concatenate([p.feature_vector + for p in props.values()])) + assert_array_equal(propdict.feature_domain, + np.concatenate([p.feature_domain + for p in props.values()])) + assert_array_equal(propdict.feature_weights, + np.concatenate([p.feature_weights + for p in props.values()])) class TestRegisterDecorateProperties(object): - def test_register_as_node_property(self): class SomeProperty(object): def __init__(self): @@ -69,6 +147,15 @@ def test_plot_histogram(self, benchmark): ax = root_prop.plot(kind='histogram', errors=True, bins=1) assert ax.patches[0]._height == benchmark['nleafs'] + def test_feature_vector_domain_and_weights(self): + prop = ps.ScalarProperty(name='foo', x=0, y=1, e=2) + assert prop.x == 0 + assert prop.y == 1 + assert prop.e == 2 + assert_array_equal(prop.feature_vector, 
np.array([prop.y])) + assert_array_equal(prop.feature_domain, np.array([prop.x])) + assert_array_equal(prop.feature_weights, np.array([1])) + class TestAsphericity(object): @@ -244,6 +331,98 @@ def test_instance_decorated_as_node_property(self): assert np.array_equal(profile_prop.y, 10*v) assert np.array_equal(profile_prop.e, 0.1*v) + def test_feature_vector_domain_and_weights(self): + v = np.arange(9) + profile_prop = ps.ProfileProperty(name='foo', qvalues=v, profile=10*v, + errors=0.1*v + 0.001) + assert_array_equal(profile_prop.feature_vector, profile_prop.profile) + assert_array_equal(profile_prop.feature_domain, profile_prop.qvalues) + ws = profile_prop.profile/profile_prop.errors + ws = ws/np.linalg.norm(ws) + assert_array_equal(profile_prop.feature_weights, ws) + assert_almost_equal(np.linalg.norm(profile_prop.feature_weights), 1) + # mimic reading from a crysol/cryson int + prof_prop2 = ps.ProfileProperty(name='foo', qvalues=v, profile=10*v, + errors=np.zeros(len(v))) + ws2 = np.ones(len(prof_prop2.profile))/len(prof_prop2.profile)**.5 + assert_array_equal(prof_prop2.feature_weights, ws2) + assert_almost_equal(np.linalg.norm(prof_prop2.feature_weights), 1) + + def test_interpolation(self): + x1 = np.random.rand(10) + # Guarantee values outside of the range to test extrapolation + x2 = x1 + abs(np.random.rand(10)) + y1 = x1**2 + y2 = scipy.interpolate.interp1d(x1, y1, fill_value='extrapolate')(x2) + e = scipy.interpolate.interp1d(x1, .1*y1, fill_value='extrapolate')(x2) + prop = ps.ProfileProperty(name='foo', qvalues=x1, profile=y1, + errors=0.1*y1) + assert_array_equal(y2, prop.interpolator(x2)) + new_prop = prop.interpolate(x2, inplace=True) + assert_array_equal(y2, new_prop.profile) + assert_array_equal(e, new_prop.errors) + assert_array_equal(x2, new_prop.qvalues) + assert new_prop is prop + assert prop.interp_kws['fill_value'] == 'extrapolate' + assert prop.error_interp_kws['fill_value'] == 'extrapolate' + assert prop._interpolator is None + assert 
prop._error_interpolator is None + assert new_prop._interpolator is None + assert new_prop._error_interpolator is None + sans_prop = ps.SansProperty(name='sans_foo', qvalues=x1, profile=y1, + errors=0.1*y1, node='SomeNode') + # call interpolator before interpolate to make sure sans already has + # an interpolator object to check resetting + sans_prop.interpolator + sans_prop.error_interpolator + new_sans_prop = sans_prop.interpolate(x2, inplace=False) + assert isinstance(new_sans_prop, ps.SansProperty) + assert sans_prop is not new_sans_prop + assert sans_prop._interpolator is not None + assert sans_prop._error_interpolator is not None + assert new_sans_prop._interpolator is None + assert new_sans_prop._error_interpolator is None + assert new_sans_prop.node == 'SomeNode' + assert_array_equal(y2, new_sans_prop.profile) + assert_array_equal(e, new_sans_prop.errors) + assert_array_equal(x2, new_sans_prop.qvalues) + + def test_filter(self): + x1 = np.random.rand(10) + # Insert a NaN to exercise the default non-finite filtering + x1[3] = np.nan + y1 = x1**2 + e1 = 0.1*y1 + to_drop = ~(np.isfinite(x1) & np.isfinite(y1) & np.isfinite(e1) + & (e1 != 0)) + prop = ps.ProfileProperty(name='foo', qvalues=x1, profile=y1, + errors=e1) + y2 = y1[~to_drop] + x2 = x1[~to_drop] + e2 = e1[~to_drop] + new_prop = prop.filter(inplace=True) + assert_array_equal(y2, new_prop.profile) + assert_array_equal(e2, new_prop.errors) + assert_array_equal(x2, new_prop.qvalues) + assert new_prop is prop + sans_prop = ps.SansProperty(name='sans_foo', qvalues=x1, profile=y1, + errors=e1, node='SomeNode') + # inplace is False + to_drop2 = ~(y1 < 0.5) + y3 = y1[~to_drop2] + x3 = x1[~to_drop2] + e3 = e1[~to_drop2] + new_sans_prop = sans_prop.filter(to_drop=to_drop2) + assert isinstance(new_sans_prop, ps.SansProperty) + assert sans_prop is not new_sans_prop + assert new_sans_prop.node == 'SomeNode' + assert all(new_sans_prop.profile < .5) + assert len(new_sans_prop.profile) == len(new_sans_prop.errors) + assert 
len(new_sans_prop.profile) == len(new_sans_prop.qvalues) + assert_array_equal(y3, new_sans_prop.profile) + assert_array_equal(e3, new_sans_prop.errors) + assert_array_equal(x3, new_sans_prop.qvalues) + class TestSansProperty(object):