Source code for kliff.models.parameter

import pickle
import warnings
from typing import TYPE_CHECKING, List, Tuple, Union

import numpy as np

if TYPE_CHECKING:
    from kliff.transforms.parameter_transforms import ParameterTransform

from loguru import logger


[docs] class Parameter(np.ndarray): """Parameter class for containing physics-based model parameters. This class provides utilities for managing model parameters between the "model space" and the "parameter space". See the glossary below for the definition of these spaces. Modeled on `torch.nn.Parameters`, it inherits from `numpy.ndarray`. It is a numpy array with additional attributes such as name, transform, etc. Glossary: - Model space: The space in which the model expects the parameters to be in. Currently, all models in OpenKIM expect the parameters to be in the affine cartesian space. - Parameter space: Parameter space is the space in which you want to sample/optimize the parameters. Most often parameters are transformed using bijective transformations of the model inputs for ease of optimization/sampling. For example, the log transform is used in searching for the optimal parameters of sloppy model parameters. There can be cases where transformation functions are not bijective, e.g. ceiling function for mapping continuous parameters to discrete values. Parameter space is mostly used for optimization, and not the model itself. If no transform_function is provided then parameter space and model space are same. All functions that needs input/output in the model space, will use the suffix `_model_space` and `_param_space` for the transformed parameter space. Below is the list of such twinned functions, and their designed use cases: 1. `get_numpy_array_model_space` and `get_numpy_array_param_space`: These functions return the numpy array of parameters in the model space and parameter space. These functions should be used for getting the pure numpy array of parameters where the ``Parameters`` class might not work, e.g for feeding values to the model. They are also used in case of comparing the parameter state etc. 2. `copy_from_model_space` and `copy_from_param_space`: These functions copy the provided array to self in the model space and parameter space. They are useful for copying values during optimization etc. NOTE: These functions expect the incoming array to be of the same type and shape as self, compensated for opt_mask. 3. `add_bounds_model_space` and `add_bounds_param_space`: These functions add bounds to the parameter in the model space and parameter space. 4. `get_bounds_model_space` and `get_bounds_param_space`: These functions return the bounds in the model space and parameter space. 5. `get_opt_numpy_array_param_space`: This function returns the numpy array of parameters in the parameter space, with the opt_mask applied. This should be the de-facto method for getting the numpy array of parameters for optimization. At present, it does not have `_model_space` version, as there are no applications for it. If needed, it can be added later. Attributes: name: Name of the parameter. transform_function: Instance of ``ParameterTransform`` object to be applied to the parameter. index: Index of the parameter in the parameter vector. used for setting the parameter in the KIMPY. bounds: Bounds for the parameter, must be numpy array of shape n x 2, with [n,0] as lower bound, and [n,1] as the upper bound. If None, no bounds are applied. opt_mask: A boolean or boolean array of the same shape as the parameter. For a vector parameter ``opt_mask`` acts as a binary mask to determine which vector components will be optimized, e.g. for a parameter with value [1., 2., 3.], and opt_mask [True, False, True], only the first and third components will be optimized, and second one will be presumed constant. """ name: str transform_function: "ParameterTransform" index: int bounds: np.ndarray opt_mask: Union[np.ndarray, bool] def __new__( cls, input_array: Union[np.ndarray, float, int], name: str = None, transform_function: "ParameterTransform" = None, bounds: np.ndarray = None, index: int = None, opt_mask: [np.ndarray, bool] = None, ): """Initializes and returns a new instance of Parameter. Args: input_array: Input numpy array to initialize the parameter with. name: Name of the parameter transform_function: Instance of ``ParameterTransform`` object to be applied to the parameter. bounds: n x 2 array of lower and upper bounds for the parameter. If None, no bounds are applied index: Index of the parameter in the parameter vector. Used for setting the parameter in the KIMPY. opt_mask: Boolean array of the same shape as the parameter. The values marked ``True`` are optimized, and ``False`` are not optimized. Single boolean value can also be provided, in which case it will be applied to all the components of the parameter. Returns: A new instance of Parameter. """ array_in = np.array(input_array) obj = array_in.view(cls) obj.name = name obj.transform_function = transform_function obj.index = index obj._is_transformed = False obj.bounds = bounds if opt_mask is not None: if isinstance(opt_mask, bool): opt_mask = np.ones_like(obj, dtype=bool) * opt_mask obj.opt_mask = opt_mask else: obj.opt_mask = np.zeros(obj.shape, dtype=bool) obj._bounds_transformed = False return obj # TODO: This seems a bit off, the signature should match np.array, need to look more in it # but the format matches the one in numpy examples. def __array_finalize__(self, obj): """Finalizes a parameter, needed for numpy object cleanup.""" if obj is None: return self.name = getattr(obj, "name", None) self.transform_function = getattr(obj, "transform_function", None) self.bounds = getattr(obj, "bounds", None) self.index = getattr(obj, "index", None) self._is_transformed = getattr(obj, "_is_transformed", False) self.opt_mask = getattr(obj, "opt_mask", None) self._bounds_transformed = getattr(obj, "_bounds_transformed", False) def __repr__(self): return f"Parameter {self.name} {np.ndarray.__repr__(self)}."
[docs] def transform(self): """Apply the transform to the parameter. This method simple applies the function ~:kliff.transforms.ParameterTransform.__call__ to the parameter (or equivalently, ~:kliff.transforms.ParameterTransform.transform). """ if self._is_transformed: # warnings.warn("Parameter {0} has already been transformed.".format(self.name)) # Warnings become quite noisy, so commenting it out for now. # TODO: figure out a better solution for this. return else: if self.transform_function is not None: transformed_array = self.transform_function(self) self[:] = transformed_array self._is_transformed = True
[docs] def inverse_transform(self): """Apply the inverse transform to the parameter. Simply applies the function :kliff.transforms.ParameterTransform.inverse_transform() in place, to the parameters.""" if not self._is_transformed: warnings.warn(f"Parameter {self.name} has not been transformed.") return else: if self.transform_function is not None: inv_transformed_array = self.transform_function.inverse_transform(self) self[:] = inv_transformed_array self._is_transformed = False
[docs] def copy_from_param_space(self, arr: np.ndarray): """Copy array to self in the parameter space. Array can be a numpy array or a Parameter object. This method assumes that the array is of the same type and shape as self, compensated for opt_mask. If not, it will raise an error. This method also assumes that the incoming array is in the same space, as the parameter currently (i.e. "Parameter space", see glossary above for detail). Args: arr: Array to copy to self. """ # convert to numpy array if (not isinstance(arr, (np.ndarray, Parameter))) and isinstance( arr, (float, int) ): arr = np.asarray(arr) tmp_arr = np.zeros_like(self) tmp_arr[self.opt_mask] = arr tmp_arr[~self.opt_mask] = self[~self.opt_mask] arr = tmp_arr arr = arr.astype(self.dtype) self[:] = arr
[docs] def copy_from_model_space(self, arr: np.array): """Copy arr from model space. Array can be a numpy array or a Parameter object. This method assumes that the incoming array is in the model space and would need transformation to the parameter space before copying. It is a safer method to use in most cases. If the parameter is not transformed, it will transform it first for maintaining consistency. This method requires the copied array to have consistent opt_mask applied. Args: arr: Array to copy to self. """ # ensure that the parameter is transformed if not self._is_transformed: self.transform() if self.transform_function is not None: arr = self.transform_function.transform(arr) self.copy_from_param_space(arr)
[docs] def get_numpy_array_model_space(self) -> np.ndarray: """Get a numpy array of parameters in the model space. This method should be uses for getting the numpy array of parameters where the ``Parameters`` class might not work, for feeding values to the model. Returns: A numpy array of parameters in the original space. """ if (self.transform_function is not None) and self._is_transformed: return self.transform_function.inverse_transform(self) else: return np.array(self)
[docs] def get_numpy_array_param_space(self): """Applies the transform to the parameter, and returns the transformed array.""" self.transform() return np.array(self)
[docs] def get_opt_numpy_array_param_space(self) -> np.ndarray: """Get a masked numpy array of parameters in the transformed space. This method is similar to :get_numpy_array_param_space but additionally does apply the opt_mask, and returns the array. This ensures the correctness of the array for optimization/other applications. *This should be the de-facto method for getting the numpy array of parameters.* Returns: A numpy array of parameters in the original space. """ np_arr = self.get_numpy_array_param_space() # in transformed space if self.opt_mask is not None: np_arr = np_arr[self.opt_mask] return np_arr
[docs] def copy_at_param_space( self, arr: Union[int, float, np.ndarray, List], index: Union[int, List[int]] ): """Copy values at a particular index or indices in parameter space. This method directly copies the provided data, and does not perform any checks. Args: index: Index or indices to copy the values at. arr: Array to copy to self. """ if isinstance(index, int) and isinstance(arr, (int, float)): index = [index] arr = np.array([arr]) elif isinstance(index, list) and isinstance(arr, (list, np.ndarray)): index = np.array(index) arr = np.array(arr) elif isinstance(index, np.ndarray) and isinstance(arr, np.ndarray): if index.shape != arr.shape: raise ParameterError("Index and value are array of different shapes.") else: raise ParameterError( "Either index and value should both be scalar, or both be list/array of same length." ) arr = arr.astype(self.dtype) for i, j in zip(index, arr): self[i] = j
[docs] def add_transform(self, transform: "ParameterTransform"): """Save a transform object with the parameter. Args: transform: Instance of ``ParameterTransform`` object to be applied to the parameter. """ self.transform_function = transform self.transform() self._is_transformed = True if self.bounds is not None and not self._bounds_transformed: self.bounds = self.transform_function(self.bounds)
[docs] def add_bounds_model_space(self, bounds: np.ndarray): """Add bounds to the parameter. Bounds should be supplied in the model space. The bounds will be transformed if the transform_function is provided to the parameter. Args: bounds: numpy array of shape (n, 2) """ if bounds.shape[1] != 2: raise ParameterError("Bounds must have shape (n, 2).") if self.transform_function is not None: self.bounds = self.transform_function(bounds) self._bounds_transformed = True else: self.bounds = bounds
[docs] def add_bounds_param_space(self, bounds: np.ndarray): """Add bounds to the parameter. Add bounds to the parameter in parameter space. It does not do any additional checks or perform any transformations. Args: bounds: numpy array of shape (n, 2) """ if bounds.shape[1] != 2: raise ParameterError("Bounds must have shape (n, 2).") self.bounds = bounds self._bounds_transformed = True
[docs] def add_opt_mask(self, mask: Union[np.ndarray, bool]): """Set mask for optimizing vector quantities. It expects an input array of shape (n,), where n is the dimension of the vector quantity to be optimized. This array must contain n booleans indicating which properties to optimize. Args: mask: boolean array of same shape as the vector quantity to be optimized """ if isinstance(mask, bool): mask = np.ones_like(self, dtype=bool) * mask if mask.shape != self.shape: raise ParameterError("Mask must have shape {0}.".format(self.shape)) self.opt_mask = mask
[docs] def get_bounds_param_space(self) -> List[Tuple[int, int]]: """Returns bounds array that is used by scipy optimizer. Returns: A list of tuples of the form (lower_bound, upper_bound) """ arr = self.get_opt_numpy_array_param_space() bounds = [] if self.bounds is not None: if (self.bounds.shape[0] == arr.shape[0]) and (self.bounds.shape[1] == 2): for i in range(arr.shape[0]): bounds.append((self.bounds[i, 0], self.bounds[i, 1])) else: raise ValueError("Bounds must have shape: {0}x2.".format(arr.shape)) else: bounds = [(None, None) for i in range(arr.shape[0])] return bounds
[docs] def get_bounds_model_space(self) -> np.ndarray: """Get the bounds in the original space. Returns: A numpy array of bounds in the original space. """ if self.transform_function is not None: return self.transform_function.inverse_transform(self.bounds) else: return self.bounds
[docs] def has_bounds(self) -> bool: """Check if bounds are set for optimizing quantities Returns: True if bounds are set, False otherwise. """ return self.bounds is not None
[docs] def as_dict(self): """Return a dictionary containing the state of the object.""" state_dict = self.__dict__.copy() # Original dict will not have values state_dict["@value"] = self.get_numpy_array_model_space() state_dict["@module"] = self.__class__.__module__ state_dict["@class"] = self.__class__.__name__ return state_dict
[docs] def save(self, filename): """Save the parameter to disk.""" state_dict = self.as_dict() with open(filename, "wb") as f: pickle.dump(state_dict, f)
[docs] @classmethod def from_dict(cls, state_dict): """Update the object's attributes based on the provided state dictionary. Args: state_dict (dict): The dictionary containing the state of the object. This dictionary should include the "value" key. """ # Extract the value from the state dictionary value = state_dict.pop("@value") class_name = state_dict.pop("@class") module_name = state_dict.pop("@module") is_transformed = state_dict.pop("_is_transformed") bounds_transformed = state_dict.pop("_bounds_transformed") # Update the object's attributes with the remaining key-value pairs # Copy the extracted value to a parameter obj = cls(value, **state_dict) obj._is_transformed = is_transformed obj._bounds_transformed = bounds_transformed return obj
[docs] @classmethod def load(cls, filename): """Load a parameter from disk. TODO: non classmethod version """ with open(filename, "rb") as f: state_dict = pickle.load(f) return cls.from_dict(state_dict)
[docs] def get_opt_param_name_value_and_indices( self, ) -> Tuple[str, Union[float, np.ndarray], int]: """Get the name, value, and indices of the optimizable parameters. Returns: A tuple of lists of names, values, and indices of the optimizable parameters. """ return self.name, self.get_numpy_array_model_space(), self.index
@property def lower_bound(self): """Get the lower bounds of the parameter. Always returns values in parameter space. Returns: A numpy array of lower bounds. """ bounds = self.get_bounds_param_space() return np.array([b[0] for b in bounds]) @property def upper_bound(self): """Get the upper bounds of the parameter. Always returns values in parameter space. Returns: A numpy array of upper bounds. """ bounds = self.get_bounds_param_space() return np.array([b[1] for b in bounds]) @property def is_mutable(self): """Check if the parameter is mutable. Returns: True if the parameter is mutable, False otherwise. """ return np.any(self.opt_mask)
[docs] class ParameterError(Exception): def __init__(self, msg): super(ParameterError, self).__init__(msg) self.msg = msg