Source code for kliff.analyzers.rmse

import os
import sys
from collections.abc import Iterable

import numpy as np
from loguru import logger

from kliff.utils import create_directory, split_string


[docs]class EnergyForcesRMSE: r""" Analyzer to compute the root-mean-square error (RMSE) for energy and forces. The `energy difference norm` for a configuration is defined as: .. math:: e_\text{norm} = |e_\text{pred} - e_\text{ref}| / N, where :math:`e_\text{pred}` is the prediction of the total energy from the model, :math:`e_\text{ref}` is the corresponding reference energy, and :math:`N` is the number of atoms in the configuration. The division by :math:`N` is applied only when ``normalize = True`` in ``run``. Similarly, the `forces difference norm` for a configuration is defined as: .. math:: f_\text{norm} = || \bm f_\text{pred} - \bm f_\text{ref}|| / N, where :math:`f_\text{pred}` is the prediction of the forces on atoms from the model and :math:`f_\text{ref}` is the corresponding reference forces, :math:`N` is the number of atoms in the configuration. The division by :math:`N` is applied only when ``normalize = True`` in ``run``. The RMSEs for energy and forces are defined as: .. math:: e_\text{RMSE} = \sqrt{ \frac{\sum_{m=1}^M e_\text{norm}^2}{M}} and .. math:: f_\text{RMSE} = \sqrt{ \frac{\sum_{m=1}^M f_\text{norm}^2}{M}}, in which :math:`M` is the total number of configurations in the dataset. """ def __init__(self, calculator, energy=True, forces=True): self.calculator = calculator self.compute_energy = energy self.compute_forces = forces
[docs] def run(self, normalize=True, sort=None, path=None, verbose=1): """ Run the RMSE analyzer. Parameters ---------- normalize: bool Whether to normalize the energy (forces) by the number of atoms in a configuration. sort: str (optional) Sort per configuration information according to `energy` or `forces`. If `None`, no sort. This works only when per configuration information is requested, i.e. ``verbose > 0``. path: str (optional) Path to write out the results. If `None`, write to stdout, otherwise, write to the file specified by `path`. Note, if ``verbose==3``, the difference of energy and forces will be written to a directory named `energy_forces_RMSE-difference`. verbose: int (optional) Verbose level of the output info. Available values are: 0, 1, 2. If ``verbose=0``, only output the energy and forces RMSEs for the dataset. If ``verbose==1``, output the norms of the energy and forces for each configuration additionally. If ``verbose==2``, output the difference of the energy and forces for each atom, and the information is written to extended XYZ files with the location specified by ``path``. """ logger.info("Start analyzing energy and forces RMSE.") cas = self.calculator.get_compute_arguments() all_enorm = [] all_fnorm = [] all_identifier = [] # common path of dataset paths = [_get_config(ca).path for ca in cas] common = _get_common_path(paths) for i, ca in enumerate(cas): if i % 100 == 0: logger.info(f"Processing configuration {i}.") prefix = "analysis_energy_forces_RMSE-difference" enorm, fnorm = self._compute_single_config( ca, normalize, verbose, common, prefix ) all_enorm.append(enorm) all_fnorm.append(fnorm) all_identifier.append(_get_config(ca).identifier) all_enorm = np.asarray(all_enorm) all_fnorm = np.asarray(all_fnorm) all_identifier = np.asarray(all_identifier) if sort == "energy": if self.compute_energy: order = all_enorm.argsort() all_enorm = all_enorm[order] all_fnorm = all_fnorm[order] all_identifier = all_identifier[order] elif sort == "forces": if self.compute_forces: order = all_fnorm.argsort() all_enorm = all_enorm[order] all_fnorm = all_fnorm[order] all_identifier = all_identifier[order] if path is not None: fout = open(path, "w") else: fout = sys.stdout # header print("#" * 80, file=fout) print("#", file=fout) print("# Root-mean-square errors for energy and forces", file=fout) print("#", file=fout) msg = ( 'Values reported is per atom quantify if "normalize=True". For example, ' '"eV/atom" for energy and "(eV/Angstrom)/atom" if "eV" is the units for ' 'energy and "Angstrom" is the units for forces.' ) print(split_string(msg, length=80, starter="#"), file=fout) print("#", file=fout) print( "# See (TODO insert url of doc) for the meaning of the reported values.", file=fout, ) print("#" * 80 + "\n", file=fout) # norms of each config if verbose >= 1: print("#" * 80, file=fout) print("Per configuration quantify\n", file=fout) print("# config", end=" " * 4, file=fout) if self.compute_energy: print("energy difference norm", end=" " * 4, file=fout) if self.compute_forces: print("forces difference norm", end=" " * 4, file=fout) print("config identifier", file=fout) for i, (enorm, fnorm, identifier) in enumerate( zip(all_enorm, all_fnorm, all_identifier) ): print("{:<10d}".format(i), end=" " * 4, file=fout) if self.compute_energy: print("{:.10e}".format(enorm), end=" " * 10, file=fout) if self.compute_forces: print("{:.10e}".format(fnorm), end=" " * 10, file=fout) print(identifier, file=fout) print("\n", file=fout) # RMSE of all configs print("#" * 80, file=fout) print("RMSE for the dataset (all configurations).", file=fout) if self.compute_energy: e_rmse = np.linalg.norm(all_enorm) / len(all_enorm) ** 0.5 print("{:.10e} # energy RMSE".format(e_rmse), file=fout) if self.compute_forces: f_rmse = np.linalg.norm(all_fnorm) / len(all_fnorm) ** 0.5 print("{:.10e} # forces RMSE".format(f_rmse), file=fout) print("\n", file=fout) # difference of each atom if verbose >= 2: print("#" * 80, file=fout) msg = ( "The differences of energy and forces are written to the directory " '"energy_forces_RMSE-difference" in extended XYZ format.' ) print(split_string(msg, length=80, starter="#"), file=fout) print("\n", file=fout) logger.info("Finish analyzing energy and forces RMSE.")
def _compute_single_config(self, ca, normalize, verbose, common_path, prefix): self.calculator.compute(ca) conf = _get_config(ca) conf_path = os.path.abspath(conf.path) natoms = conf.get_num_atoms() if self.compute_energy: pred_e = self.calculator.get_energy(ca) pred_e = _to_numpy(pred_e, ca) ref_e = conf.energy ediff = pred_e - ref_e enorm = abs(ediff) if normalize: enorm /= natoms else: ediff = None enorm = None if self.compute_forces: pred_f = self.calculator.get_forces(ca) pred_f = _to_numpy(pred_f, ca).reshape(-1, 3) ref_f = conf.forces.reshape(-1, 3) fdiff = pred_f - ref_f fnorm = np.linalg.norm(fdiff) if normalize: fnorm /= natoms else: fdiff = None fnorm = None # write the difference to extxyz files if verbose >= 2: if conf_path.startswith(common_path): base = conf_path[len(common_path) :] else: raise AnalyzerError( 'identifier "{}" not start with common_path "{}".'.format( conf_path, common_path ) ) path = os.path.join(prefix, base) create_directory(path) conf.to_file(path) return enorm, fnorm
def _get_config(compute_argument): """ Get the configuration attached to a compute argument. For KIM model and Torch model, the way is different. It would be better to unify these two. The method here is very vulnerable. """ if isinstance(compute_argument, Iterable): # compute argument from Torch dataset; [0] because it is a batch of 1 element conf = compute_argument[0]["configuration"] else: # For KIM and built-in models, it is a compute argument class conf = compute_argument.conf return conf def _to_numpy(x, compute_argument): """ Convert to a numpy array from a tensor. `compute_argument` is needed to determine whether ``x`` is a list of tensor of a numpy array. """ if isinstance(compute_argument, Iterable): return x[0].detach().numpy() else: return x def _get_common_path(paths): """ Find the common path of a list of paths. For example, given paths = ['/A/B/c.x', '/A/B/D/e.x'], the returns `/A/B/`. """ paths = [os.path.abspath(p) for p in paths] common = "" i = 0 while True: if i < len(paths[0]): c = paths[0][i] else: break not_same = False for p in paths: if not p[i] == c: not_same = True break if not_same: break common += c i += 1 return common class AnalyzerError(Exception): def __init__(self, msg): super(AnalyzerError, self).__init__(msg) self.msg = msg