Source code for qharv.reel.stat_h5

# Author: Yubo "Paul" Yang
# Email: yubo.paul.yang@gmail.com
# Routines to parse hdf5 spectral and volumetric data output.
#  Mostly built around h5py's API.
import os
import numpy as np

from qharv.seed.wf_h5 import read, ls  # re-exported for convenient file access

def path_loc(handle, path):
  return handle[path][()]

def me2d(edata, kappa=None, axis=0):
  """ Calculate mean and error of a table of columns

  Args:
    edata (np.array): 2D array of equilibrated time series data
    kappa (float, optional): pre-calculated auto-correlation;
     default is to re-calculate on the fly
    axis (int, optional): axis to average over, default 0 i.e. columns
  Return:
    (np.array, np.array): (mean, error) of each column
  """
  # get autocorrelation
  ntrace = edata.shape[axis]
  if kappa is None:
    try:
      # fortran implementation is faster than np FFT for len(trace) < 1000
      from qharv.reel.forlib.stats import corr
    except ImportError as err:
      msg = str(err)
      msg += '\n Please compile qharv.reel.forlib.stats using f2py.'
      raise ImportError(msg)
    kappa = np.apply_along_axis(corr, axis, edata)
  neffective = ntrace/kappa
  # calculate mean and error
  val_mean = edata.mean(axis=axis)
  val_std = edata.std(ddof=1, axis=axis)
  val_err = val_std/np.sqrt(neffective)
  return val_mean, val_err

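# Usage sketch for me2d: the input is any (nblock, ncolumn) array of
#  equilibrated scalar traces; the random data below are purely illustrative.
#
#   import numpy as np
#   from qharv.reel import stat_h5
#   rng = np.random.default_rng(42)
#   edata = 1.0 + 0.05*rng.normal(size=(256, 3))  # 256 blocks, 3 columns
#   ym, ye = stat_h5.me2d(edata)             # kappa estimated via forlib.stats
#   ym, ye = stat_h5.me2d(edata, kappa=1.0)  # or supply kappa to skip f2py
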
def mean_and_err(handle, obs_path, nequil, kappa=None):
  """ Calculate the mean and error of an observable from a QMCPACK stat.h5 file

  Args:
    handle (h5py.Group): or h5py.File or h5py.Dataset
    obs_path (str): path to observable, e.g. 'gofr_e_1_1'
    nequil (int): number of equilibration blocks to throw out
    kappa (float, optional): auto-correlation, default is to recalculate
  Return:
    (np.array, np.array): (mean, err), the mean and error of the observable
  """
  # look for the hdf5 group corresponding to the requested observable
  if obs_path not in handle:
    raise RuntimeError('group %s not found' % obs_path)
  val_path = os.path.join(obs_path, 'value')
  if val_path not in handle:
    # !!!! assume obs_path already includes 'value';
    #  `handle[val_path]` will fail below if this assumption is not correct
    val_path = obs_path
  # get equilibrated data
  val_data = handle[val_path][()]
  nblock = len(val_data)
  if nequil >= nblock:
    msg = 'cannot throw out %d blocks from %d blocks' % (nequil, nblock)
    raise RuntimeError(msg)
  edata = val_data[nequil:]
  # get statistics
  val_mean, val_err = me2d(edata, kappa=kappa)
  return val_mean, val_err

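# Usage sketch for mean_and_err, reusing the docstring example observable
#  'gofr_e_1_1'; the file name and nequil value are illustrative.
#
#   import h5py
#   from qharv.reel import stat_h5
#   with h5py.File('vmc.s000.stat.h5', 'r') as fp:
#     grm, gre = stat_h5.mean_and_err(fp, 'gofr_e_1_1', nequil=16)
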
def dsk_from_skall(fp, obs_name, nequil, kappa=None):
  """ Extract the fluctuating structure factor dS(k) from the "skall" estimator

  Args:
    fp (h5py.File): stat.h5 handle
    obs_name (str): name of the "skall" estimator, likely 'skall'
    nequil (int): equilibration length
    kappa (float, optional): auto-correlation, default is to recalculate
  Return:
    (np.array, np.array, np.array): (kvecs, dskm, dske),
     k-vectors and dS(k) mean and error
  """
  # get data
  kpt_path = '%s/kpoints/value' % obs_name
  sk_path = '%s/rhok_e_e/value' % obs_name
  rhoki_path = '%s/rhok_e_i/value' % obs_name
  rhokr_path = '%s/rhok_e_r/value' % obs_name
  kvecs = fp[kpt_path][()]
  ska = fp[sk_path][()]
  rkra = fp[rhokr_path][()]
  rkia = fp[rhoki_path][()]
  nblock, nk = ska.shape
  assert len(kvecs) == nk
  # subtract |<rho(k)>|^2 block by block to obtain the fluctuating part
  dska = ska[nequil:] - (rkra[nequil:]**2+rkia[nequil:]**2)
  dskm, dske = me2d(dska, kappa=kappa)
  return kvecs, dskm, dske

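# Usage sketch for dsk_from_skall, assuming the run requested the "skall"
#  estimator; the file name and nequil value are illustrative.
#
#   import h5py
#   import numpy as np
#   from qharv.reel import stat_h5
#   with h5py.File('dmc.s001.stat.h5', 'r') as fp:
#     kvecs, dskm, dske = stat_h5.dsk_from_skall(fp, 'skall', nequil=32)
#   kmags = np.linalg.norm(kvecs, axis=-1)  # |k| for a 1D dS(k) plot
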
def rhok_from_skall(fp, obs_name, nequil, kappa=None):
  """ Extract the electronic density rho(k) from stat.h5 file

  Args:
    fp (h5py.File): h5py handle of stat.h5 file
    obs_name (str): name of the "skall" estimator, likely 'skall'
    nequil (int): number of equilibration blocks to remove
    kappa (float, optional): auto-correlation, default is to recalculate
  Return:
    (np.array, np.array, np.array): (kvecs, rhokm, rhoke),
     k-vectors of shape (nk, ndim) and rho(k) mean and error;
     notice rhok is the real view of a complex vector, shape (2*nk,)
  """
  # get data
  kpt_path = '%s/kpoints/value' % obs_name
  rhokr_path = '%s/rhok_e_r/value' % obs_name
  rhoki_path = '%s/rhok_e_i/value' % obs_name
  kvecs = fp[kpt_path][()]
  rkrm, rkre = mean_and_err(fp, rhokr_path, nequil, kappa)
  rkim, rkie = mean_and_err(fp, rhoki_path, nequil, kappa)
  rhokm = rkrm + 1j*rkim
  rhoke = rkre + 1j*rkie
  return kvecs, rhokm.view(float), rhoke.view(float)

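# Usage sketch for rhok_from_skall: the returned mean and error are real views
#  of complex arrays and can be viewed back as complex; the file name and
#  nequil value are illustrative.
#
#   import h5py
#   from qharv.reel import stat_h5
#   with h5py.File('vmc.s000.stat.h5', 'r') as fp:
#     kvecs, rhokm, rhoke = stat_h5.rhok_from_skall(fp, 'skall', nequil=16)
#   rhok = rhokm.view(complex)  # shape (nk,) complex rho(k)
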
def gofr(fp, obs_name, nequil, kappa=None, force=False):
  """ Extract the pair correlation function g(r) from stat.h5 file

  Args:
    fp (h5py.File): h5py handle of stat.h5 file
    obs_name (str): observable name, should start with 'gofr',
     e.g. 'gofr_e_0_1'
    nequil (int): number of equilibration blocks to remove
    kappa (float, optional): auto-correlation, default is to recalculate
    force (bool, optional): force execution, i.e. skip all checks
  Returns:
    tuple: (myr, grm, gre), bin locations, g(r) mean, g(r) error
  """
  if (not obs_name.startswith('gofr')) and (not force):
    msg = '%s does not start with "gofr"; set force=True to bypass' % obs_name
    raise RuntimeError(msg)
  grm, gre = mean_and_err(fp, '%s/value' % obs_name, nequil, kappa)
  rmax = path_loc(fp, '%s/cutoff' % obs_name)[0]
  dr = path_loc(fp, '%s/delta' % obs_name)[0]
  # guess bin locations
  myr = np.arange(0, rmax, dr)
  if (len(myr) != len(grm)) and (not force):
    raise RuntimeError('num_bin mismatch; try read from input?')
  return myr, grm, gre

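# Usage sketch for gofr, reusing the docstring example observable 'gofr_e_0_1';
#  the file name and nequil value are illustrative.
#
#   import h5py
#   from qharv.reel import stat_h5
#   with h5py.File('vmc.s000.stat.h5', 'r') as fp:
#     myr, grm, gre = stat_h5.gofr(fp, 'gofr_e_0_1', nequil=16)
#   # pass force=True to bypass the name and bin-count checks
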
def nofk(fp, obs_name, nequil, kappa=None):
  """ Extract momentum estimator output n(k) from stat.h5 file

  Args:
    fp (h5py.File): h5py handle of stat.h5 file
    obs_name (str): observable name, probably 'nofk'
    nequil (int): number of equilibration blocks to remove
    kappa (float, optional): auto-correlation, default is to recalculate
  Return:
    (np.array, np.array, np.array): (kvecs, nkm, nke),
     k-vectors, n(k) mean and error
  """
  kvecs = fp[obs_name]['kpoints'][()]
  nkm, nke = mean_and_err(fp, '%s/value' % obs_name, nequil, kappa)
  return kvecs, nkm, nke

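# Usage sketch for nofk, ordering the k-vectors by magnitude before plotting;
#  the file name and nequil value are illustrative.
#
#   import h5py
#   import numpy as np
#   from qharv.reel import stat_h5
#   with h5py.File('vmc.s000.stat.h5', 'r') as fp:
#     kvecs, nkm, nke = stat_h5.nofk(fp, 'nofk', nequil=16)
#   kmags = np.linalg.norm(kvecs, axis=-1)
#   idx = np.argsort(kmags)  # order by |k| before plotting n(k)
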
def rdm1(fp, obs_name, nequil, kappa=None):
  """ Extract the 1RDM from stat.h5 file

  Args:
    fp (h5py.File): h5py handle of stat.h5 file
    obs_name (str): observable name, probably '1rdms'
    nequil (int): number of equilibration blocks to remove
    kappa (float, optional): auto-correlation, default is to recalculate
  Return:
    dict: a dictionary of 1RDMs, one for each species (e.g. u, d)
  """
  matrix_path = os.path.join(obs_name, 'number_matrix')
  groups = fp[matrix_path].keys()
  rdms = {}
  for grp in groups:
    path = os.path.join(matrix_path, grp, 'value')
    ym, ye = mean_and_err(fp, path, nequil, kappa)
    rdms[grp] = (ym, ye)
  return rdms

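# Usage sketch for rdm1: the result is keyed by the species groups found under
#  'number_matrix' (e.g. 'u', 'd'); the file name and nequil value are
#  illustrative.
#
#   import h5py
#   from qharv.reel import stat_h5
#   with h5py.File('vmc.s000.stat.h5', 'r') as fp:
#     rdms = stat_h5.rdm1(fp, '1rdms', nequil=16)
#   dm_u, de_u = rdms['u']  # mean and error of the up-spin number matrix
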
def afobs(fp, obs_name, nequil, kappa=None, numer='one_rdm'):
  """ Extract the "best" BackPropagated (BP) 1RDM from an AFQMC stat.h5 file

  Args:
    fp (h5py.File): h5py handle of stat.h5 file
    obs_name (str): observable name, probably 'FullOneRDM'
    nequil (int): number of equilibration BP blocks to remove
    kappa (float, optional): auto-correlation, default is to recalculate
    numer (str, optional): numerator to extract, default 'one_rdm'
  Return:
    dict: a dictionary of 1RDMs, one for each species (e.g. u, d)
  """
  avg_path = os.path.join('Observables', 'BackPropagated', obs_name)
  avgs = fp[avg_path].keys()
  iavgs = [int(a.replace('Average_', '')) for a in avgs]
  mav = max(iavgs)  # use the longest BP path
  matrix_path = os.path.join(avg_path, 'Average_%d' % mav)
  blocks = fp[matrix_path].keys()
  rdm_blocks = [key for key in blocks if key.startswith(numer)]
  nblock = len(rdm_blocks)
  if nequil >= nblock:
    msg = 'cannot discard %d/%d blocks' % (nequil, nblock)
    raise RuntimeError(msg)
  # get the 1RDM at every equilibrated block, normalized by its denominator
  data = []
  for block in rdm_blocks[nequil:]:
    path = os.path.join(matrix_path, block)
    rdm = fp[path][()]
    dpath = os.path.join(matrix_path, block.replace(numer, 'denominator'))
    deno = fp[dpath][()]
    data.append(rdm/deno)
  rdm_shape = rdm.shape
  assert len(rdm_shape) == 2  # (nbas*nbas, 2); 2 for real and imaginary parts
  nbas = int(round(rdm_shape[0]**0.5))
  assert nbas**2 == rdm_shape[0]
  rdm_shape = (nbas, nbas, 2)
  # get mean and standard error
  mat = np.array(data).reshape(-1, np.prod(rdm_shape))
  ym, ye = me2d(mat, kappa=kappa)
  dm = ym.reshape(rdm_shape)
  de = ye.reshape(rdm_shape)
  rdms = {}
  rdms['u'] = (dm, de)
  return rdms

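# Usage sketch for afobs on an AFQMC stat.h5 file: the longest back-propagation
#  path (largest 'Average_%d' index) is selected automatically; the file name
#  and nequil value are illustrative.
#
#   import h5py
#   from qharv.reel import stat_h5
#   with h5py.File('afqmc.stat.h5', 'r') as fp:
#     rdms = stat_h5.afobs(fp, 'FullOneRDM', nequil=8)
#   dm, de = rdms['u']  # shape (nbas, nbas, 2); last axis is (real, imag)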