Source code for qharv.reel.mole

# Author: Yubo "Paul" Yang
# Email: yubo.paul.yang@gmail.com
# dig around for goodies
import subprocess as sp

[docs]def findall(regex, rundir, prepend_star=True, **kwargs): """ find all matching files with regular expression in rundir Args: regex (str): regular expression for file names rundir (str): directory containing the files to be found prepend_star (bool, optional): add '*' to regex, default True Return: list: flist, list of all matching files """ if prepend_star: myregex = '*'+regex else: myregex = regex flist = files_with_regex(myregex, rundir, **kwargs) return flist
[docs]def files_with_regex(regex, rundir, case=True, ftype='f', **kwargs): """ find files with the given suffix in folder rundir rely on bash `find` command Args: regex (str): regular expression for file names rundir (str): directory containing the files to be found case (bool, optional): case sensity, default is True ftype (str, optional): files type, default is regular file 'f' , may be directory 'd' Return: list: flist, a list of filenames matching the given regular expression """ popt = '-path' if not case: popt = '-ipath' # not case sensitive options = [] for key, val in kwargs.items(): options.append('-'+key) options.append(str(val)) cmdl = ['find', rundir] + options + [popt, regex, '-type', ftype] out = sp.check_output(cmdl) flist = out.decode().split('\n')[:-1] return flist
[docs]def find(regex, rundir, **kwargs): """ find the first file that matches the given regular expression RuntimeError will be raised unless exactly one file is found Args: regex (str): regular expression for file names rundir (str): directory containing the files to be found Return: str: filename """ flist = files_with_regex(regex, rundir, **kwargs) if len(flist) != 1: raise RuntimeError('expect 1 but found %d' % len(flist)) return flist[0]
[docs]def clean_path(path): """ remove . and .. from path Args: path (str): file or folder path Return: str: clean path """ segs = path.split('/') segs1 = [seg for seg in segs if seg not in ['.', '..']] path1 = '/'.join(segs1) return path1
[docs]def interpret_qmcpack_fname(fname): """ extract metadata regarding the contents of a file based on its filename. QMCPACK generates files having a pre-determined suffix structure. This function will interpret the last 4 period-separated segments of the suffix. fname examples: qmc.s000.scalar.dat qmc.g000.s000.stat.h5 qmc.g161.s000.config.h5 qmc.g005.s001.cont.xml Args: fname (str): filename, must end in one of ['dat','h5','qmc','xml']. Return: dict: a dictionary of metadata. """ known_extensions = set(['dat', 'h5', 'qmc', 'xml']) tokens = fname.split('.') ext = tokens[-1] # dat,h5,qmc if ext not in known_extensions: raise RuntimeError('unable to interpret %s' % fname) # end if # interpret various pieces of the filename # category cate = tokens[-2] # scalar,stat,config,random,qmc # series index isst = tokens[-3] # s000 iss = int(isst.replace('s', '')) # series index # group index grouped = False # single input is not grouped igt = tokens[-4] # g000 or $prefix ig = 0 # group index suf_list = [isst, cate, ext] if igt.startswith('g') and len(igt) == 4: ig = int(igt.replace('g', '')) suf_list = [igt] + suf_list grouped = True else: # there is no group index pass # keep defaul ig=0, grouped=False # end if # get project id by removing the suffix suffix = '.' + '.'.join(suf_list) prefix = fname.replace(suffix, '') # metadata entry entry = { 'id': prefix, 'group': ig, 'series': iss, 'category': cate, 'ext': ext, 'grouped': grouped } return entry
[docs]def build_qmcpack_fname(entry): """ inverse of interpret_qmcpack_fname Args: entry (dict): a dictionary of meta data, must include ['id','grouped','group','series','category','ext'] in key Return: str: filename """ order = ['id', 'series', 'category', 'ext'] if entry['grouped']: order.insert(1, 'group') # end if tokens = [] for key in order: val = entry[key] # get string representation if key == 'group': val = 'g'+str(val).zfill(3) elif key == 'series': val = 's'+str(val).zfill(3) else: val = str(val) # end if tokens.append(val) # end for fname = '.'.join(tokens) return fname