Source code for in3Utils.cfgHandling

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
    Utilities for working with cfg info
'''

import logging
import numpy as np
import pathlib
import pandas as pd

# Local imports
from avaframe.in3Utils import cfgUtils
import avaframe.in3Utils.fileHandlerUtils as fU

log = logging.getLogger(__name__)


def insertIntoSimName(name, keys, values, index):
    """ Add keys and values to name, in between parts of name split by index

        Parameters
        -----------
        name: str
            name to extend
        keys: list
            list with keys
        values: list
            list with values
        index: str
            used to split name

        Returns
        --------
        newName: str
            name with keys and values inserted after index
    """

    # Split according to index
    splitName = name.split(index + '_')
    newPart = '_'

    # Loop through keys
    for key, value in zip(keys, values):
        newPart = newPart + str(key) + '_' + str(value) + '_'

    # Put new name back together
    try:
        newName = splitName[0] + str(index) + newPart + splitName[1]
    except IndexError:
        log.info(splitName)
        msg = 'Some part is missing. SOMENAME_simHash_XXX is expected'
        log.error(msg)
        raise IndexError(msg)

    return newName

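# Illustrative usage (a hedged sketch, not part of the original module): the hypothetical
# simName 'relTest_abc123_null_dfa' is split at the simHash 'abc123' and the key/value pair
# is inserted right after it:
# >>> insertIntoSimName('relTest_abc123_null_dfa', ['mu'], [0.155], 'abc123')
# 'relTest_abc123_mu_0.155_null_dfa'
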
def addInfoToSimName(avalancheDir, csvString=''):
    """ Add parameterName and value to simNames of simulation dataframe
        E.g. used as helper routine for renaming layer names in qgis

        Parameters
        -----------
        avalancheDir: str
            path to avalanche directory
        csvString: str
            comma separated list with parameter names, as found in com1DFA ini file
            e.g. 'mu,tau0,tEnd'

        Returns
        --------
        simDF: pandas dataframe
            containing index, the parameters and the old and new name
    """

    # read the allConfigurationInfo
    simDF, _ = cfgUtils.readAllConfigurationInfo(avalancheDir)

    vars = csvString.split(',')

    for var in vars:
        # get the newName for every row by applying insertIntoSimName on each row
        simDF['newName'] = simDF.apply(lambda row: insertIntoSimName(row['simName'], vars, row[vars], row.name),
                                       axis=1)

    vars.append('simName')
    vars.append('newName')

    return simDF[vars]

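# Illustrative call (a hedged sketch; the directory and parameter names are assumptions):
# >>> simDF = addInfoToSimName('data/avaTest', csvString='mu,tau0')
# returns a dataframe with columns ['mu', 'tau0', 'simName', 'newName'], where 'newName' carries
# the parameter values, e.g. 'relTest_abc123_mu_0.155_tau0_760_null_dfa'
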
def filterSims(avalancheDir, parametersDict, specDir='', simDF=''):
    """ Filter simulations using a list of parameters and a pandas dataFrame of simulation configurations
        if ~ is used as a prefix for a parameter - it is filtered according to values that do NOT match
        the value provided with the ~Parameter

        Parameters
        -----------
        avalancheDir: str
            path to avalanche directory
        parametersDict: dict
            dictionary with parameter and parameter values for filtering
        specDir: str
            path to a directory where simulation configuration files can be found - optional
        simDF: pandas DataFrame
            optional - if simDF already available

        Returns
        --------
        simNameList: list
            list of simNames that match filtering criteria
    """

    if isinstance(simDF, pd.DataFrame) is False:
        # load dataFrame for all configurations
        simDF = cfgUtils.createConfigurationInfo(avalancheDir, standardCfg='', writeCSV=False, specDir=specDir)

    # filter simulations - all conditions in the parametersDict have to be met
    if parametersDict != '':
        for key, value in parametersDict.items():
            # first check if values are valid
            if value == '' or value == []:
                log.debug('Parameter %s is not used for filtering as no valid value is provided: %s'
                          % (key, value))
            # required as np.float64 is False for np.float64 != []
            else:
                # convert values to list
                if not isinstance(value, (list, np.ndarray)):
                    value = [value]
                # remove non matching simulations from simDF
                if key in ['relTh', 'entTh', 'secondaryRelTh', '~relTh', '~entTh', '~secondaryRelTh']:
                    simDF = filterCom1DFAThicknessValues(key, value, simDF)
                else:
                    simDF = removeSimsNotMatching(simDF, key, value)

    # list of simNames after filtering
    simNameList = simDF['simName'].tolist()

    return simNameList

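# Illustrative usage (a hedged sketch; the directory and parameter values are assumptions):
# keep only simulations of type 'null' with all release thickness features below 1.5 m, and
# exclude simulations run with the samosAT friction model:
# >>> parametersDict = {'simTypeActual': ['null'], 'relTh': ['<1.5'], '~frictModel': ['samosAT']}
# >>> simNameList = filterSims('data/avaTest', parametersDict)
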
def removeSimsNotMatching(simDF, key, value):
    """ remove simulations from simDF that do not match filtering criteria

        Parameters
        -----------
        simDF: pandas dataframe
            dataframe with one row per simulation and info on its characteristics, parameters used, ..
        key: str
            name of parameter that shall be used for filtering
        value: list
            list of parameter values used for filtering

        Returns
        ---------
        simDF: pandas dataframe
            updated dataframe with only those simulations that match filtering criteria
    """

    # check if negation in filtering criteria
    notIn = False
    if '~' in key:
        # only add simulations that do not match the value of ~key
        key = key.replace("~", "")
        notIn = True

    # only keep simulations in simDF that match filtering criteria
    if isinstance(value[0], str):
        if '<' in value[0]:
            simDF = simDF[simDF[key] < float(value[0].split('<')[1])]
        elif '>' in value[0]:
            simDF = simDF[simDF[key] > float(value[0].split('>')[1])]
        else:
            if notIn:
                simDF = simDF[~simDF[key].isin(value)]
            else:
                simDF = simDF[simDF[key].isin(value)]
    else:
        # if float comparison allow for tolerance
        filterMask = np.isclose(simDF[key].values.reshape(-1, 1), value, atol=1.e-7, rtol=1.e-8).any(axis=1)
        if notIn:
            simDF = simDF[~filterMask]
        else:
            simDF = simDF[filterMask]

    return simDF

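# Minimal self-contained example (illustrative data, not from the original module):
# >>> df = pd.DataFrame({'simName': ['simA', 'simB', 'simC'], 'mu': [0.155, 0.2, 0.155]})
# >>> removeSimsNotMatching(df, 'mu', [0.155])['simName'].tolist()
# ['simA', 'simC']
# >>> removeSimsNotMatching(df, '~mu', [0.155])['simName'].tolist()
# ['simB']
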
def orderSimulations(varParList, ascendingOrder, simDF):
    """ Order simulations dataframe using a list of parameters and a flag if in ascending or descending order

        Parameters
        -----------
        varParList: str or list
            simulation configuration parameters for ordering simulations
        ascendingOrder: bool
            True if simulations shall be ordered in ascending order regarding varPar
        simDF: pandas dataFrame
            dataFrame of simulations (one line per simulation with fileName, ... and values for parameters
            in varParList)

        Returns
        --------
        varParList: list
            parameters used for ordering, provided as list
        simDF: pandas dataFrame
            sorted dataFrame of simulation results (fileName, ... and values for parameters in varParList)
    """

    # make sure that parameters used for ordering are provided as list
    if isinstance(varParList, str):
        varParList = [varParList]

    # sort according to varParList and ascendingOrder flag
    # also check that key exists
    try:
        simDF = simDF.sort_values(by=varParList, ascending=ascendingOrder)
    except KeyError as e:
        message = 'Choose a valid parameter for sorting the simulations. \'%s\' is not valid.' % e.args[0]
        log.error(message)
        raise KeyError(message)

    return varParList, simDF

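# Minimal self-contained example (illustrative data):
# >>> df = pd.DataFrame({'simName': ['simA', 'simB'], 'relTh': [1.5, 1.0]})
# >>> varParList, dfSorted = orderSimulations('relTh', True, df)
# >>> dfSorted['simName'].tolist()
# ['simB', 'simA']
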
def fetchAndOrderSimFiles(avalancheDir, inputDir, varParList, ascendingOrder, specDir='', resFiles=False):
    """ Fetch and order simulation results using a list of parameters and a flag if in ascending or
        descending order

        Parameters
        -----------
        avalancheDir: str
            path to avalanche directory
        inputDir: str
            path to simulation results
        varParList: str or list
            simulation configuration parameters for ordering simulations
        ascendingOrder: bool
            True if simulations shall be ordered in ascending order regarding varPar
        specDir: str
            path to a directory where simulation configuration files can be found - optional
        resFiles: bool
            if True fetch result files from inputDir and merge with configuration info - optional

        Returns
        --------
        dataDF: pandas dataFrame
            dataFrame of simulation results (fileName, ... and values for parameters in varParList)
    """

    # load dataFrame for all configurations
    simDF = cfgUtils.createConfigurationInfo(avalancheDir, specDir=specDir)

    if resFiles:
        # create dataframe for simulation results in inputDir
        dataDF = fU.makeSimDF(inputDir)
        if isinstance(varParList, str):
            varParList = [varParList]
        # append 'simName' for merging of dataframes according to simNames
        columnNames = ['simName'] + varParList
        # merge varParList parameters as columns to dataDF for matching simNames
        dataDFNew = dataDF.merge(simDF[columnNames], left_on='simName', right_on='simName')
    else:
        dataDFNew = simDF

    varParList, dataDFNew = orderSimulations(varParList, ascendingOrder, dataDFNew)

    return dataDFNew

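# Illustrative call (a hedged sketch; paths are assumptions): order all configurations by 'relTh'
# in ascending order without fetching result files (inputDir is only used if resFiles is True):
# >>> dataDF = fetchAndOrderSimFiles('data/avaTest', '', 'relTh', True, resFiles=False)
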
def orderSimFiles(avalancheDir, inputDir, varParList, ascendingOrder, specDir='', resFiles=False):
    """ Order simulation results using a list of parameters and a flag if in ascending or descending order

        Parameters
        -----------
        avalancheDir: str
            path to avalanche directory
        inputDir: str
            path to simulation results
        varParList: str or list
            simulation configuration parameters for ordering simulations
        ascendingOrder: bool
            True if simulations shall be ordered in ascending order regarding varPar
        specDir: str
            path to a directory where simulation configuration files can be found - optional
        resFiles: bool
            if True fetch result files from inputDir and merge with configuration info - optional

        Returns
        --------
        dataDF: pandas dataFrame
            dataFrame of simulation results (fileName, ... and values for parameters in varParList)
    """

    # load dataFrame for all configurations
    simDF = cfgUtils.createConfigurationInfo(avalancheDir, specDir=specDir)

    # make sure that parameters used for ordering are provided as list
    if isinstance(varParList, str):
        varParList = [varParList]

    if resFiles:
        # create dataframe for simulation results in inputDir
        dataDF = fU.makeSimDF(inputDir)
        # append 'simName' for merging of dataframes according to simNames
        columnNames = ['simName'] + varParList
        # merge varParList parameters as columns to dataDF for matching simNames
        dataDFNew = dataDF.merge(simDF[columnNames], left_on='simName', right_on='simName')
    else:
        dataDFNew = simDF

    # sort according to varParList and ascendingOrder flag
    dataDFNew = dataDFNew.sort_values(by=varParList, ascending=ascendingOrder)

    return dataDFNew

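# Illustrative call (a hedged sketch; paths and parameter names are assumptions): merge peak result
# files found in an output directory with their configurations and sort by 'relTh' and 'mu':
# >>> dataDF = orderSimFiles('data/avaTest', 'data/avaTest/Outputs/com1DFA/peakFiles',
# ...                        ['relTh', 'mu'], True, resFiles=True)
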
def filterCom1DFAThicknessValues(key, value, simDF):
    """ thickness settings differ if read from shp file - this requires more complex filtering
        if read from shp - thickness values are provided per feature!
        for example relTh = '' but relTh0 = 1 is appended for feature with id 0, relTh1 for feature
        with id 1, etc.

        Parameters
        -----------
        key: str
            name of parameter
        value: list
            list of values used for filtering
        simDF: pandas dataframe
            configuration info for each simulation

        Returns
        --------
        simDF: pandas dataframe
            updated dataframe
    """

    # check if filter for values that do NOT match criteria
    notIn = False
    if '~' in key:
        key = key.split('~')[1]
        notIn = True

    # create required parameters for searching
    thFlag = key + 'FromShp'
    thId = key + 'Id'
    thThickness = key + 'Thickness'
    thPercentVariation = key + 'PercentVariation'

    # append identifier if simulation matches thickness filter criteria
    simDF['toBeAdded'] = False

    # initialize list for thickness parameter names (according to thickness configuration -
    # e.g. multiple features)
    allThNames = []

    # loop over simDF and set identifier if filter criteria are matched
    for simHash, simDFrow in simDF.iterrows():
        if simDFrow[thFlag] == 'True':
            # initialise thickness ids and thickness parameter names if thickness read from shp
            thIdList = str(simDFrow[thId]).split('|')
            thNames = [(key + id) for id in thIdList]
            allThNames = allThNames + thNames
            log.warning('Filtering applied for %s - multiple features found as %s was read '
                        'from shp file - only simulations where all features match %s will be added'
                        % (key, key, value))
        else:
            # if thickness read from ini add thickness parameter name
            thIdList = [0]
            thNames = [key]
            allThNames = allThNames + [key]

        # check if filter criteria are met by thickness parameters for the sim in simDFrow
        for val in value:
            validationString = fetchValidationString(val, thIdList, thNames, simDFrow)
            # if criteria are met set new column value to True
            if validationString:
                simDF.loc[simHash, 'toBeAdded'] = True

    # get a list with all thickness parameters included in search
    allThNames = list(set(allThNames))

    if notIn:
        # return all sims that do not match filter criteria
        simDF = simDF[simDF['toBeAdded'] == False]
    else:
        # return all sims that do match filter criteria
        simDF = simDF[simDF['toBeAdded'] == True]

    log.info('simulations for %s found with values: %s' % (key, simDF[allThNames]))

    return simDF

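# Illustrative call (a hedged sketch): within filterSims, a parametersDict entry such as
# {'relTh': ['>1.0']} is routed here; given a configuration dataframe simDF this keeps only
# simulations where every release thickness feature (relTh0, relTh1, ... if read from shp,
# otherwise relTh) exceeds 1.0:
# >>> simDF = filterCom1DFAThicknessValues('relTh', ['>1.0'], simDF)
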
def fetchValidationString(val, thIdList, thNames, simDFrow):
    """ create a validation string to be checked if simDFrow matches filtering criteria (given by val)

        Parameters
        -----------
        val: str, float
            value to be checked
        thIdList: list
            list with thickness feature ids
        thNames: list
            list with thickness feature names
        simDFrow: pandas dataframe row
            parameters of simulation

        Returns
        --------
        validationString: bool
            True if simulation given by simDFrow matches filtering criteria
    """

    if isinstance(val, str):
        if '<' in val:
            validationString = (simDFrow[thNames].values < [float(val.split('<')[1])] * len(thIdList)).all()
        elif '>' in val:
            validationString = (simDFrow[thNames].values > [float(val.split('>')[1])] * len(thIdList)).all()
    else:
        validationString = (simDFrow[thNames].values == [val] * len(thIdList)).all()

    return validationString

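# Minimal self-contained example (illustrative data): one simulation row with two release thickness
# features read from shp, checked against the filter value '<1.5':
# >>> row = pd.Series({'relTh0': 1.2, 'relTh1': 1.4})
# >>> fetchValidationString('<1.5', ['0', '1'], ['relTh0', 'relTh1'], row)
# -> True (all features are below 1.5)
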
def applyCfgOverride(cfgToOverride, cfgWithOverrideParameters, module, addModValues=False):
    """ override configuration parameter values with the values provided in
        cfgWithOverrideParameters['modName_override']

        if addModValues is True, update cfgWithOverrideParameters with the values of all parameters
        that are not provided in the override parameters

        Parameters
        ----------
        cfgToOverride: configparser object
            configuration of module of interest
        cfgWithOverrideParameters: configparser object
            full configuration settings containing a section modName_override with parameter values
            that should be overridden in the cfgToOverride
        module: module
            module of the cfgToOverride configuration
        addModValues: bool
            if True add all parameters from cfgToOverride module to cfgWithOverrideParameters
            override section

        Returns
        --------
        cfgToOverride: configparser object
            updated configuration of module
        cfgWithOverrideParameters: configparser object
            updated full configuration settings (override section extended if addModValues is True)
    """

    # get path of module
    modPath = pathlib.Path(module.__file__).resolve().parent
    # get filename of module
    modName = str(pathlib.Path(module.__file__).stem)

    # create list with parameters that shall be overridden
    overrideParameters = cfgWithOverrideParameters['%s_override' % modName]
    overrideKeys = [item for item in overrideParameters]
    overrideKeys.remove('defaultConfig')

    # loop through sections of the configuration of the module
    foundKeys = []
    for section in cfgToOverride.sections():
        for key in overrideKeys:
            if cfgToOverride.has_option(section, key):
                cfgToOverride.set(section, key, overrideParameters[key])
                log.info('Override %s parameter: %s in section: %s with %s' %
                         (modName, key, section, str(overrideParameters[key])))
                foundKeys.append(key)

    if addModValues:
        for section in cfgToOverride.sections():
            for key in cfgToOverride[section]:
                if key not in overrideKeys:
                    # if no override value is provided add actual configuration parameter to override section
                    # useful for reproduction if onlyDefault = False and modName config was read from local
                    cfgWithOverrideParameters['%s_override' % modName][key] = cfgToOverride[section][key]
                    log.debug('Added %s: %s to override parameters ' % (key, cfgToOverride[section][key]))

    # log warning if parameter in override was not found in modName configuration
    notOverride = set(foundKeys).symmetric_difference(set(overrideKeys))
    for item in notOverride:
        if item != 'defaultConfig':
            log.warning('Additional Key [\'%s\'] in section %s_override is ignored.' % (item, modName))

    return cfgToOverride, cfgWithOverrideParameters
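# Illustrative usage (a hedged sketch; cfgMain is assumed to be a configparser object that already
# contains a 'com1DFA_override' section with a 'defaultConfig' key, and the default com1DFA
# configuration is assumed to be fetched via cfgUtils):
# >>> from avaframe.com1DFA import com1DFA
# >>> cfgCom1DFA = cfgUtils.getDefaultModuleConfig(com1DFA)
# >>> cfgCom1DFA, cfgMain = applyCfgOverride(cfgCom1DFA, cfgMain, com1DFA, addModValues=False)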