#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Utilities for working with cfg info
'''
import logging
import numpy as np
import pathlib
import pandas as pd
# Local imports
from avaframe.in3Utils import cfgUtils
import avaframe.in3Utils.fileHandlerUtils as fU
log = logging.getLogger(__name__)
def insertIntoSimName(name, keys, values, index):
    """ Add keys and values to name, in between parts of name split by index

    Parameters
    -----------
    name: str
        name to extend
    keys: list
        list with keys
    values: list
        list with values
    index: str
        used to split name

    Returns
    --------
    newName: str
        containing newName, with keys and values inserted after index

    Raises
    ------
    IndexError
        if index is not found in name (SOMENAME_simHash_XXX is expected)
    """
    # split only at the first occurrence so that any later occurrence of the
    # index pattern in the tail is preserved (previously the tail was dropped)
    splitName = name.split(index + '_', 1)
    # build the '_key_value_..._' insert; reduces to '_' for empty keys
    pairs = '_'.join('%s_%s' % (key, value) for key, value in zip(keys, values))
    newPart = '_' + pairs + '_' if pairs else '_'
    # put the new name back together: prefix + index + inserted pairs + rest
    try:
        newName = splitName[0] + str(index) + newPart + splitName[1]
    except IndexError:
        log.info(splitName)
        msg = 'Some part is missing. SOMENAME_simHash_XXX is expected'
        log.error(msg)
        raise IndexError(msg)
    return newName
def addInfoToSimName(avalancheDir, csvString=''):
    """ Add parameterName and value to simNames of simulation dataframe

    E.g used as helper routine for renaming layernames in qgis

    Parameters
    -----------
    avalancheDir: str
        path to avalanche directory
    csvString: str
        comma separated list with parameter names, as found in com1DFA ini file
        eg. 'mu,tau0,tEnd'

    Returns
    --------
    simDF: dataframe
        containing index, the parameters and the old and new name
    """
    # read the allConfigurationInfo
    simDF, _ = cfgUtils.readAllConfigurationInfo(avalancheDir)
    # 'varList' instead of 'vars' to avoid shadowing the builtin
    varList = csvString.split(',')
    # insertIntoSimName receives the FULL parameter list per row, so a single
    # apply is sufficient (fix: the previous per-parameter loop recomputed the
    # identical result len(varList) times); row.name is the simHash index
    simDF['newName'] = simDF.apply(lambda row: insertIntoSimName(row['simName'],
                                   varList, row[varList], row.name), axis=1)
    # return the parameters plus the old and new name columns
    return simDF[varList + ['simName', 'newName']]
def filterSims(avalancheDir, parametersDict, specDir='', simDF=''):
    """ Filter simulations using a list of parameters and a pandas dataFrame of simulation configurations

    if ~ is used as a prefix for a parameter - it is filtered according to values that do NOT match the value
    provided with the ~Parameter

    Parameters
    -----------
    avalancheDir: str
        path to avalanche directory
    parametersDict: dict
        dictionary with parameter and parameter values for filtering
    specDir: str
        path to a directory where simulation configuration files can be found - optional
    simDF: pandas DataFrame
        optional - if simDF already available

    Returns
    --------
    simNameList: list
        list of simNames that match filtering criteria
    """
    if not isinstance(simDF, pd.DataFrame):
        # load dataFrame for all configurations
        simDF = cfgUtils.createConfigurationInfo(avalancheDir, standardCfg='', writeCSV=False, specDir=specDir)
    # thickness parameters may be defined per shp feature and need special handling
    thicknessKeys = {'relTh', 'entTh', 'secondaryRelTh', '~relTh', '~entTh', '~secondaryRelTh'}
    # filter simulations - all conditions in the parametersDict have to be met
    if parametersDict != '':
        for key, value in parametersDict.items():
            # first check if values are valid; explicit comparisons kept on purpose:
            # np.float64 is False for np.float64 != []
            if value == '' or value == []:
                log.debug('Parameter %s is not used for filtering as no valid value is provided: %s' % (key, value))
            else:
                # convert scalar values to list
                if not isinstance(value, (list, np.ndarray)):
                    value = [value]
                # remove non matching simulations from simDF
                if key in thicknessKeys:
                    simDF = filterCom1DFAThicknessValues(key, value, simDF)
                else:
                    simDF = removeSimsNotMatching(simDF, key, value)
    # list of simNames after filtering
    simNameList = simDF['simName'].tolist()
    return simNameList
def removeSimsNotMatching(simDF, key, value):
    """ remove simulations from simDF that do not match filtering critera

    A '~' prefix on key inverts the filter: only simulations that do NOT
    match are kept. String values may carry a '<' or '>' operator
    (e.g. '<1.5'); only the first element of value is inspected for an
    operator, as in the original filtering contract.

    Parameters
    -----------
    simDF: pandas dataframe
        dataframe with one row per simulation and info on its characteristics, parameters used,..
    key: str
        name of parameter that shall be used for filtering
    value: list
        list of parameter values used for filtering

    Returns
    ---------
    simDF: pandas dataframe
        updated dataframe with only those simulations that match filtering criteria
    """
    # check if negation in filtering criteria
    notIn = False
    if '~' in key:
        # only keep simulations that do not match the value of ~key
        key = key.replace("~", "")
        notIn = True
    # build a boolean mask of simulations matching the criteria
    if isinstance(value[0], str):
        if '<' in value[0]:
            mask = simDF[key] < float(value[0].split('<')[1])
        elif '>' in value[0]:
            mask = simDF[key] > float(value[0].split('>')[1])
        else:
            mask = simDF[key].isin(value)
    else:
        # if float comparison allow for tolerance
        mask = np.isclose(simDF[key].values.reshape(-1, 1), value, atol=1.e-7, rtol=1.e-8).any(axis=1)
    # apply the mask, inverted for '~' keys
    # (fix: previously the negation was silently ignored for '<' / '>' criteria)
    if notIn:
        simDF = simDF[~mask]
    else:
        simDF = simDF[mask]
    return simDF
def orderSimulations(varParList, ascendingOrder, simDF):
    """ Order simulations dataframe using a list of parameters and a flag if in ascending or descending order

    Parameters
    -----------
    varParList: str or list
        simulation configuration parameters for ordering simulations
    ascendingOrder: bool
        True if simulations shall be ordered in ascending order regarding varPar
    simDF: pandas dataFrame
        dataFrame of simulations (one line per simulation with fileName, ... and values for parameters in
        varParList)

    Returns
    --------
    varParList: list
        parameters used for ordering, always as a list
    simDF: pandas dataFrame
        sorted dataFrame of simulation results (fileName, ... and values for parameters in varParList)

    Raises
    ------
    KeyError
        if a parameter in varParList is not a column of simDF
    """
    # a single parameter name is normalised to a one-element list
    varParList = [varParList] if isinstance(varParList, str) else varParList
    # sort by the requested parameters; an unknown column raises KeyError,
    # which is logged and re-raised with a helpful message
    try:
        sortedDF = simDF.sort_values(by=varParList, ascending=ascendingOrder)
    except KeyError as e:
        message = 'Choose a valid parameter for sorting the simulations. \'%s\' is not valid.' % e.args[0]
        log.error(message)
        raise KeyError(message)
    return varParList, sortedDF
def fetchAndOrderSimFiles(avalancheDir, inputDir, varParList, ascendingOrder, specDir='', resFiles=False):
    """ Fetch simulation results and order them by a list of parameters, ascending or descending

    Parameters
    -----------
    avalancheDir: str
        path to avalanche directory
    inputDir: str
        path to simulation results
    varParList: str or list
        simulation configuration parameters for ordering simulations
    ascendingOrder: bool
        True if simulations shall be ordered in ascending order regarding varPar
    specDir: str
        path to a directory where simulation configuration files can be found - optional
    resFiles: bool
        if True merge result-file info from inputDir into the configuration dataframe

    Returns
    --------
    dataDFNew: pandas dataFrame
        dataFrame of simulation results (fileName, ... and values for parameters in varParList)
    """
    # configuration info for all simulations
    configDF = cfgUtils.createConfigurationInfo(avalancheDir, specDir=specDir)
    if not resFiles:
        dataDFNew = configDF
    else:
        # dataframe of result files found in inputDir
        resultsDF = fU.makeSimDF(inputDir)
        parNames = [varParList] if isinstance(varParList, str) else varParList
        # 'simName' is the merge key between result files and configurations
        mergeColumns = ['simName'] + parNames
        dataDFNew = resultsDF.merge(configDF[mergeColumns], left_on='simName',
                                    right_on='simName')
    # delegate sorting (and sort-key validation) to orderSimulations
    varParList, dataDFNew = orderSimulations(varParList, ascendingOrder, dataDFNew)
    return dataDFNew
def orderSimFiles(avalancheDir, inputDir, varParList, ascendingOrder, specDir='', resFiles=False):
    """ Fetch simulation results and order them by a list of parameters, ascending or descending

    Parameters
    -----------
    avalancheDir: str
        path to avalanche directory
    inputDir: str
        path to simulation results
    varParList: str or list
        simulation configuration parameters for ordering simulations
    ascendingOrder: bool
        True if simulations shall be ordered in ascending order regarding varPar
    specDir: str
        path to a directory where simulation configuration files can be found - optional
    resFiles: bool
        if True merge result-file info from inputDir into the configuration dataframe

    Returns
    --------
    dataDFNew: pandas dataFrame
        dataFrame of simulation results (fileName, ... and values for parameters in varParList)
    """
    # load dataFrame for all configurations
    simDF = cfgUtils.createConfigurationInfo(avalancheDir, specDir=specDir)
    # make sure that parameters used for ordering are provided as list
    if isinstance(varParList, str):
        varParList = [varParList]
    if resFiles:
        # create dataframe for simulation results in inputDir
        dataDF = fU.makeSimDF(inputDir)
        # append 'simName' for merging of dataframes according to simNames
        columnNames = ['simName'] + varParList
        # merge varParList parameters as columns to dataDF for matching simNames
        dataDFNew = dataDF.merge(simDF[columnNames], left_on='simName',
                                 right_on='simName')
    else:
        dataDFNew = simDF
    # delegate sorting to orderSimulations for consistency with
    # fetchAndOrderSimFiles (fix: previously the sort was duplicated here
    # without the sort-key validation and error logging)
    _, dataDFNew = orderSimulations(varParList, ascendingOrder, dataDFNew)
    return dataDFNew
def filterCom1DFAThicknessValues(key, value, simDF):
    """ thickness settings different if read from shpfile - requires more complex filtering

    if read from shp - thickness values are provided per feature!!
    for example relTh = '' but relTh0 = 1 is appended for feature with id 0, relTh1 for feature
    with id 1, etc.

    Parameters
    -----------
    key: str
        name of parameter (e.g. 'relTh'); a '~' prefix inverts the filter
    value: list
        list of values used for filtering
    simDF: pandas dataframe
        configuration info for each simulation

    Returns
    --------
    simDF: pandas data frame
        updated dataframe (note: the helper column 'toBeAdded' remains in the result)
    """
    # check if filter for values that do NOT match criteria
    notIn = False
    if '~' in key:
        key = key.split('~')[1]
        notIn = True
    # create required parameters for searching
    thFlag = key + 'FromShp'
    thId = key + 'Id'
    # NOTE(review): thThickness and thPercentVariation are built but not used below
    thThickness = key + 'Thickness'
    thPercentVariation = key + 'PercentVariation'
    # append identifier if simulation matches thickness filter criteria
    simDF['toBeAdded'] = False
    # initialize list for thickness parameter names (according to thickness configuration -
    # e.g. mutiple features)
    allThNames = []
    # loop over simDF and set identifier if filter criteria are matched
    for simHash, simDFrow in simDF.iterrows():
        # config values are stored as strings, hence the comparison to 'True'
        if simDFrow[thFlag] == 'True':
            # inititialise thickness ids and thickness parameter names if thickness read from shp
            # ids are '|'-separated; one column per feature (key + feature id)
            thIdList = str(simDFrow[thId]).split('|')
            thNames = [(key + id) for id in thIdList]
            allThNames = allThNames + thNames
            log.warning('Filtering applied for %s - multiple features found as %s was read \
                from shp file - only simulations where all features match %s will be added' %
                (key, key, value))
        else:
            # if thickness read from ini add thickness parameter name
            thIdList = [0]
            thNames = [key]
            allThNames = allThNames + [key]
        # check if filter criteria are met by thickness parameters for the sim in simDFrow
        # a simulation is flagged as soon as ANY of the provided values matches all features
        for val in value:
            validationString = fetchValidationString(val, thIdList, thNames, simDFrow)
            # if we set new column value to True
            if validationString:
                simDF.loc[simHash, 'toBeAdded'] = True
    # get a list with all thickness parameters included in search (deduplicated)
    allThNames = list(set(allThNames))
    if notIn:
        # return all sims that do not match filter criteria
        simDF = simDF[simDF['toBeAdded'] == False]
    else:
        # return all sims that do match filter criteria
        simDF = simDF[simDF['toBeAdded'] == True]
    log.info('simulations for %s found with values: %s' % (key, simDF[allThNames]))
    return simDF
def fetchValidationString(val, thIdList, thNames, simDFrow):
    """ create a validation string to be checked if simDFrow matches filtering criteria (given by val)

    Parameters
    -----------
    val: str, float
        value to be checked; a string may carry a '<' or '>' operator (e.g. '<1.5')
    thIdList: list
        list with thickness feature ids
    thNames: list
        list with thickness feature names
    simDFrow: pandas dataframe row
        parameters of simulation

    Returns
    --------
    validationString: bool
        True if ALL thickness features of simDFrow match the filtering criteria
    """
    thValues = simDFrow[thNames].values
    nFeatures = len(thIdList)
    # fix: every path now assigns validationString - previously a value
    # without '<' / '>' handling could leave it unbound (NameError)
    if isinstance(val, str) and '<' in val:
        validationString = (thValues < [float(val.split('<')[1])] * nFeatures).all()
    elif isinstance(val, str) and '>' in val:
        validationString = (thValues > [float(val.split('>')[1])] * nFeatures).all()
    else:
        # plain equality for numeric values or operator-free strings
        validationString = (thValues == [val] * nFeatures).all()
    return validationString
def applyCfgOverride(cfgToOverride, cfgWithOverrideParameters, module, addModValues=False):
    """ override configuration parameter values with the values provided in cfgWithOverrideParameters[modName_override]

    if addModValues True update the cfgWithOverrideParameters with the values for all parameters that are not
    provided in the override parameters

    Parameters
    ----------
    cfgToOverride: configparser object
        configuration of module of interest
    cfgWithOverrideParameters: configparser object
        full configuration settings containing a section modName_override with parameter values
        that should be overriden in the cfgToOverride
    module
        module of the cfgToOverride configuration
    addModValues: bool
        if True add all parameters from cfgToOverride module to cfgWithOverrideParameters override
        section

    Returns
    --------
    cfgToOverride: configparser object
        updated configuration of module
    cfgWithOverrideParameters: configparser object
        updated configuration of module
    """
    # module name determines the '<modName>_override' section to read
    modName = str(pathlib.Path(module.__file__).stem)
    # create list with parameters that become overridden
    overrideParameters = cfgWithOverrideParameters['%s_override' % modName]
    overrideKeys = [item for item in overrideParameters]
    # 'defaultConfig' is a control flag, not a parameter to override
    # (fix: guard the removal - a missing 'defaultConfig' entry crashed with ValueError)
    if 'defaultConfig' in overrideKeys:
        overrideKeys.remove('defaultConfig')
    # loop through sections of the configuration of the module
    foundKeys = []
    for section in cfgToOverride.sections():
        for key in overrideKeys:
            if cfgToOverride.has_option(section, key):
                cfgToOverride.set(section, key, overrideParameters[key])
                log.info('Override %s parameter: %s in section: %s with %s' % (modName, key, section, str(overrideParameters[key])))
                foundKeys.append(key)
    if addModValues:
        for section in cfgToOverride.sections():
            for key in cfgToOverride[section]:
                if key not in overrideKeys:
                    # if no override value is provided add actual configuration parameter to override section
                    # useful for reproduction if onlyDefault = False and modName config was read from local
                    cfgWithOverrideParameters['%s_override' % modName][key] = cfgToOverride[section][key]
                    log.debug('Added %s: %s to override parameters ' % (key, cfgToOverride[section][key]))
    # log warning for override parameters that were not found in the modName configuration
    # (foundKeys is a subset of overrideKeys, so a plain set difference suffices)
    notOverride = set(overrideKeys) - set(foundKeys)
    for item in notOverride:
        log.warning('Additional Key [\'%s\'] in section %s_override is ignored.' % (item, modName))
    return cfgToOverride, cfgWithOverrideParameters