'''
Utilities for handling configuration files
'''
import configparser
import logging
import pathlib
import hashlib
import json
import pandas as pd
import re
import math
import multiprocessing
from deepmerge import always_merger
from copy import deepcopy
from deepdiff import DeepDiff
from pprint import pformat
import numpy as np
# Local imports
import avaframe as avaf
from avaframe.in3Utils import logUtils
from avaframe.in3Utils import fileHandlerUtils as fU
log = logging.getLogger(__name__)
def getGeneralConfig(nameFile=''):
    """ Return the general configuration for avaframe

    Parameters
    ----------
    nameFile: pathlib path
        optional full path to a configuration file; a file named local_<name> in the
        same folder is merged on top of it if it exists.
        If nameFile is not a pathlib path, avaframeCfg.ini and local_avaframeCfg.ini
        from the avaframe package folder are used

    Returns
    -------
    cfg: configParser object
        the general configuration

    Raises
    ------
    FileNotFoundError
        if neither the local nor the default file exists
    """
    # folder in which the avaframe package lives
    modPath = pathlib.Path(avaf.__file__).resolve().parent

    if isinstance(nameFile, pathlib.Path):
        defaultFile = nameFile
        localFile = nameFile.parents[0] / ('local_' + nameFile.name)
    else:
        defaultFile = modPath / 'avaframeCfg.ini'
        localFile = modPath / 'local_avaframeCfg.ini'

    # a local file triggers a comparison against the default, otherwise only
    # the default is read
    if localFile.is_file():
        cfgSources, compare = [defaultFile, localFile], True
    elif defaultFile.is_file():
        cfgSources, compare = defaultFile, False
    else:
        raise FileNotFoundError('None of the provided cfg files exist ')

    cfg, _ = readCompareConfig(cfgSources, 'General', compare)
    return cfg
def getModuleConfig(module, fileOverride='', modInfo=False, toPrint=True, onlyDefault=False):
    """ Return the configuration for a given module

    The files are looked up in this order:
    fileOverride -> local_MODULECfg.ini -> MODULECfg.ini

    Parameters
    ----------
    module: module object or pathlib path
        the already imported module, e.g.
        ``from avaframe.com2AB import com2AB`` leads to ``getModuleConfig(com2AB)``,
        ``from avaframe.com2AB import com2AB as c2`` leads to ``getModuleConfig(c2)``;
        OR the pathlib path to the module (python file)
    fileOverride: str or pathlib path
        allows for a completely different file location. However note:
        missing values from the default cfg will always be added!
    modInfo: bool
        if True, additionally return the dictionary with the differences to the default config
    toPrint: bool
        passed on to readCompareConfig, controls printing of the configuration
    onlyDefault: bool
        if True, a local_MODULECfg.ini is ignored (a fileOverride is still used)

    Returns
    -------
    cfg: configParser object
        configuration of the module
    modDict: dict
        only returned if modInfo is True - differences to the default configuration

    Raises
    ------
    TypeError
        if onlyDefault is not a boolean
    FileNotFoundError
        if fileOverride is given but is not a file, or if no cfg file is found at all
    """
    if not isinstance(onlyDefault, bool):
        message = 'OnlyDefault parameter is not a boolean but %s' % type(onlyDefault)
        log.error(message)
        raise TypeError(message)

    # folder and name of the module
    if isinstance(module, pathlib.Path):
        modPath, modName = module.parent, module.stem
    else:
        modPath, modName = getModPathName(module)

    localFile = modPath / ('local_'+modName+'Cfg.ini')
    defaultFile = modPath / (modName+'Cfg.ini')
    log.debug('localFile: %s', localFile)
    log.debug('defaultFile: %s', defaultFile)

    # decide which file(s) to read
    if fileOverride:
        fileOverride = fU.checkPathlib(fileOverride)
        if not fileOverride.is_file():
            raise FileNotFoundError('Provided fileOverride does not exist: ' +
                                    str(fileOverride))
        cfgSources, compare = [defaultFile, fileOverride], True
    elif localFile.is_file() and not onlyDefault:
        cfgSources, compare = [defaultFile, localFile], True
    elif defaultFile.is_file():
        cfgSources, compare = defaultFile, False
    else:
        raise FileNotFoundError('None of the provided cfg files exist ')

    cfg, modDict = readCompareConfig(cfgSources, modName, compare, toPrint)

    return (cfg, modDict) if modInfo else cfg
def getDefaultModuleConfig(module, toPrint=True):
    """ Return the default configuration for a given module

    Only MODULECfg.ini located next to the module is read, no comparison is performed.

    Parameters
    ----------
    module: module object
        the already imported module, e.g.
        ``from avaframe.com2AB import com2AB`` leads to ``getDefaultModuleConfig(com2AB)``,
        ``from avaframe.com2AB import com2AB as c2`` leads to ``getDefaultModuleConfig(c2)``
    toPrint: bool
        passed on to readCompareConfig, controls printing of the configuration

    Returns
    -------
    cfg: configParser object
        default configuration of the module
    """
    modPath, modName = getModPathName(module)
    defaultFile = modPath / (modName+'Cfg.ini')

    log.info('Getting the default config for %s', modName)
    log.debug('defaultFile: %s', defaultFile)

    # second return value (modification dict) is not needed here
    return readCompareConfig(defaultFile, modName, compare=False, toPrint=toPrint)[0]
def readCompareConfig(iniFile, modName, compare, toPrint=True):
    """ Read and optionally compare configuration files (if a local and default are both provided)
    and inform user of the eventual differences. Take the default as reference.

    Parameters
    ----------
    iniFile: path to config file, or list of two paths
        only one path if compare=False; [default, local] if compare=True
    modName: str
        name of the module the configuration belongs to (used for logging)
    compare: boolean
        True if two paths are provided and a comparison is needed
    toPrint: boolean
        True (default) to print configuration to terminal. Differences to default
        will ALWAYS be printed

    Returns
    -------
    modCfg: ConfigParser object
        contains combined config
    modDict: dict
        dictionary containing only differences from default (empty if compare=False)
    """
    if not compare:
        log.info('Reading config from: %s', iniFile)
        # case sensitive parser for the single file
        modCfg = configparser.ConfigParser()
        modCfg.optionxform = str
        modCfg.read(iniFile)

        # write config to log file
        if toPrint:
            logUtils.writeCfg2Log(modCfg, modName)
        return modCfg, {}

    defaultPath, localPath = iniFile[0], iniFile[1]
    log.info('Reading config from: %s and %s', defaultPath, localPath)

    # one case sensitive parser per file
    defCfg = configparser.ConfigParser()
    locCfg = configparser.ConfigParser()
    defCfg.optionxform = str
    locCfg.optionxform = str
    defCfg.read(defaultPath)
    locCfg.read(localPath)

    log.debug('Writing cfg for: %s', modName)
    # merge local into default and collect the modifications
    modDict, modCfg = compareTwoConfigs(defCfg, locCfg, toPrint=toPrint)
    return modCfg, modDict
def _splitDeepDiffValuesChangedItem(inKey, inVal):
    """ splits one item of a deepdiff result into section, key, old value, new value

    The path components are taken from the bracketed parts of inKey, e.g.
    ``root['GENERAL']['relTh']`` gives section ``GENERAL`` and key ``relTh``.
    Section and option names may contain any characters (e.g. ``-``, ``.`` or blanks).

    Parameters
    -----------
    inKey: str
        key of a deepdiff changed_values item
    inVal: dict
        value of a deepdiff changed_values item, needs the keys old_value and new_value

    Returns
    --------
    section: str
        section name of changed item
    key: str
        key name of changed item
    oldVal: str
        old value
    newVal: str
        new value

    Raises
    ------
    IndexError
        if inKey holds fewer than two bracketed components
    """
    # group 1 captures the optional quote so the same quote has to close the name,
    # group 2 is the name itself
    splitKey = [name for _, name in re.findall(r"\[(['\"]?)(.+?)\1\]", inKey)]
    section, key = splitKey[0], splitKey[1]
    return section, key, inVal['old_value'], inVal['new_value']
def compareTwoConfigs(defCfg, locCfg, toPrint=False):
    """ compare locCfg to defCfg and return a cfg object and modification dict

    Values are merged from locCfg to defCfg:
    - parameters already in defCfg get the value from locCfg
    - additional values in locCfg get added in the resulting Cfg

    Parameters
    -----------
    defCfg: configparser object
        default configuration
    locCfg: configuration object
        configuration that is compared to defCfg
    toPrint: bool
        flag if config shall be printed to log

    Returns
    --------
    modInfo: dict
        dictionary containing only differences from default,
        modInfo[section][key] = [local value, default value]
    cfg: configParser object
        contains combined config
    """
    log.info('Comparing two configs')

    # work on plain dictionaries
    defaultDict = convertConfigParserToDict(defCfg)
    localDict = convertConfigParserToDict(locCfg)

    # difference info, default is the reference
    cfgDiff = DeepDiff(defaultDict, localDict)

    # merge into a copy of the default: new keys are added, for existing keys
    # the local value wins
    mergedDict = deepcopy(defaultDict)
    always_merger.merge(mergedDict, localDict)

    modCfg = convertDictToConfigParser(mergedDict)
    modCfg.optionxform = str

    # merging is finished, the remainder only creates printouts and modInfo
    if toPrint:
        for line in pformat(mergedDict, sort_dicts=False).split('\n'):
            log.info(line)

    modInfo = {}
    if 'values_changed' in cfgDiff:
        for diffPath, change in cfgDiff['values_changed'].items():
            section, itemKey, defValue, locValue = _splitDeepDiffValuesChangedItem(diffPath, change)
            modInfo.setdefault(section, {})[itemKey] = [locValue, defValue]

    # report the changes
    log.info('COMPARING TO DEFAULT, THESE CHANGES HAPPENED:')
    for line in cfgDiff.pretty().split('\n'):
        log.info(line.replace('root',''))

    return modInfo, modCfg
def writeCfgFile(avaDir, module, cfg, fileName='', filePath=''):
    """ Save configuration used to text file in Outputs/moduleName/configurationFiles/modName.ini
    or optional to filePath and with fileName

    Parameters
    -----------
    avaDir: str
        path to avalanche directory
    module:
        module, its file name (without extension) is used as module name
    cfg: configparser object
        configuration settings
    fileName: str
        name of saved configuration file (without .ini) - optional
    filePath: str or pathlib path
        path where file should be saved to except file name - optional

    Returns
    --------
    pathToFile: pathlib path
        path of the written configuration file

    Raises
    ------
    NotADirectoryError
        if filePath is provided but is not an existing directory
    """
    # name of the module = file name up to the first dot
    modName = pathlib.Path(module.__file__).name.split('.')[0]

    if filePath == '':
        outDir = pathlib.Path(avaDir, 'Outputs', modName, 'configurationFiles')
        fU.makeADir(outDir)
    else:
        # convert first so that a plain str works as well as a pathlib path
        outDir = pathlib.Path(filePath)
        if not outDir.is_dir():
            message = '%s is not a valid location for saving cfg file' % str(filePath)
            log.error(message)
            raise NotADirectoryError(message)

    if fileName == '':
        fileName = modName
    pathToFile = pathlib.Path(outDir, '%s.ini' % (fileName))

    with open(pathToFile, 'w') as conf:
        cfg.write(conf)

    return pathToFile
def readCfgFile(avaDir, module='', fileName=''):
    """ Read configuration from ini file

    If fileName is provided, the configuration is read from fileName; otherwise, if module
    is provided, it is read from avaDir/Outputs/<moduleName>_settings.ini

    Parameters
    -----------
    avaDir: str
        path to avalanche directory
    module:
        module, its file name (without extension) is used as module name
    fileName: str
        path to file that should be read - optional

    Returns
    --------
    cfg: configParser object
        configuration that is from file (option names are kept case sensitive)

    Raises
    ------
    NameError
        if neither module nor fileName is provided
    """
    if fileName != '':
        inFile = fileName
    elif module != '':
        # name of the module = file name up to the first dot
        modName = pathlib.Path(module.__file__).name.split('.')[0]
        inFile = pathlib.Path(avaDir, 'Outputs', '%s_settings.ini' % (modName))
    else:
        message = 'Please provide either a module or a fileName to read configuration from file'
        log.error(message)
        raise NameError(message)

    # optionxform has to be set before reading to keep option names case sensitive
    cfg = configparser.ConfigParser()
    cfg.optionxform = str
    cfg.read(inFile)

    return cfg
def cfgHash(cfg, typeDict=False):
    """ UID hash of a config

    The configuration is serialised to json with sorted keys and hashed with shake_256.

    Parameters
    ----------
    cfg: configParser object or dict
        configuration; pass a dictionary together with typeDict=True
    typeDict: bool
        True if cfg is already a dictionary

    Returns
    --------
    uid: str
        uid hash (10 hexadecimal characters)
    """
    cfgDict = cfg if typeDict else convertConfigParserToDict(cfg)

    serialized = json.dumps(cfgDict, sort_keys=True, ensure_ascii=True)

    # a digest of 5 bytes gives 10 hex characters
    return hashlib.shake_256(serialized.encode()).hexdigest(5)
def convertConfigParserToDict(cfg):
    """ create dictionary from configparser object

    Parameters
    -----------
    cfg: configParser object
        configuration to convert

    Returns
    --------
    cfgDict: dict
        one dictionary of option: value pairs per section, as returned by cfg.items(section)
    """
    return {section: {key: val for key, val in cfg.items(section)}
            for section in cfg.sections()}
def convertDictToConfigParser(cfgDict):
    """ create configParser object from dict

    Parameters
    -----------
    cfgDict: dict
        dictionary with one dictionary of option: value pairs per section

    Returns
    --------
    cfg: configParser object
        case sensitive configuration holding all sections of cfgDict
    """
    parser = configparser.ConfigParser()
    # keep option names as they are instead of lower-casing them
    parser.optionxform = str
    for sectionName, options in cfgDict.items():
        parser[sectionName] = options
    return parser
def writeDictToJson(inDict, outFilePath):
    """ write a dictionary to a json file

    Parameters
    -----------
    inDict: dict
        dictionary to write, keys are sorted in the output
    outFilePath: str or pathlib path
        path of the json file, an existing file is overwritten
    """
    jsonDict = json.dumps(inDict, sort_keys=True, ensure_ascii=True)

    # context manager closes the file also if writing fails
    with open(outFilePath, "w") as f:
        f.write(jsonDict)
def createConfigurationInfo(avaDir, comModule='com1DFA', standardCfg='', writeCSV=False, specDir=''):
    """ Read configurations from all simulations configuration ini files from directory

    Files with sourceConfiguration in their path are skipped.

    Parameters
    -----------
    avaDir: str
        path to avalanche directory
    comModule: str
        name of the module, used to locate avaDir/Outputs/comModule/configurationFiles
    standardCfg: configParser object
        standard configuration for module, appended as row 'current standard' - optional
    writeCSV: bool
        True if configuration dataFrame shall be written to csv file
    specDir: str
        path to a directory where simulation configuration files can be found - optional

    Returns
    --------
    simDF: pandas DataFrame
        DF with all the simulation configurations

    Raises
    ------
    NotADirectoryError
        if the configuration file directory does not exist
    FileNotFoundError
        if the directory holds no simulation configuration file
    """
    if specDir != '':
        inDir = pathlib.Path(specDir, 'configurationFiles')
    else:
        inDir = pathlib.Path(avaDir, 'Outputs', comModule, 'configurationFiles')

    if not inDir.is_dir():
        message = 'configuration file directory not found: %s' % (inDir)
        log.error(message)
        raise NotADirectoryError(message)

    # glob returns a lazy generator that never compares equal to [], so collect the
    # relevant files in a list before checking whether there are any
    configFiles = [cFile for cFile in inDir.glob('*.ini')
                   if 'sourceConfiguration' not in str(cFile)]
    if not configFiles:
        message = 'No configuration file found in: %s' % (inDir)
        log.error(message)
        raise FileNotFoundError(message)

    # read each file and append its configuration as one row;
    # the empty string marks the not yet created dataFrame for appendCgf2DF
    simDF = ''
    for cFile in configFiles:
        simName = pathlib.Path(cFile).stem
        # the simulation hash is the first name part after _AF_, or else the
        # second part of the name split at _
        if '_AF_' in simName:
            infoParts = simName.split('_AF_')[1].split('_')
        else:
            infoParts = simName.split('_')[1:]
        simHash = infoParts[0]
        cfgObject = readCfgFile(avaDir, fileName=cFile)
        simDF = appendCgf2DF(simHash, simName, cfgObject, simDF)

    # convert numeric parameters to numerics
    simDF = convertDF2numerics(simDF)

    # add default configuration
    if standardCfg != '':
        simDF = appendCgf2DF('current standard', 'current standard', standardCfg, simDF)

    if writeCSV:
        writeAllConfigurationInfo(avaDir, simDF, specDir=specDir)

    return simDF
def appendCgf2DF(simHash, simName, cfgObject, simDF):
    """ append simulation configuration to the simulation dataframe

    The sections GENERAL and INPUT are required; VISUALISATION is added if present.
    All other sections are ignored.

    Parameters
    -----------
    simHash: str
        hash of the simulation to append, used as row index
    simName: str
        name of the simulation, stored in column simName
    cfgObject: configParser
        configuration corresponding to the simulation
    simDF: pandas dataFrame
        configuration dataframe; any str (e.g. '') to start a new dataframe

    Returns
    --------
    simDF: pandas DataFrame
        DF appended with the new simulation configuration
    """
    rowIndex = [simHash]
    cfgDict = convertConfigParserToDict(cfgObject)

    # one single-row frame per section, joined side by side
    sectionFrames = [pd.DataFrame(data=cfgDict['GENERAL'], index=rowIndex),
                     pd.DataFrame(data=cfgDict['INPUT'], index=rowIndex)]
    if 'VISUALISATION' in cfgDict:
        sectionFrames.append(pd.DataFrame(data=cfgDict['VISUALISATION'], index=rowIndex))

    simItemDF = pd.concat(sectionFrames, axis=1).assign(simName=simName)

    # a str in place of the dataframe means there is nothing to append to yet
    if isinstance(simDF, str):
        return simItemDF
    return pd.concat([simDF, simItemDF], axis=0)
def appendTcpu2DF(simHash, tCPU, tCPUDF):
    """ append Tcpu dictionary to the dataframe

    Parameters
    -----------
    simHash: str
        hash of the simulation corresponding to the tCPU dict to append, used as row index
    tCPU: dict
        cpu time dict of the simulation
    tCPUDF: pandas dataFrame
        tCPU dataframe; any str (e.g. '') to start a new dataframe

    Returns
    --------
    tCPUDF: pandas DataFrame
        DF appended with the cpu times of the simulation
    """
    newRow = pd.DataFrame(data=tCPU, index=[simHash])

    # a str in place of the dataframe means there is nothing to append to yet
    if isinstance(tCPUDF, str):
        return newRow
    return pd.concat([tCPUDF, newRow], axis=0)
def convertDF2numerics(simDF):
    """ convert a string DF to a numerical one

    A column is converted with pd.to_numeric if at least one of its values consists of digits
    only (ignoring decimal points and a leading minus sign), if none of its values contains
    the separator | and if the column is not called tSteps.
    Values starting with nan are set to np.nan beforehand.

    Parameters
    -----------
    simDF: pandas dataFrame
        dataframe with string columns

    Returns
    --------
    simDF: pandas DataFrame
        dataframe with the numeric columns converted
    """
    for name in list(simDF.columns):
        # remove decimal points and a leading minus sign so that str.isdigit
        # recognises floats and negative numbers
        simDFTest = simDF[name].str.replace('.', '', regex=False)
        simDFTest = simDFTest.str.replace('^-', '', regex=True)

        # check for str(np.nan) as these cannot be converted to numerics by pd.to_numeric
        # but as friction model parameters are set to nans this is required here
        if simDFTest.str.match('nan').any():
            simDF = setStrnanToNan(simDF, simDFTest, name)

        # a single numeric row is enough, as other rows might hold nan
        looksNumeric = simDFTest.str.isdigit().any() and (name != 'tSteps')
        # values with | are lists of parameters and have to remain strings
        hasSeparator = simDF[name].str.contains('|', regex=False, na=False).any()

        if looksNumeric and not hasSeparator:
            simDF[name] = pd.to_numeric(simDF[name])
            log.debug('Converted to numeric %s' % name)
        else:
            log.debug('Not converted to numeric: %s' % name)

    return simDF
def setStrnanToNan(simDF, simDFTest, name):
    """ set pandas element to np.nan if it is a string nan

    Parameters
    -----------
    simDF: pandas dataFrame
        dataframe, modified in place
    simDFTest: pandas series
        series of strings with one entry per row of simDF; rows whose entry starts
        with nan (case insensitive) are set to np.nan
    name: str
        name of pandas dataframe column

    Returns
    --------
    simDF: pandas dataframe
        updated pandas dataframe with np.nan values where string nan was
    """
    isStrNan = simDFTest.str.match('nan', flags=re.IGNORECASE)
    rowLabels = simDF.index.values

    for rowNo, flagged in enumerate(isStrNan):
        if not flagged:
            continue
        # simDF.at avoids the copy vs view warning
        simDF.at[rowLabels[rowNo], name] = np.nan
        log.info('%s for index: %s set to numpy nan' % (name, rowNo))

    return simDF
def readAllConfigurationInfo(avaDir, specDir='', configCsvName='allConfigurations'):
    """ Read allConfigurations.csv file as dataFrame from directory

    The file is looked for in specDir/configurationFiles if specDir is provided,
    otherwise in avaDir/Outputs/com1DFA/configurationFiles.

    Parameters
    -----------
    avaDir: str
        path to avalanche directory
    specDir: str
        path to a directory where simulation configuration files can be found - optional
    configCsvName: str
        name of configuration csv file (without .csv)

    Returns
    --------
    simDF: pandas DataFrame
        DF with all the simulation configurations, None if the csv file does not exist
    simDFName: array
        simName column of the dataframe, empty list if the csv file does not exist
    """
    if specDir != '':
        cfgDir = pathlib.Path(specDir, 'configurationFiles')
    else:
        cfgDir = pathlib.Path(avaDir, 'Outputs', 'com1DFA', 'configurationFiles')
    csvPath = cfgDir / ('%s.csv' % configCsvName)

    if not csvPath.is_file():
        return None, []

    # keep_default_na=False: empty entries are not turned into NaN
    with open(csvPath, 'rb') as file:
        simDF = pd.read_csv(file, index_col=0, keep_default_na=False)
    return simDF, simDF['simName'].to_numpy()
def writeAllConfigurationInfo(avaDir, simDF, specDir='', csvName='allConfigurations.csv'):
    """ Write cfg configuration to allConfigurations.csv

    The file is written to specDir/configurationFiles if specDir is provided,
    otherwise to avaDir/Outputs/com1DFA/configurationFiles.

    Parameters
    -----------
    avaDir: str
        path to avalanche directory
    simDF: pandas dataFrame
        dataframe of the configuration
    specDir: str
        path to a directory where simulation configuration shall be saved - optional
    csvName: str
        name of csv file in which to save to - optional

    Returns
    --------
    configFiles: pathlib Path
        path where the configuration dataframe was saved
    """
    # folder that receives the csv file
    if specDir == '':
        outDir = pathlib.Path(avaDir, 'Outputs', 'com1DFA', 'configurationFiles')
    else:
        outDir = pathlib.Path(specDir, 'configurationFiles')

    csvPath = outDir / csvName
    simDF.to_csv(csvPath)
    return csvPath
def convertToCfgList(parameterList):
    """ convert a list into a string where individual list items are separated by |

    Parameters
    -----------
    parameterList: list
        list of parameter values, items that are not strings are converted with str()

    Returns
    ---------
    parameterString: str
        str with parameter values separated by |, empty string for an empty list
    """
    # join copes with empty and single item lists, str() allows for numeric items
    return '|'.join(str(item) for item in parameterList)
def getNumberOfProcesses(cfgMain, nSims):
    """ Determine how many CPU cores to take for parallel tasks

    Parameters
    -----------
    cfgMain: configuration object
        the main avaframe configuration; section MAIN is read: nCPU is either auto or an
        integer, CPUPercent is the percentage of available cores used if nCPU is auto
    nSims: integer
        number of simulations that need to be calculated

    Returns
    ---------
    nCPU: int
        number of cores to take, never more than nSims and never less than 1
    """
    maxCPU = multiprocessing.cpu_count()

    if cfgMain["MAIN"]["nCPU"] == 'auto':
        cpuPerc = float(cfgMain["MAIN"]["CPUPercent"]) / 100.
        nCPU = math.floor(maxCPU * cpuPerc)
    else:
        nCPU = cfgMain['MAIN'].getint('nCPU')

    # no more cores than simulations, but at least one: a small CPUPercent is
    # floored to 0 cores and nSims might be 0
    nCPU = max(1, min(nCPU, nSims))

    log.info("Number of simulations to perform: %s " % nSims)
    log.info("Taking %s cpu cores out of maximum of %s cores." % (nCPU, maxCPU))

    return nCPU
def getModPathName(module):
    """ get the path and name of a module from imported module

    Parameters
    ------------
    module: imported module
        its __file__ attribute is evaluated

    Returns
    --------
    modPath: pathlib path
        resolved path to directory where module is located
    modName: str
        name of module (file name without extension)
    """
    moduleFile = pathlib.Path(module.__file__)
    return moduleFile.resolve().parent, str(moduleFile.stem)