"""
plot statistics of simulations
"""
# load python modules
import numpy as np
import logging
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pathlib
# local imports
import avaframe.out3Plot.plotUtils as pU
import avaframe.in3Utils.fileHandlerUtils as fU
import avaframe.in2Trans.ascUtils as IOf
from avaframe.in3Utils import cfgHandling
import avaframe.in1Data.getInput as gI
import avaframe.com1DFA.deriveParameterSet as dP
import avaframe.out3Plot.plotUtils as pU
import avaframe.in3Utils.geoTrans as gT
# create local logger
log = logging.getLogger(__name__)
[docs]def plotValuesScatter(peakValues, resType1, resType2, cfg, avalancheDir, statsMeasure='max', flagShow=False):
""" Produce scatter plot of statistical measures (eg. max values) (resType1 und resType2),
for one set of simulations or multiple
Parameters
-----------
peakDictList: dict
peakValues dictionary that contain max values of peak parameters and parameter variation info
resType1: str
result parameter 1, 'ppr', 'pft', 'pfv'
resType2: str
result parameter 1, 'ppr', 'pft', 'pfv'
cfg: dict
configuration, for now contains output location and varPar: parameter that is varied
to perfom a set of simulations
statsMeasure: str
statistical measure for plotting, options: max, mean, min, std
flagShow: bool
if True show plot
"""
varPar = cfg['varPar']
# extract values from dictionaries
varVal = []
values1 = []
values2 = []
for key in peakValues:
values1.append(peakValues[key][resType1][statsMeasure])
values2.append(peakValues[key][resType2][statsMeasure])
varVal.append(peakValues[key]['varPar'])
log.info('Number of simulations is: %d' % (len(varVal)))
# Get name and units for resTypes and parameter variation to annotate plots
name1 = pU.cfgPlotUtils['name%s' % resType1]
name2 = pU.cfgPlotUtils['name%s' % resType2]
unit1 = pU.cfgPlotUtils['unit%s' % resType1]
unit2 = pU.cfgPlotUtils['unit%s' % resType2]
nameVar = cfg['varParName']
unitVar = cfg['varParUnit']
# if the varied parameter used for colorcoding is a string
if isinstance(varVal[0], str):
itemsList, ticksList, varValV = pU.getColorbarTicksForStrings(varVal)
else:
varValV = np.array(varVal)
itemsList = ''
# load variation colormap
cmap, _, ticks, norm = pU.makeColorMap(pU.cmapVar, np.amin(varValV), np.amax(varValV), continuous=True)
if isinstance(varVal[0], str):
ticks = ticksList
fig, ax = plt.subplots()
plt.title('%s vs. %s' % (name1, name2))
# set range and steps of colormap
cc = varValV
sc = ax.scatter(values1, values2, c=cc, cmap=cmap)
ax.set_xlabel('%s %s [%s]' % (statsMeasure, name1, unit1))
ax.set_ylabel('%s %s [%s]' % (statsMeasure, name2, unit2))
pU.addColorBar(sc, ax, ticks, unitVar, nameVar, tickLabelsList=itemsList)
pU.putAvaNameOnPlot(ax, avalancheDir)
outDir = cfg['outDir']
plotName = 'Scatter_%s_vs_%s_dist%s_%s' % (resType1, resType2, cfg['distType'], cfg['scenario'])
# save and or show figure
plotPath = pU.saveAndOrPlot({'pathResult': outDir}, plotName, fig)
[docs]def plotValuesScatterHist(peakValues, resType1, resType2, cfg, avalancheDir,
statsMeasure='max', flagShow=False, flagHue=False):
""" Produce scatter and marginal kde plot of max values, for one set of simulations or multiple
Parameters
-----------
peakValues: dict
peakValues dictionary that contain max values of peak parameters and parameter variation info
resType1: str
result parameter 1, 'ppr', 'pft', 'pfv'
resType2: str
result parameter 1, 'ppr', 'pft', 'pfv'
cfg: dict
configuration, for now contains output location and varPar: parameter that is varied
to perfom a set of simulations
statsMeasure: str
statistical measure for plotting, options: max, mean, min, std
flagShow: bool
if True show plot
"""
varPar = cfg['varPar']
# extract values from dictionaries
varVal = []
values1 = []
values2 = []
scenario = []
for key in peakValues:
values1.append(peakValues[key][resType1][statsMeasure])
values2.append(peakValues[key][resType2][statsMeasure])
varVal.append(peakValues[key]['varPar'])
if 'scenario' in peakValues[key] and flagHue:
scenario.append(peakValues[key]['scenario'])
log.info('Number of simulations is: %d' % (len(varVal)))
# Get name and units for resTypes and parameter variation to annotate plots
name1 = pU.cfgPlotUtils['name%s' % resType1]
name2 = pU.cfgPlotUtils['name%s' % resType2]
unit1 = pU.cfgPlotUtils['unit%s' % resType1]
unit2 = pU.cfgPlotUtils['unit%s' % resType2]
nameVar = cfg['varParName']
unitVar = cfg['varParUnit']
varValV = np.array(varVal)
title1 = statsMeasure + ' ' + name1+' [' + unit1 + ']'
title2 = statsMeasure + ' ' + name2+' [' + unit2 + ']'
if flagHue:
# create pandas data frame reqiured for seaborn jointplot
maxVals = {name1: values1, name2: values2, 'scenario': scenario}
maxData = pd.DataFrame(maxVals)
fig1 = sns.jointplot(data=maxData, x=name1, y=name2, hue="scenario")
else:
fig1 = sns.jointplot(x=values1, y=values2)
# add title and text box
fig1.ax_joint.set_xlabel(title1)
fig1.ax_joint.set_ylabel(title2)
pU.putAvaNameOnPlot(fig1.ax_joint, avalancheDir)
# save fig
outDir = cfg['outDir']
plotName = 'Scatterkde_%s_vs_%s_dist%s_%s' % (resType1, resType2, cfg['distType'], cfg['scenario'])
# save and or show figure
plotPath = pU.saveAndOrPlot({'pathResult': outDir}, plotName, fig1.figure)
[docs]def plotHistCDFDiff(dataDiffPlot, ax1, ax2, insert='True', title=['', '']):
""" Produce histogram and CDF plot of the raster difference of two simulations
Parameters
-----------
dataDiffPlot: 2D numpy array
raster of the difference of the two simulations
ax1: axes
axes for the histogram plot
ax2: axes
axes for the CDF plot
insert: boolean
true if the plots are in inserted axes (size of the lables is then smaller)
title: list
if not inserts, title for the plots
"""
# Difference between datasets
diffMax = np.nanmax(dataDiffPlot)
diffMin = np.nanmin(dataDiffPlot)
sortedDiffPlot = np.sort(np.abs(dataDiffPlot))
nSample = np.size(sortedDiffPlot)
hist = np.array(range(nSample))/float(nSample)
ticks = []
centile1 = float(pU.cfg['centile1'])
centile2 = float(pU.cfg['centile2'])
for val in [centile1, centile2]:
ind = np.searchsorted(hist, val)
ind = min(ind, np.size(hist)-1)
ax1.plot(sortedDiffPlot, hist)
ax1.hlines(hist[ind], 0, sortedDiffPlot[ind], linestyles='--', linewidths=0.5)
ax1.vlines(sortedDiffPlot[ind], 0, hist[ind], linestyles='--', linewidths=0.5)
ticks.append(sortedDiffPlot[ind])
ax2.set_xlim([-sortedDiffPlot[ind], sortedDiffPlot[ind]])
width = diffMax - diffMin
stepsInterval = int(pU.cfg['steps2Centile2'])
stepWidth = 2*sortedDiffPlot[ind]/stepsInterval # stepsInterval bins in the [-2sigma,+2sigma] interval
bins = int(width/stepWidth)
# reduce bins to a sensible size
if bins > 1000:
bins = 1000
ax2.hist(dataDiffPlot, bins=bins, density=True, histtype="stepfilled")
ax2.get_yaxis().set_ticks([])
if insert:
ax2.tick_params(axis='x', which='major', labelsize=8, rotation=45)
ax2.set_title(title[0])
ticks.append(np.floor(np.nanmax(np.abs(dataDiffPlot))))
ax1.set_xticks(ticks)
if insert:
ax1.tick_params(axis='both', which='major', labelsize=8, rotation=45)
ax1.set_title(title[1])
return ticks
[docs]def plotProbMap(avaDir, inDir, cfgFull, demPlot=False):
""" plot probability maps including contour lines
Parameters
----------
avaDir: str
path to avalanche directory
inDir: str
path to datasets that shall be plotted
cfgFull: configParser object
configuration settings for probAna
keys used: name, cmapType, levels, unit
demPlot: bool
if True plot dem in background with contourlines for elevation that is found in avaDir/Inputs
"""
# configuration settings
cfg = cfgFull['PLOT']
avaName = pathlib.PurePath(avaDir).name
# fetch probabiltiy map datasets in inDir
dataFiles = list(inDir.glob('*.asc'))
if dataFiles == []:
message = 'No probability map dataset found in: %s' % (inDir)
log.error(message)
raise FileNotFoundError(message)
log.info('Probability maps found for generating plots: %d' % len(dataFiles))
# load levels and define colors
levels = fU.splitIniValueToArraySteps(cfg['levels'])
multLabel = False
if len(levels) > 2:
multLabel = True
cmapType = pU.cmapGreys1
colorBackGround = 'seashell'
else:
cmapType = pU.colorMaps[cfg['cmapType']]
colorsP = ['black', 'white']
colorBackGround = 'gainsboro'
if len(levels) == 1 and levels[0] > 0.5:
colorsP = ['white']
unit = cfg['unit']
# set colors for contourlines according to levels
labels = []
for lev in levels:
labels.append('%.0f %%' % (lev*100))
# loop over all datasets and create plots
for data in dataFiles:
raster = IOf.readRaster(data, noDataToNan=True)
dataPlot = raster['rasterData']
header = IOf.readASCheader(data)
cellSize = header['cellsize']
# load correspoding DEM check for matching cellsize
demInputs = gI.getDEMPath(avaDir)
cfgDEM = {'GENERAL': {'avalancheDir': avaDir, 'meshCellSize': header['cellsize'],
'meshCellSizeThreshold': cfgFull['PLOT']['meshCellSizeThreshold']}}
pathDem = dP.checkRasterMeshSize(cfgDEM, demInputs, onlySearch=True)
if pathDem != '':
demFile = pathlib.Path(cfgDEM['GENERAL']['avalancheDir'], 'Inputs', pathDem)
demData = IOf.readRaster(demFile, noDataToNan=True)
demField = demData['rasterData']
else:
log.warning('No matching DEM file found for cellSize of %s - skipping DEM plot and zoom plot' % header['cellsize'])
demPlot = False
# Set dimensions of plots
ny = dataPlot.shape[0]
nx = dataPlot.shape[1]
Ly = ny*cellSize
Lx = nx*cellSize
# constrain plot to where there is data
rowsMin, rowsMax, colsMin, colsMax, dataConstrained = pU.constrainPlotsToData(dataPlot, cellSize,
extentOption=False, constrainedData=True)
dataPlot = np.ma.masked_where(dataConstrained == 0.0, dataConstrained)
# create figure and add title
fig = plt.figure(figsize=(pU.figW*2, pU.figH))
fullTitle = '%s %s based on %s $>$ %s %s' % (avaName,
cfg['name'],
cfgFull['GENERAL']['peakVar'],
cfgFull['GENERAL']['peakLim'],
cfgFull['GENERAL']['unit'])
suptitle = fig.suptitle(fullTitle, fontsize=14, color='0.5')
ax1 = fig.add_subplot(121)
# set extent in meters using cellSize
rowsMinPlot = rowsMin*cellSize
rowsMaxPlot = (rowsMax+1)*cellSize
colsMinPlot = colsMin*cellSize
colsMaxPlot = (colsMax+1)*cellSize
extent = [colsMinPlot, colsMaxPlot, rowsMinPlot, rowsMaxPlot]
# for now continuous color map is desired
cmap, _, ticks, norm = pU.makeColorMap(cmapType, np.nanmin(dataPlot), np.nanmax(dataPlot),
continuous=True)
if demPlot:
# also constrain DEM to data constrained
demConstrained = demField[rowsMin:rowsMax+1, colsMin:colsMax+1]
# add DEM hillshade with contour lines
ls, CS = pU.addHillShadeContours(ax1, demConstrained, cellSize, extent)
dataPlot = np.ma.masked_where(dataConstrained == 0.0, dataConstrained)
cmap.set_bad(alpha=0)
else:
cmap.set_bad(colorBackGround)
# add data plot
im1 = ax1.imshow(dataPlot, cmap=cmap, extent=extent, origin='lower', aspect='equal', norm=norm,
zorder=3)
# create meshgrid for contour plot also constrained to where there is data
xx = np.arange(colsMinPlot, colsMaxPlot, cellSize)
yy = np.arange(rowsMinPlot, rowsMaxPlot, cellSize)
X, Y = np.meshgrid(xx, yy)
# add contourlines for levels
if multLabel:
CS = ax1.contour(X, Y, dataPlot, levels=levels, cmap=pU.cmapT.reversed(), linewidths=1, zorder=4)
else:
CS = ax1.contour(X, Y, dataPlot, levels=levels, colors=colorsP, linewidths=1, zorder=4)
for i in range(len(labels)):
CS.collections[i].set_label(labels[i])
pU.addColorBar(im1, ax1, ticks, unit)
title = str('%s' % cfg['name'])
ax1.set_xlabel('x [m]')
ax1.set_ylabel('y [m]')
# add zoom plot of runout area
ax2 = fig.add_subplot(122)
if demPlot:
# determine zoom in runout area
dataCut, xOrigin, yOrigin = pU.constrainToMinElevation(avaDir, raster['rasterData'], cfg, cellSize,
extentOption=True, providedDEM=demData)
# constrain to where there is data
rowsMinPlot, rowsMaxPlot, colsMinPlot, colsMaxPlot, dataCutConstrained = pU.constrainPlotsToData(dataCut,
cellSize, extentOption=True, constrainedData=True, buffer=cfg.getfloat('constrainBuffer'))
dataCutConstrained = np.ma.masked_where(dataCutConstrained == 0.0, dataCutConstrained)
# set extent of zoom Plot
x0 = xOrigin + colsMinPlot
x1 = xOrigin + colsMaxPlot
y0 = yOrigin + rowsMinPlot
y1 = yOrigin + rowsMaxPlot
# add plot
im2 = ax2.imshow(dataCutConstrained, cmap=cmap, extent=[x0, x1, y0, y1],
origin='lower', norm=norm, aspect='equal')
# create meshgrid for contour plot also constrained to where there is data
xx2 = np.linspace(x0, x1, dataCutConstrained.shape[1])
yy2 = np.linspace(y0, y1, dataCutConstrained.shape[0])
X2, Y2 = np.meshgrid(xx2, yy2)
# add contourlines for levels
if multLabel:
CS2 = ax2.contour(X2, Y2, dataCutConstrained, levels=levels,
cmap=pU.cmapT.reversed(), linewidths=1)
else:
CS2 = ax2.contour(X2, Y2, dataCutConstrained, levels=levels,
colors=colorsP, linewidths=1)
# Get the handles for the legend elements
handles, _ = CS2.legend_elements()
ax2.set_xlabel('x [m]')
ax2.set_ylabel('y [m]')
plt.legend(handles, labels, facecolor='black', framealpha=0.04)
pU.addColorBar(im2, ax2, ticks, unit)
outDir = inDir / 'plots'
outFileName = 'probMap_' + data.stem
pathDict = {'pathResult': outDir}
pU.saveAndOrPlot(pathDict, outFileName, fig)
[docs]def resultHistPlot(cfg, dataDF, xName='', scenario='', stat='count', parametersDict=''):
""" create a histogram of values and optional colorcode using scenario name
and option to filter simulations using parametersDict
Parameters
-----------
cfg: configparser object
configuration info here used outDir
dataDF: dataFrame
dataFrame with info on simulation results and configuration one line per simulation
(e.g. aimec resAnalysisDF)
xName: str
column name for x axis
scenario: str
column name used to colorcode values
stat: str
statistical measure to show (percent, probability, density, count, frequency), default count
parametersDict: dict
optional - dictionary filter criteria, parameter name and list of values
Returns
--------
plotPath: pathlib path
path to figure
"""
# filter DF with parametersDict
if parametersDict != '':
simNameList = cfgHandling.filterSims(cfg['avalancheDir'], parametersDict, specDir='', simDF=dataDF)
dataDF = dataDF[dataDF['simName'].isin(simNameList)]
# initialize figure
fig, ax = plt.subplots()
# create histogram
bars = sns.histplot(data=dataDF, x=xName, hue="scenario", stat=stat, ax=ax)
# create second y axis for ecdf
ax2 = ax.twinx()
cdf = sns.ecdfplot(data=dataDF, x=xName, hue='scenario', stat='count', ax=ax2)
ax.set_ylabel('histogram ' + stat)
ax2.set_ylabel('ecdf count')
outFileName = '%s_' % xName + 'histogram'
plotPath = pU.saveAndOrPlot({'pathResult': cfg['outDir']}, outFileName, fig)
return plotPath
[docs]def plotDistFromDF(cfg, dataDF, name1, name2, scenario='', parametersDict='', type=''):
""" create a dist plot from dataframe for name1 on x axis and name2 on y axis, optionally
colorcoded with scenario name and filtered with parametersDict
Parameters
-----------
cfg: configparser object
configuration settings here outDir
dataDF: dataframe
dataframe with one line per simulation and info on model parameters and results
name1: str
column name of dataDF to use for plot x axis
name2: str
column name of dataDF to use for plot y axis
scenario: str
optional name of column used to colorcode points in plots for type=scatter
or kde for type=dist
parametersDict: dict
optional - dictionary filter criteria
type: str
optional - type of plot dist or scatter
Returns
--------
plotPath: pathlib path
path to figure
"""
# filter DF using parametersDict
if parametersDict != '':
simNameList = cfgHandling.filterSims(cfg['avalancheDir'], parametersDict, specDir='', simDF=dataDF)
dataDF = dataDF[dataDF['simName'].isin(simNameList)]
# # create figure
if scenario !='':
if type == 'scatter':
fig, ax = plt.subplots()
ax = sns.scatterplot(data=dataDF[dataDF['simName'].isin(simNameList)], x=name1, y=name2, hue=scenario)
else:
dist = sns.jointplot(data=dataDF[dataDF['simName'].isin(simNameList)], x=name1, y=name2, hue=scenario)
ax = dist.ax_joint
fig = dist.fig
else:
dist = sns.jointplot(data=dataDF[dataDF['simName'].isin(simNameList)], x=name1, y=name2)
ax = dist.ax_joint
fig = dist.fig
# put ava name on plot and save figure
pU.putAvaNameOnPlot(ax, cfg['avalancheDir'])
outFileName = '%s_vs_%s_' % (name1, name2) + 'distplot'
plotPath = pU.saveAndOrPlot({'pathResult': cfg['outDir']}, outFileName, fig)
return plotPath
[docs]def plotSample(paramValuesD, outDir, releaseScenario=''):
""" plot the parameter sample only if two parameters are varied
Parameters
-----------
paramValuesD: dict
dictionary with parameter names and sets of values
outDir: pathlib path
path where to save the plot
"""
# Figure
fig, ax = plt.subplots(figsize=(pU.figW, pU.figH))
plt.title('Parameter sample')
plt.plot(paramValuesD['values'][:,0], paramValuesD['values'][:,1], 'b*')
plt.xlabel(paramValuesD['names'][0])
plt.ylabel(paramValuesD['names'][1])
# put ava name on plot and save figure
outFileName = 'ParameterSample_%s' % releaseScenario
plotPath = pU.saveAndOrPlot({'pathResult': outDir}, outFileName, fig)
[docs]def plotThSample(simDF, name1, thName, outDir):
""" plot the parameter sample if generated for th thickness values read from shp file
one plot for each release Scenario using simDF
Parameters
------------
simDF: pandas DataFrame
dataframe with one row per sim and info on sim configuration
name1: str
name of parameter that is varied for x-axis
thName: str
name of the thickness parameter (relTh, entTh, secondaryRelTh)
outDir: pathlib path
path to folder where to save plot
"""
# fetch all release scenarios
releaseScenario = simDF['releaseScenario'].values
relSc = list(set(releaseScenario))
# create one plot per release scenario
for count, rel in enumerate(relSc):
simDFPlot = simDF[simDF['releaseScenario'] == rel]
# create names of release features
thIds = (simDFPlot[thName + 'Id'].iloc[0]).split('|')
thFeatures = [thName + id for id in thIds]
# check if empty strings - possible if multiple scenarios and not all have same thId features
# replace empty string with nans to create numpy array of type float and not string
# required for the twinx to work properly
simDF2 = simDFPlot.copy()
for thF in thFeatures:
simDF2[thF] = simDFPlot[thF].replace('', np.nan).astype(float)
# setup figure
fig, ax = plt.subplots(figsize=(pU.figW*1.5, pU.figH))
fig.suptitle('Parameter sample of %s vs %s for %s' % (name1, thName, rel))
# scatterplot for first th feature vs name1
dist = sns.scatterplot(data=simDF2, x=name1, y= thFeatures[0], ax=ax, hue='releaseScenario')
# position of twin axes
distPosition = 1.
# loop over all the other th features and add axes for them
for count1, thFeat in enumerate(thFeatures[1:]):
axNew = ax.twinx()
axNew.spines["right"].set_position(("axes", distPosition))
dist = sns.scatterplot(data=simDF2, x=name1, y=thFeat, ax=axNew, hue='releaseScenario')
distPosition = distPosition + 0.25
plt.show()
# put ava name on plot and save figure
outFileName = 'ParameterSample_%s' % rel
plotPath = pU.saveAndOrPlot({'pathResult': outDir}, outFileName, fig)
[docs]def plotThSampleFromVals(paramValuesD, outDir):
""" plot the parameter sample if generated for thickness values read from shp file
one plot for each release Scenario - hence paramValuesD should be for one release scenario only
Parameters
------------
paramValuesD: dict
dict with info on parameter variation and initial values of parameters
outDir: pathlib path
path to folder for saving plot
"""
# if only two parameters are varied but one is read from shp then create a sample plot
# fetch parameters that are varied and thickness feature names
name1 = list(set(paramValuesD['varParNamesInitial']).symmetric_difference(set(paramValuesD['thVariationBasedOnFromShp'])))[0]
thName = paramValuesD['thVariationBasedOnFromShp'][0]
thFeatures = [th for th in paramValuesD['names'] if thName in th]
name1Index = paramValuesD['names'].index(name1)
# setup figure
fig, ax = plt.subplots(figsize=(pU.figW*1.5, pU.figH))
fig.suptitle('Parameter sample of %s vs %s' % (name1, thName))
# scatterplot for first th feature vs name1
thIndex = paramValuesD['names'].index(thFeatures[0])
plt.plot(paramValuesD['values'][:, name1Index], paramValuesD['values'][:, thIndex], 'o')
ax.set_ylabel('%s' % thFeatures[0])
# position of twin axes
distPosition = 1.
# loop over all the other th features and add axes for them
for count1, thFeat in enumerate(thFeatures[1:]):
axNew = ax.twinx()
axNew.spines["right"].set_position(("axes", distPosition))
axNew.set_ylabel('%s' % thFeat)
thIndex = paramValuesD['names'].index(thFeat)
plt.plot(paramValuesD['values'][:, name1Index], paramValuesD['values'][:, thIndex], 'o')
distPosition = distPosition + 0.25
# put ava name on plot and save figure
outFileName = 'ParameterSample_%s' % paramValuesD['releaseScenario']
plotPath = pU.saveAndOrPlot({'pathResult': outDir}, outFileName, fig)