Source code for out3Plot.statsPlots

"""
    plot statistics of simulations

"""

# load python modules
import os
import numpy as np
import logging
import matplotlib.pyplot as plt
import numpy.ma as ma
import seaborn as sns
import pandas as pd
import pathlib

# local imports
import avaframe.out3Plot.plotUtils as pU
import avaframe.in3Utils.fileHandlerUtils as fU
import avaframe.in2Trans.ascUtils as IOf
import avaframe.in1Data.getInput as gI


# create local logger
log = logging.getLogger(__name__)


def plotValuesScatter(peakValues, resType1, resType2, cfg, avalancheDir, statsMeasure='max', flagShow=False):
    """ Produce scatter plot of a statistical measure (eg. max values) of two result types
        (resType1 vs. resType2), for one set of simulations or multiple

        Each simulation is one point; points are colour-coded by the value of the varied
        parameter stored under the 'varPar' key of each simulation entry. The figure is saved as
        Scatter_<resType1>_vs_<resType2>_dist<distType>_<scenario>.png in cfg['outDir'].

        Parameters
        -----------
        peakValues: dict
            one entry per simulation; each entry is a dict providing
            entry[resType][statsMeasure] for both result types and entry['varPar']
        resType1: str
            result parameter 1 (x-axis), 'ppr', 'pft', 'pfv'
        resType2: str
            result parameter 2 (y-axis), 'ppr', 'pft', 'pfv'
        cfg: dict
            configuration, keys used: varParName, varParUnit, outDir, distType, scenario
        avalancheDir: str
            avalanche directory, handed to pU.putAvaNameOnPlot
        statsMeasure: str
            statistical measure for plotting, options: max, mean, min, std
        flagShow: bool
            if True show plot
        """

    # extract values from dictionaries
    simulations = list(peakValues.values())
    values1 = [sim[resType1][statsMeasure] for sim in simulations]
    values2 = [sim[resType2][statsMeasure] for sim in simulations]
    varVal = [sim['varPar'] for sim in simulations]

    log.info('Number of simulations is: %d', len(varVal))

    # nothing to plot - the type check on varVal[0] below would fail with an IndexError
    if not varVal:
        log.warning('No simulations found in peakValues - scatter plot is not created')
        return

    # Get name and units for resTypes and parameter variation to annotate plots
    name1 = pU.cfgPlotUtils['name%s' % resType1]
    name2 = pU.cfgPlotUtils['name%s' % resType2]
    unit1 = pU.cfgPlotUtils['unit%s' % resType1]
    unit2 = pU.cfgPlotUtils['unit%s' % resType2]
    nameVar = cfg['varParName']
    unitVar = cfg['varParUnit']

    # if the varied parameter used for colorcoding is a string, fetch numeric stand-in values
    # together with the corresponding colorbar ticks and tick labels
    varIsString = isinstance(varVal[0], str)
    if varIsString:
        itemsList, ticksList, varValV = pU.getColorbarTicksForStrings(varVal)
    else:
        varValV = np.array(varVal)
        itemsList = ''

    # load variation colormap spanning the range of the varied parameter
    cmap, _, ticks, _ = pU.makeColorMap(pU.cmapVar, np.amin(varValV), np.amax(varValV), continuous=True)
    if varIsString:
        ticks = ticksList

    fig, ax = plt.subplots()
    ax.set_title('%s vs. %s' % (name1, name2))
    sc = ax.scatter(values1, values2, c=varValV, cmap=cmap)
    ax.set_xlabel('%s %s [%s]' % (statsMeasure, name1, unit1))
    ax.set_ylabel('%s %s [%s]' % (statsMeasure, name2, unit2))
    pU.addColorBar(sc, ax, ticks, unitVar, nameVar, tickLabelsList=itemsList)
    pU.putAvaNameOnPlot(ax, avalancheDir)

    # save fig - done before showing, same order as in plotValuesScatterHist
    outDir = cfg['outDir']
    fig.savefig(os.path.join(outDir, 'Scatter_%s_vs_%s_dist%s_%s.png' %
                (resType1, resType2, cfg['distType'], cfg['scenario'])))

    # show plot
    if flagShow:
        plt.show()

    plt.close(fig)


def plotValuesScatterHist(peakValues, resType1, resType2, cfg, avalancheDir,
                          statsMeasure='max', flagShow=False, flagHue=False):
    """ Produce scatter plot with marginal distribution plots (seaborn jointplot) of a statistical
        measure of two result types, for one set of simulations or multiple

        The figure is saved as Scatterkde_<resType1>_vs_<resType2>_dist<distType>_<scenario>.png
        in cfg['outDir'].

        Parameters
        -----------
        peakValues: dict
            one entry per simulation; each entry is a dict providing
            entry[resType][statsMeasure] for both result types and, if flagHue is used,
            entry['scenario']
        resType1: str
            result parameter 1 (x-axis), 'ppr', 'pft', 'pfv'
        resType2: str
            result parameter 2 (y-axis), 'ppr', 'pft', 'pfv'
        cfg: dict
            configuration, keys used: outDir, distType, scenario
        avalancheDir: str
            avalanche directory, handed to pU.putAvaNameOnPlot
        statsMeasure: str
            statistical measure for plotting, options: max, mean, min, std
        flagShow: bool
            if True show plot
        flagHue: bool
            if True colour the points by the 'scenario' entry of each simulation

        Raises
        -------
        ValueError
            if flagHue is True but not every simulation provides a 'scenario' entry

    """

    # extract values from dictionaries
    values1 = []
    values2 = []
    scenario = []
    for sim in peakValues.values():
        values1.append(sim[resType1][statsMeasure])
        values2.append(sim[resType2][statsMeasure])
        if flagHue and 'scenario' in sim:
            scenario.append(sim['scenario'])

    log.info('Number of simulations is: %d', len(values1))

    # the hue column needs one scenario per simulation - otherwise building the data frame
    # fails with a length mismatch that does not point to the actual cause
    if flagHue and len(scenario) != len(values1):
        message = ('flagHue is set but only %d of %d simulations provide a scenario' %
                   (len(scenario), len(values1)))
        log.error(message)
        raise ValueError(message)

    # Get name and units for resTypes to annotate plots
    name1 = pU.cfgPlotUtils['name%s' % resType1]
    name2 = pU.cfgPlotUtils['name%s' % resType2]
    unit1 = pU.cfgPlotUtils['unit%s' % resType1]
    unit2 = pU.cfgPlotUtils['unit%s' % resType2]
    title1 = statsMeasure + ' ' + name1+' [' + unit1 + ']'
    title2 = statsMeasure + ' ' + name2+' [' + unit2 + ']'

    if flagHue:
        # create pandas data frame required for seaborn jointplot with hue
        maxData = pd.DataFrame({name1: values1, name2: values2, 'scenario': scenario})
        grid = sns.jointplot(data=maxData, x=name1, y=name2, hue="scenario")
    else:
        grid = sns.jointplot(x=values1, y=values2)

    # add axis labels and text box
    grid.ax_joint.set_xlabel(title1)
    grid.ax_joint.set_ylabel(title2)
    pU.putAvaNameOnPlot(grid.ax_joint, avalancheDir)

    # address the figure of the joint plot explicitly instead of relying on the pyplot
    # current-figure state
    fig = grid.ax_joint.figure

    # save fig
    outDir = cfg['outDir']
    fig.savefig(os.path.join(outDir, 'Scatterkde_%s_vs_%s_dist%s_%s.png' %
                (resType1, resType2, cfg['distType'], cfg['scenario'])))

    # show plot
    if flagShow:
        plt.show()

    plt.close(fig)


def plotHistCDFDiff(dataDiffPlot, ax1, ax2, insert=True, title=('', '')):
    """ Produce histogram and CDF plot of the raster difference of two simulations

    The CDF of the absolute differences is drawn on ax1, with dashed guide lines at the two
    centiles given by pU.cfg['centile1'] and pU.cfg['centile2']. The histogram of the (signed)
    differences is drawn on ax2, with x-limits set to +/- the absolute difference found at
    centile2 and pU.cfg['steps2Centile2'] bins within that interval.

    Parameters
    -----------
    dataDiffPlot: numpy array
        difference values of the two simulations
        NOTE(review): the values are sorted with np.sort (last axis) and plotted against a 1D
        CDF, so presumably a 1D array of the valid cells is expected - confirm with callers
    ax1: axes
        axes for the CDF plot
    ax2: axes
        axes for the histogram plot
    insert: bool
        if truthy, tick labels are made smaller and rotated (plots in inserted axes) and the
        titles are set. Only the truth value is evaluated, so any non-empty string
        (also 'False') counts as True
        NOTE(review): the previous docstring stated titles are used if the plots are not
        inserts, the code applies them if insert is truthy - behaviour kept, confirm intent
    title: list
        titles of the plots: title[0] for the histogram (ax2), title[1] for the CDF (ax1)

    Returns
    --------
    ticks: list
        x-ticks set on ax1: absolute difference at centile1, absolute difference at centile2
        and the floored maximum absolute difference
    """
    # Difference between datasets
    diffMax = np.nanmax(dataDiffPlot)
    diffMin = np.nanmin(dataDiffPlot)

    # empirical CDF of the absolute differences
    sortedDiffPlot = np.sort(np.abs(dataDiffPlot))
    nSample = np.size(sortedDiffPlot)
    hist = np.arange(nSample)/float(nSample)

    # the CDF curve does not depend on the centiles - draw it once
    ax1.plot(sortedDiffPlot, hist)

    # mark both centiles with dashed guide lines and collect their values as ticks
    centile1 = float(pU.cfg['centile1'])
    centile2 = float(pU.cfg['centile2'])
    ticks = []
    for val in (centile1, centile2):
        ind = min(np.searchsorted(hist, val), nSample-1)
        ax1.hlines(hist[ind], 0, sortedDiffPlot[ind], linestyles='--', linewidths=0.5)
        ax1.vlines(sortedDiffPlot[ind], 0, hist[ind], linestyles='--', linewidths=0.5)
        ticks.append(sortedDiffPlot[ind])

    # absolute difference at centile2 defines the range shown in the histogram
    centile2Val = ticks[-1]
    ax2.set_xlim([-centile2Val, centile2Val])

    # stepsInterval bins in the [-centile2Val, +centile2Val] interval, extended with the same
    # bin width to the full data range
    width = diffMax - diffMin
    stepsInterval = int(pU.cfg['steps2Centile2'])
    maxBins = 1000
    if stepsInterval > 0 and centile2Val > 0:
        nBins = width/(2*centile2Val/stepsInterval)
        # reduce bins to a sensible size
        bins = 1 if np.isnan(nBins) else int(min(nBins, maxBins))
    else:
        # zero bin width (eg. identical datasets): no meaningful bin count can be derived
        bins = 1
    # histogram requires at least one bin
    bins = max(bins, 1)

    ax2.hist(dataDiffPlot, bins=bins, density=True, histtype="stepfilled")
    ax2.get_yaxis().set_ticks([])
    if insert:
        ax2.tick_params(axis='x', which='major', labelsize=8, rotation=45)
        ax2.set_title(title[0])

    ticks.append(np.floor(np.nanmax(np.abs(dataDiffPlot))))
    ax1.set_xticks(ticks)
    if insert:
        ax1.tick_params(axis='both', which='major', labelsize=8, rotation=45)
        ax1.set_title(title[1])

    return ticks


def _addProbContours(ax, xCoords, yCoords, data, levels, multLabel, colorsP):
    """ draw contour lines of data at levels on ax and return the resulting ContourSet

        multLabel True: line colors from the reversed pU.cmapT colormap, else the colors colorsP
    """
    X, Y = np.meshgrid(xCoords, yCoords)
    if multLabel:
        return ax.contour(X, Y, data, levels=levels, cmap=pU.cmapT.reversed(), linewidths=1)
    return ax.contour(X, Y, data, levels=levels, colors=colorsP, linewidths=1)


def plotProbMap(avaDir, inDir, cfgFull):
    """ plot probability maps including contour lines

        For every *.asc file in inDir a figure with two panels is created: left the map
        constrained to where there is data, right a zoom determined with
        pU.constrainToMinElevation. Contour lines are added at the configured levels and the
        figure is saved to inDir/plots.

        Parameters
        ----------
        avaDir: str
            path to avalanche directory
        inDir: pathlib.Path or str
            path to datasets that shall be plotted
        cfgFull: configParser object
            configuration settings for probAna
            keys used in section PLOT: name, cmapType, levels, unit, constrainBuffer
            (the PLOT section is also handed to pU.constrainToMinElevation)
            keys used in section GENERAL: peakLim

        Raises
        -------
        FileNotFoundError
            if no *.asc file is found in inDir

    """

    # configuration settings
    cfg = cfgFull['PLOT']

    # glob and the / operator below require a path object
    inDir = pathlib.Path(inDir)

    # fetch probability map datasets in inDir
    dataFiles = list(inDir.glob('*.asc'))
    if not dataFiles:
        message = 'No probability map dataset found in: %s' % (inDir)
        log.error(message)
        raise FileNotFoundError(message)
    log.info('Probability maps found for generating plots: %d', len(dataFiles))

    # output location - the file name does not depend on the dataset
    outDir = inDir / 'plots'
    avaName = pathlib.PurePath(avaDir).name
    outFile = outDir / ('%s_probMap_lim%s.%s' % (avaName, cfgFull['GENERAL']['peakLim'], pU.outputFormat))
    if len(dataFiles) > 1:
        # NOTE(review): all datasets are written to the same file, only the last plot is kept
        log.warning('%d probability maps found but all plots are saved to %s - earlier plots are overwritten',
                    len(dataFiles), outFile)

    # load levels and define colors
    levels = fU.splitIniValueToArraySteps(cfg['levels'])
    multLabel = len(levels) > 2
    colorsP = None
    if multLabel:
        cmapType = pU.cmapGreys1
        colorBackGround = 'seashell'
    else:
        cmapType = pU.colorMaps[cfg['cmapType']]
        colorsP = ['black', 'white']
        colorBackGround = 'gainsboro'
        if len(levels) == 1 and levels[0] > 0.5:
            colorsP = ['white']
    unit = cfg['unit']

    # legend labels of the contour lines: levels in percent
    labels = ['%.0f %%' % (lev*100) for lev in levels]

    # loop over all datasets and create plots
    for data in dataFiles:

        raster = IOf.readRaster(data, noDataToNan=True)
        dataPlot = raster['rasterData']
        header = IOf.readASCheader(data)
        cellSize = header['cellsize']

        # dimensions of the full raster, used for the aspect of both panels
        ny = dataPlot.shape[0]
        nx = dataPlot.shape[1]

        # constrain plot to where there is data
        rowsMin, rowsMax, colsMin, colsMax, dataConstrained = pU.constrainPlotsToData(dataPlot, cellSize,
            extentOption=True, constrainedData=True)
        # mask zero values so that these cells are drawn in the background color
        dataPlot = np.ma.masked_where(dataConstrained == 0.0, dataConstrained)

        # create figure
        fig = plt.figure(figsize=(pU.figW*2, pU.figH))
        fig.suptitle(cfg['name'], fontsize=14, color='0.5')
        ax1 = fig.add_subplot(121)
        # for now continuous color map is desired
        cmap, _, ticks, norm = pU.makeColorMap(cmapType, np.nanmin(dataPlot), np.nanmax(dataPlot), continuous=True)
        cmap.set_bad(colorBackGround)
        im1 = ax1.imshow(dataPlot, cmap=cmap, extent=[colsMin, colsMax, rowsMin, rowsMax],
                         origin='lower', aspect=nx/ny, norm=norm)

        # add contourlines for levels, grid also constrained to where there is data
        # (no labels are set here: a legend is only drawn for the zoom panel)
        _addProbContours(ax1, np.arange(colsMin, colsMax, cellSize), np.arange(rowsMin, rowsMax, cellSize),
                         dataPlot, levels, multLabel, colorsP)

        pU.addColorBar(im1, ax1, ticks, unit)
        ax1.set_title('%s' % cfg['name'])
        ax1.set_xlabel('x [m]')
        ax1.set_ylabel('y [m]')

        # add zoom plot of runout area
        ax2 = fig.add_subplot(122)

        # determine zoom in runout area
        dataCut, xOrigin, yOrigin = pU.constrainToMinElevation(avaDir, raster['rasterData'], cfg, cellSize,
            extentOption=True)

        # constrain to where there is data
        rowsMinPlot, rowsMaxPlot, colsMinPlot, colsMaxPlot, dataCutConstrained = pU.constrainPlotsToData(dataCut,
            cellSize, extentOption=True, constrainedData=True, buffer=cfg.getfloat('constrainBuffer'))
        dataCutConstrained = np.ma.masked_where(dataCutConstrained == 0.0, dataCutConstrained)

        # set extent of zoom plot
        x0 = xOrigin + colsMinPlot
        x1 = xOrigin + colsMaxPlot
        y0 = yOrigin + rowsMinPlot
        y1 = yOrigin + rowsMaxPlot

        # add plot
        im2 = ax2.imshow(dataCutConstrained, cmap=cmap, extent=[x0, x1, y0, y1],
                         origin='lower', aspect=nx/ny, norm=norm)

        # add contourlines for levels, grid also constrained to where there is data
        CS2 = _addProbContours(ax2, np.arange(x0, x1, cellSize), np.arange(y0, y1, cellSize),
                               dataCutConstrained, levels, multLabel, colorsP)

        pU.addColorBar(im2, ax2, ticks, unit)
        ax2.set_xlabel('x [m]')
        ax2.set_ylabel('y [m]')

        # legend of the contour levels on the zoom panel; legend_elements provides one handle per
        # level and does not depend on ContourSet.collections (deprecated in matplotlib 3.8)
        handles, _ = CS2.legend_elements()
        ax2.legend(handles, labels, facecolor='black', framealpha=0.04)

        fU.makeADir(outDir)
        fig.savefig(outFile)
        plt.close(fig)