Source code for in1Data.computeFromDistribution

"""
    Generate sample of values following a beta-pert or a uniform distribution
"""

# load python modules
import numpy as np
import scipy.special as sc
from scipy.interpolate import interp1d
import logging
from scipy.stats import norm


# create local logger
log = logging.getLogger(__name__)


[docs]def computeParameters(a, b, c): """ Compute alpha, beta and mu """ # computation of paramters if a > b or b > c or a > c: message = 'a:%.2f must be smaller than b: %.2f must be smaller than c: %.2f' % (a, b, c) log.error(message) raise ValueError(message) mu = (a + 4*b + c) / 6.0 alpha = (4*b + c - 5*a) / (c - a) beta = (5*c - a - 4*b) / (c - a) return alpha, beta, mu
[docs]def computePert(a, b, c, x, alpha, beta): """ Compute the CDF and PDF of the Pert distribution using scipy betainc function """ # Compute pert pdf for testing if the retrieved sample fits the desired distribution PDF = (((x - a)**(alpha - 1)) * ((c - x)**(beta-1))) / \ (sc.beta(alpha, beta) * ((c - a)**(alpha + beta - 1))) # compute regularized incomplete beta function for pert distribution using scipy z = (x - a) / (c - a) CDF = sc.betainc(alpha, beta, z) # use scipy.interpolate to create a function that can be used to extract samples CDFint = interp1d(CDF, x) return PDF, CDF, CDFint
[docs]def extractFromCDF(CDF, CDFint, x, cfg): """ Extract a sample from the CDF with prescribed steps """ # extract number of samples using function generated using scipy.interpolate # more robust regarding septs vs CDFinterval if cfg.getboolean('flagMinMax'): ySampling = np.linspace(0.0, 1.0, cfg.getint('sampleSize')) sampleVect = CDFint(ySampling) else: ySampling = np.linspace(0.0, 1.0, cfg.getint('sampleSize')+2) sampleVect = CDFint(ySampling) sampleVect = sampleVect[1:-1] return sampleVect
[docs]def extractUniform(a, c, x, cfg): """ Extract sample of a uniform distriution """ # load parameters sampleSize = cfg.getint('sampleSize') if cfg.getboolean('flagMinMax'): sampleSize = sampleSize else: sampleSize = sampleSize + 2 # compute interval fullInterval = 100000.0 * ((c - a) / (sampleSize-1)) interval = np.around(fullInterval) * 0.00001 # Compute sample sampleVect = np.zeros(sampleSize) sampleVect[0] = a for m in range(1, sampleSize): sampleVect[m] = sampleVect[m-1] + interval if cfg.getboolean('flagMinMax'): sampleSize = sampleSize else: sampleVect = sampleVect[1:-1] CDF = np.linspace(0, 1, cfg.getint('support')) CDFInt = interp1d(CDF, x) return CDF, CDFInt, sampleVect
[docs]def getEmpiricalCDF(sample): """ Derive empirical CDF using numpy histogram and cumsum """ binsNo = int(len(sample) * 0.25) hist, binsEd = np.histogram(sample, bins=binsNo) CDFEmp = np.cumsum(hist) CDFEmpPlot = CDFEmp / CDFEmp[-1] return CDFEmpPlot, binsEd[1:]
[docs]def getEmpiricalCDFNEW(sample): """ Derive empirical CDF using sorted sample """ # sort sample sampleSorted = np.sort(sample) sampleSize = len(sample) ECDF = np.zeros(sampleSize) for m in range(sampleSize): cumsum = 0 for l in range(sampleSize): if sample[l] <= sampleSorted[m]: cumsum = cumsum + 1 ECDF[m] = cumsum / sampleSize return ECDF, sampleSorted
[docs]def extractNormalDist(cfg): """ create a normal distribution from given parameters and draw a sample Parameters ------------ cfg: configparser object or dict configuration settings for computing distributions, std or ci95, minMaxInterval, flagMinMax Returns -------- CDFint: function object interpolated CDF function sampleVect: numpy array sample values pdf: numpy array pdf values computed for normal distribution and support x x: numpy array support x """ # load parameters from config sampleSize = int(cfg['sampleSize']) mean = float(cfg['mean']) minMaxInterval = float(cfg['minMaxInterval']) # first check if std or ci95 is provided throw error if both if cfg['buildType'] == 'ci95': std = float(cfg['buildValue']) / 1.96 elif cfg['buildType'] == 'std': std = float(cfg['buildValue']) else: message = 'buildType: %s not a valid option' % cfg['buildType'] log.error(message) raise AssertionError log.info('Compute normal distribution with mean: %.4f and std: %.4f' % (mean, std)) # compute min and max values of range derived from 99% confidence interval # Note: final sample includes these min and max values min = norm.interval(confidence=(minMaxInterval/100.), loc=mean, scale=std)[0] max = norm.interval(confidence=(minMaxInterval/100.), loc=mean, scale=std)[1] # derive normal distribution (pdf and cdf) for range from min to max values x = np.linspace(min, max, int(cfg['support'])) cdf = norm.cdf(x, loc=mean, scale=std) pdf = norm.pdf(x, loc=mean, scale=std) # create interpolated function of cdf to draw samples from CDFint = interp1d(cdf, x) # draw sampleSize samples from distribution xSample = np.linspace(np.min(cdf), np.max(cdf), sampleSize) sampleVect = CDFint(xSample) return CDFint, sampleVect, pdf, x