-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathstats_functions.py
More file actions
140 lines (128 loc) · 4.58 KB
/
stats_functions.py
File metadata and controls
140 lines (128 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import numpy as np
from scipy.stats import norm
from scipy.interpolate import interp1d
from numpy.linalg import lstsq as nplstsq
def makeInputPdf(pdfRanges, pdfParam,pdfType='norm',cont_discrete='continuous'):
'''
pdfRanges --> gives the range for each bin in the node
pdfParam --> a Nx2 vector of mean and std
pdfType --> indicates the distribution assumption (must be 'norm' for now)
cont_discrete --> indicates of node is 'continuous' or 'discrete'
(MUST BE CONTINUOUS FOR NOW!)
returns PDF
'''
[N,m] = pdfParam.shape
r = len(pdfRanges)
if cont_discrete == 'continuous':
PDF = np.zeros((N,r-1))
for i in np.arange(N):
cdf = norm.cdf(pdfRanges,pdfParam[i,0],pdfParam[i,1])
cdf[-1]=1.0
PDF[i,:] = np.diff(cdf)
return PDF
def getPy(p,pdf,ranges):
'''
return the value of y with probability p from the PDF supplied
this is the percentile (p)
'''
M = len(ranges)
N,Nbins = pdf.shape
f = np.cumsum(pdf, 1)
f = np.hstack((np.zeros((N, 1)), f))
# now make special cases for discrete or continuous
if Nbins == M:
# D I S C R E T E ((( NOT TESTED!!!!!)))
if p<0.5: # indicates lower bound
f = sum(f<p,1)
else: # upper bound
f = f>p
f = M-sum(f,1)+1
# catch any overrun
f[f>M] = M
py = ranges[f-1]
else:
# C O N T I N U O U S
py = np.nan * np.ones((N,1))
for i in np.arange(N):
funique,uniqueid = np.unique(f[i,:],return_index=True)
# note strange syntax in the next line because interp1d returns
# a function that then performs the interpolation
py[i] = interp1d(funique,ranges[uniqueid])(p)
return py
def getMeanStdMostProb(pdf,ranges,continuous,blank):
'''
return the Mean, StdDev, and MostProbably value from a pdf
'''
[Nlocs,Npdf] = pdf.shape
id1 = np.arange(Npdf)
if continuous:
id2 = np.arange(Npdf)+1
else:
id2 = np.arange(Npdf)
# MOST PROBABLE FIRST
# preallocate output
mostProb = np.nan * np.ones((Nlocs,1))
for i in np.arange(Nlocs):
cid = np.where(pdf[i,:]==np.max(pdf[i,:]))
if len(cid)>1:
cid = cid[-1]
else:
cid = cid[0]
mostProb[i] = 0.5*(ranges[id1[cid]]+ranges[id2[cid]])[0]
# NEXT, DO THE MEAN and STDDEV
retMean = np.atleast_2d(np.dot(pdf,(ranges[id1]+ranges[id2])/2.0)).T*blank
retStd = np.zeros((Nlocs,1))
# set up the sub-bin variance
binWidthVariance = 0.5*np.atleast_2d(np.dot(pdf,(ranges[id2]-ranges[id1])/np.sqrt(3.0))).T
for i in np.arange(Nlocs):
retStd[i] = np.dot(pdf[i,:],(((ranges[id1]+ranges[id2])/2.0)-retMean[i])**2.0)
retStd[i] = np.sqrt(binWidthVariance[i] + retStd[i])
return retMean, retStd, mostProb,
def LSQR_skill(x, z, w=None):
'''
function to calculate skill of a model. Performs (optionally) weighted
regression to get the coefficients.
input:
x --> vector of expected values (ML or mean)
z --> observations
w --> optional vector of std_dev weights to go on the diagonal
N.B. ==> these weights will be inverted as they are assumed to be in the
form of standard deviation
output:
sk --> the model skill
'''
# first, adjust for NaNs
keepinds = np.where(np.isnan(z) == False)[0]
x = x[keepinds]
z = z[keepinds]
n = len(x)
X = np.hstack((np.ones((n, 1)), x))
# handle the weights - assuming uncorrelated errors for now
if w:
w = w[keepinds]
Q = np.diag(1.0/w)
else:
pass
# form weighted covariance if using weights
# Q = np.diag(np.ones(n))
# form the normal equations including weights
# reserved --> XtX = np.dot(np.dot(X.T, Q), X)
# solve for b
b = nplstsq(X, z)[0]
# calculate the variance of the data N.B. the mean is assumed already subtracted off from z
obsresid = z
msz = np.dot(obsresid.T, obsresid)/n
# calculate the variance of the residuals
modresid = np.dot(X, b) - z
msr = np.dot(modresid.T, modresid)/n
# calculate the skill
sk = 1-(msr/msz)
#print 'sk --> %f' %(sk)
'''
#special case if mean already off:
msz2 = np.dot(z.T,z)/n
msr2 = msz2 - np.dot(np.dot(b.T,XtX),b)
sk2 = 1-(msr2/msz2)
print 'sk2 --> %f' %(sk2)
'''
return sk