-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcolumn_selection.py
More file actions
146 lines (114 loc) · 4.27 KB
/
column_selection.py
File metadata and controls
146 lines (114 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 12 11:45:41 2017
@author: ayoubbelhadji1
"""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pyplot
from scipy.stats import chi2
import pylab as mp
def plot_ellipse(cloud_array,semimaj=1,semimin=1,phi=0,x_cent=0,y_cent=0,theta_num=1e3,ax=None,plot_kwargs=None,\
fill=False,fill_kwargs=None,data_out=False,cov=None,mass_level=0.68):
'''
An easy to use function for plotting ellipses in Python 2.7!
The function creates a 2D ellipse in polar coordinates then transforms to cartesian coordinates.
It can take a covariance matrix and plot contours from it.
semimaj : float
length of semimajor axis (always taken to be some phi (-90<phi<90 deg) from positive x-axis!)
semimin : float
length of semiminor axis
phi : float
angle in radians of semimajor axis above positive x axis
x_cent : float
X coordinate center
y_cent : float
Y coordinate center
theta_num : int
Number of points to sample along ellipse from 0-2pi
ax : matplotlib axis property
A pre-created matplotlib axis
plot_kwargs : dictionary
matplotlib.plot() keyword arguments
fill : bool
A flag to fill the inside of the ellipse
fill_kwargs : dictionary
Keyword arguments for matplotlib.fill()
data_out : bool
A flag to return the ellipse samples without plotting
cov : ndarray of shape (2,2)
A 2x2 covariance matrix, if given this will overwrite semimaj, semimin and phi
mass_level : float
if supplied cov, mass_level is the contour defining fractional probability mass enclosed
for example: mass_level = 0.68 is the standard 68% mass
'''
# Get Ellipse Properties from cov matrix
if cov is not None:
eig_vec,eig_val,u = np.linalg.svd(cov)
# Make sure 0th eigenvector has positive x-coordinate
if eig_vec[0][0] < 0:
eig_vec[0] *= -1
semimaj = np.sqrt(eig_val[0])
semimin = np.sqrt(eig_val[1])
if mass_level is None:
multiplier = np.sqrt(2.279)
else:
distances = np.linspace(0,20,20001)
chi2_cdf = chi2.cdf(distances,df=2)
multiplier = np.sqrt(distances[np.where(np.abs(chi2_cdf-mass_level)==np.abs(chi2_cdf-mass_level).min())[0][0]])
semimaj *= multiplier
semimin *= multiplier
phi = np.arccos(np.dot(eig_vec[0],np.array([1,0])))
if eig_vec[0][1] < 0 and phi > 0:
phi *= -1
# Generate data for ellipse structure
theta = np.linspace(0,2*np.pi,theta_num)
r = 1 / np.sqrt((np.cos(theta))**2 + (np.sin(theta))**2)
x = r*np.cos(theta)
y = r*np.sin(theta)
data = np.array([x,y])
S = np.array([[semimaj,0],[0,semimin]])
R = np.array([[np.cos(phi),-np.sin(phi)],[np.sin(phi),np.cos(phi)]])
T = np.dot(R,S)
data = np.dot(T,data)
data[0] += x_cent
data[1] += y_cent
# Output data?
if data_out == True:
return data
# matplotlib.pyplot.
# Plot!
return_fig = False
if ax is None:
return_fig = True
fig,ax = plt.subplots()
if plot_kwargs is None:
ax.plot(data[0],data[1],color='b',linestyle='-')
else:
ax.plot(data[0],data[1],**plot_kwargs)
if fill == True:
ax.fill(data[0],data[1],**fill_kwargs)
ax.scatter(cloud_array[:,0],cloud_array[:,1])
if return_fig == True:
return fig
###
N = 1000
d = 2
s = 100
mean = [0, 0]
cov = [[1, 0], [0, 1]]
###
r_X = np.random.multivariate_normal(mean, cov, N)
r_X_index = list(range(0,N))
leverage_scores_r_X = np.sum(r_X.T*r_X.T, axis=0)/(np.linalg.norm(r_X)**2)
leverage_sampling = np.random.choice(r_X_index, s, p=leverage_scores_r_X,replace=False)
#matplotlib.pyplot.scatter(r_X[:,0],r_X[:,1])
#matplotlib.pyplot.show()
#matplotlib.pyplot.scatter(r_X[leverage_sampling,0],r_X[leverage_sampling,1])
#matplotlib.pyplot.show()
empirical_cov = np.cov(r_X.T)
plot_ellipse(r_X,cov=empirical_cov)
leverage_empirical_cov = np.cov(r_X[leverage_sampling,:].T)
plot_ellipse(r_X[leverage_sampling,:],cov=leverage_empirical_cov)
###