CustomModules/grid_tools.py at master · mwfranklin/CustomModules · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
import os
import sys
import subprocess
import numpy as np
import scipy.spatial

#added 11/16 MWF from CustomModules files
def rotation_matrix(vector1, origin):
    #vector1 should be a vector between two points; origin should be the vector to which vector1 is being moved
    vector1 = vector1/np.linalg.norm(vector1)
    origin = origin/np.linalg.norm(origin)
    v = np.cross(vector1, origin)
    c = np.dot(vector1, origin)
    s = np.linalg.norm(v)
    I = np.identity(3)
    k = np.matrix([(0, -v[2], v[1]), (v[2], 0, -v[0]), (-v[1], v[0], 0)])#.reshape(3,3)
    #print(k)
    r = I + k + np.matmul(k,k) * ((1 -c)/(s**2))
    return(r)

#added 11/16 MWF from CustomModules files
def translation(new_point, origin):
    return (new_point - origin)

#added 11/16 MWF from CustomModules files
def rotate_trans_coords(a, b, a_orig, b_orig, res_coord_array):
    #a, b are current points; a_orig, b_orig are where they should move to; res_coord_array contains coordinates to be moved
    #print(a_orig - b_orig)
    rot_mat = rotation_matrix(a-b, a_orig-b_orig)
    #print(rot_mat)
    trans_vect = translation(np.asarray(np.dot(rot_mat, a))[0], a_orig )
    #print(trans_vect)
    matrix_coords = np.transpose(res_coord_array)
    matrix_rot = np.matmul(rot_mat, matrix_coords)
    #print(matrix_rot)
    rotated_coords = np.transpose(matrix_rot)
    #print(rotated_coords)
    rotated_coords = rotated_coords - trans_vect
    return(rotated_coords) #returns a matrix of n by 3 of the translated/rotated vectors

#following functions blatantly copied from http://nicky.vanforeest.com/misc/fitEllipse/fitEllipse.html
def fitEllipse(x,y):
    x = x[:,np.newaxis]
    y = y[:,np.newaxis]
    D =  np.hstack((x*x, x*y, y*y, x, y, np.ones_like(x)))
    #print("D:", D)
    S = np.dot(D.T,D)
    C = np.zeros([6,6])
    C[0,2] = C[2,0] = 2
    C[1,1] = -1
    #print("S:", S)
    #print("C:", C)
    #print( np.linalg.pinv(S) )
    E, V =  np.linalg.eig(np.dot(np.linalg.pinv(S), C))
    n = np.argmax(np.abs(E))
    a = V[:,n]
    return a

def ellipse_center(a):
    b,c,d,f,g,a = a[1]/2, a[2], a[3]/2, a[4]/2, a[5], a[0]
    num = b*b-a*c
    x0=(c*d-b*f)/num
    y0=(a*f-b*d)/num
    return np.array([x0,y0])

def ellipse_angle_of_rotation( a ):
    b,c,d,f,g,a = a[1]/2, a[2], a[3]/2, a[4]/2, a[5], a[0]
    if b == 0:
        if a > c:
            return 0
        else:
            return np.pi/2
    else:
        if a > c:
            return np.arctan(2*b/(a-c))/2
        else:
            return np.pi/2 + np.arctan(2*b/(a-c))/2

def ellipse_axis_length( a ):
    b,c,d,f,g,a = a[1]/2, a[2], a[3]/2, a[4]/2, a[5], a[0]
    up = 2*(a*f*f+c*d*d+g*b*b-2*b*d*f-a*c*g)
    down1=(b*b-a*c)*( (c-a)*np.sqrt(1+4*b*b/((a-c)*(a-c)))-(c+a))
    down2=(b*b-a*c)*( (a-c)*np.sqrt(1+4*b*b/((a-c)*(a-c)))-(c+a))
    #print(up, down1, down2)
    res1=np.sqrt(np.absolute(up/down1)) #added absolute value 1-10-18 MWF
    res2=np.sqrt(np.absolute(up/down2)) #added absolute value 1-10-18 MWF
    #print(res1, res2)
    return np.array([res1, res2])

#added 11/16 MWF

#given an array of points that have been pre-rotated to x,y,z and a pair of z-boundaries in that array
#return features describing that slice of points
#   farthest_point is the max of the distance between all the points in the slice
#   pocket_area is the area of the convex hull; in otherwords, the approx cross-sectional area of the slice
#   pocket_offset is how far the center of the ellipse is from the origin where the points have been rotated to
#   long/short_axis is the ellipse dimensions
def calc_slice_features(pocket_array, z_depth, interval = 1.4):
    upper_slice_boundary = z_depth + interval
    lower_slice_boundary = z_depth - interval
    #the z-coordinate is not considered for any of these calculations; the slice of points is flattened into just x,y space
    pocket_slice = pocket_array[ (pocket_array[:,2] <= upper_slice_boundary) & (pocket_array[:,2] >= lower_slice_boundary)]
    #print(pocket_slice)
    if len(pocket_slice) > 0:
        all_dist = scipy.spatial.distance.cdist(pocket_slice[:,0:2], pocket_slice[:,0:2])
        farthest_point = np.max(all_dist)
        #print("farthest:", farthest_point)
        if len(pocket_slice) > 2:
            #calculate the area of the convex hull that encloses the points
            hull = scipy.spatial.ConvexHull(pocket_slice[:,0:2], qhull_options="QJ")
            #print(hull)
            #fit an ellipse to the hull points
            pocket_area = hull.area
            #print("pocket area", pocket_area)
            a = fitEllipse(hull.points[hull.vertices][:,0], hull.points[hull.vertices][:,1])
            center = ellipse_center(a)
            pocket_offset = scipy.spatial.distance.cdist([[0,0]], [center])
            phi = ellipse_angle_of_rotation(a)
            axes = ellipse_axis_length(a)
            long_axis = max(axes)
            short_axis = min(axes)
            #print(long_axis, short_axis)
            return [float(farthest_point), float(pocket_area), float(pocket_offset[0][0]), float(long_axis), float(short_axis)]
        else:
            return[farthest_point, 0, 0, 0, 0]
    else:
        return [0,0,0,0,0]

##Takes in a pocket pdb file and parses down to the relevant grid points
def process_pocket(pocket_file):
    tp = []
    tpb = []
    tpe = []
    tps = []
    stp = []
    sts = []
    stb = []
    ste = []
    ts = []
    whole_pocket = []

    ##Read in the pocket PDB file
    ##Place the points into the respective lists.
    ##TP should be key target. but, you can often have issues in pits so stp is useful too.
    with open(pocket_file, "r") as pocketPDB:
        for entry in pocketPDB:
            if (entry[0:6] == "ATOM  "):
                m_id = int(entry[8:11])
                x = float(entry[31:38])
                y = float(entry[39:46])
                z = float(entry[47:54])
                resn = entry[13:16].strip() ##???
                t = np.array([x, y, z])

                if (resn == "PR"): #protein
                    pass
                elif (resn == "TP"): #target pocket
                    tp.append(t)
                    whole_pocket.append(t)
                elif (resn == "TPB"): #target pocket buried
                    tpb.append(t)
                    whole_pocket.append(t)
                elif (resn == "TPE"): #target pocket edge
                    tpe.append(t)
                    #whole_pocket.append(t)
                elif (resn == "TPS"): #target pocket surface
                    tps.append(t)
                    whole_pocket.append(t)
                elif (resn == "STP"): #small pocket
                    stp.append(t)
                    whole_pocket.append(t)
                elif (resn == "STS"): #small pocket surface
                    sts.append(t)
                    whole_pocket.append(t)
                elif (resn == "STB"): #small pocket buried
                    stb.append(t)
                    whole_pocket.append(t)
                elif (resn == "TS"): #target surface?
                    ts.append(t)
                    whole_pocket.append(t)
                elif (resn == "STE"): #small pocket edge
                    ste.append(t)
                    #whole_pocket.append(t)
    return [tp, tpb, tpe, tps, stp, sts, stb, ts, whole_pocket] #each of these is a list of np arrays; these can be converted to an n by 3 array with np.asarray(X)

def get_vectors(pocket_defs, origin, closest_res):
    #pocket_defs should be a n by 3 numpy array
    #origin should be a [x, y, z] array
    #closest res is also in the form of an [x, y, z] array

    #Find average of subset of points selected to be the pocket opening, i.e. TP/STP residues in grid file
    ##Average of all points in pocket_defs
    z_target = np.average(pocket_defs, axis = 0)
    #print(z_target)
    #Get Z
    z = z_target - origin
    z /= np.linalg.norm(z)
    #Get X
    x = closest_res - origin
    #print(x)
    #get the orthogonal vector from x
    x -= x.dot(z) * z

    ##Now the cross product to get the Y vector
    y = np.cross(x, z)

    #returns unit vectors of x, y, z
    return [x, y, z, z_target - origin]

def vectors_from_pdb(pocket_grid_name, metal_ion_center, m_closest_residue):
    pocket_defs = process_pocket(pocket_grid_name)
    endpoints = pocket_defs[0] + pocket_defs[4]
    vecs = get_vectors(np.asarray(endpoints), metal_ion_center, m_closest_residue)

    return vecs

#added 11-20-18 MWF
def calc_pocket_features(pdb_id, site_id, pocket, origin, nearest_res_coords, center_of_mass, directory):
    feature_labels = ["farPt", "PocketArea", "Offset", "LongAx", "ShortAx"]
    labels1 = [x+"Low" for x in feature_labels]
    labels2 = [x+"Mid" for x in feature_labels]
    labels3 = [x+"High" for x in feature_labels]
    all_labels = ["SEPocket", "Depth", "Vol", "LongPath"] + labels1 + labels2 + labels3

    target_pocket = pocket[0] + pocket[4] #TP and STP residue codes in pocket pdb - target pocket
    #print(target_pocket)
    if len(target_pocket) == 0:
        if len(pocket[3] + pocket[5]) > 0: #use TPS/STS - pocket surface
            target_pocket = pocket[3] + pocket[5]
            pocket_type = 2
            print("USING TPS residues")
        #print(len(pocket[1]))
        elif len(pocket[1]) > 0: #if we have no TP or STP residues, use the TPB ones instead
            pocket_type = 1
            target_pocket = farthest_residue(center_of_mass, pocket[1]) #rather than the average of all of the TPB,
            print("USING TPB residues from C.O.M.", target_pocket)
        else:
            print("NO SURFACE POCKET")
            if len(pocket[7]) > 0 :
                return(all_labels, [0] * 19)
            else:
                return(all_labels, [4] + [0]*18 )
    else:
        pocket_type = 3

    vectors = get_vectors(np.asarray(target_pocket), origin, nearest_res_coords)
    #print(vectors)
    depth_in_pocket = np.linalg.norm(vectors[-1])
    volume = subprocess.check_output(["sed", "1q;d", "%s/%s_Vol.txt"%(directory, site_id)] )
    #print(volume)
    volume = volume.decode("utf-8").strip().split("\n")[0].split()[-1]
    volume = float(volume) #volume of pocket from pocket_grid
    longest_path_out = scipy.spatial.distance.cdist([vectors[-1]], [origin], "cityblock" )[0][0] #if you traverse a grid from origin to the target pocket, how far is it?

    #vectors[2] is the unit z-axis as returned from get_vectors, [0,0,0] is the origin for this z-axis; the pocket array must be pre-translated
    rotated_pocket = np.asarray(rotate_trans_coords(np.array([0,0,0]), vectors[2], np.array([0, 0, 0]), np.array([0,0,1]), np.asarray(pocket[-1])-origin))

    #the pocket grid is currently done on 0.5A intervals;
    #an interval of 1.4 is approximately the distance between 3 stacked grid points on the diagonal;
    #a slice containing +- an interval should capture a resonable collection of points
    middle_features = calc_slice_features(rotated_pocket, depth_in_pocket/2)
    upper_features = calc_slice_features(rotated_pocket, depth_in_pocket)
    lower_features = calc_slice_features(rotated_pocket, 0)

    this_SITE_features = [pocket_type, depth_in_pocket, volume, longest_path_out]
    this_SITE_features.extend(lower_features)
    this_SITE_features.extend(middle_features)
    this_SITE_features.extend(upper_features)

    #solvent-exposed pocket, depth, vol, long_path, [farthest_point, pocket_area, pocket_offset[0][0], long_axis, short_axis] for low/mid/upper
    return(all_labels, this_SITE_features)

def closest_residue(origin, residue_list):
    distances = residue_list - origin
    nearest_neighbor = residue_list[np.argmin( np.sum(distances**2, axis=1))] #return the row that corresponds to the minimum distance
    return nearest_neighbor

def farthest_residue(origin, residue_list):
    distances = residue_list - origin
    nearest_neighbor = residue_list[np.argmax( np.sum(distances**2, axis=1))] #return the row that corresponds to the minimum distance
    return nearest_neighbor