ToyConPy/FigureCreationScript.py at master · csbl/ToyConPy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
"""This python script generates pdf documents that contain the figures associated with the ToyCon1 model. Such figures
are summaries of the reactions, genes, and simulations performed (gene KOs, FVA). Running this script requires that
the following packages are installed:
    COBRApy (made in v6.1)
    smbl
    numpy
    scipy
    pandas
    csv
    matplotlib
The program also assumes that the script SMBLToyConModelCreator.py has been previously run."""
import cobra.io
import cobra.core.arraybasedmodel
import numpy
import pandas
import csv
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.backends.backend_pdf import PdfPages

model = cobra.io.read_sbml_model('ToyCon.xml')#Read in toycon model
model.solver = 'gurobi' #set solver
model.optimize() #optimize model

Amodel = model.to_array_based_model() #change to arraybased model to extract S matrix
smatrix = numpy.matrix(Amodel.S) #extract S matrix

#make S matrix with names
sdataName = pandas.DataFrame(Amodel.S.toarray(),[x.name for x in model.metabolites],[x.name for x in model.reactions])#create data frame object
pandas.DataFrame.to_csv(sdataName,'toycon1_smatrix_names.txt',index = True,sep = ' ',quoting=csv.QUOTE_NONE,header=True,escapechar=' ',float_format='%d') #print S matrix as txt file

#make S matrix with IDs
sdataID = pandas.DataFrame(Amodel.S.toarray(),[x.id for x in model.metabolites],[x.id for x in model.reactions])#create data frame object
pandas.DataFrame.to_csv(sdataID,'toycon1_smatrix_id.txt',index = True,sep = ' ',quoting=csv.QUOTE_NONE,header=True,escapechar=' ',float_format='%d') #print S matrix as txt file

#creat a reaction decomposition of the S matrix where one reaction is represented as multiple rows
rxnDecompDict = {}
z = 0
for x in model.reactions:
    for y in x.metabolites: #create dictionary
        rxnDecompDict[z] = str.split('%s %s %d %s %s %s %s' % (y.id,x.id,x.get_coefficient(y.id),y.compartment,y.id[:-1],y.name,y.name+'['+y.compartment+']'),' ')
        rxnDecompDict[z][2] = int(rxnDecompDict[z][2])
        z += 1

toycon1_rxn_decomposition = pandas.DataFrame.from_dict(rxnDecompDict,orient='index') #create datafrane
toycon1_rxn_decomposition.columns=['met_id','rxn_id','coeff','met_compartment','met_compound','cpd_name','met_name']#add column labels
toycon1_rxn_decomposition.to_csv('toycon1_rxn_decomposition.txt',sep = ' ',float_format='%d')#output txt file
print 'The first 6 entries of the toycon1_rxn_decomposition'
print toycon1_rxn_decomposition.head(6) #print the first few rows

#create reaction formulas
rxnInfoDict = {} #create new dictionary
z=0
for x in model.reactions:
    temp = str.replace(x.build_reaction_string(True),'.0','')
    temp = temp.replace('-','=')
    rxnInfoDict[z] = [x.id,x.name,x.lower_bound,x.upper_bound,temp] #add key value pairs
    z+=1

toycon1_rxn_info = pandas.DataFrame.from_dict(rxnInfoDict,orient = 'index') #create data frame
toycon1_rxn_info.columns = ['rxn_id','rxn_name','lb','ub','rxn_formula'] #write columns
toycon1_rxn_info.to_csv('toycon1_rxn_info.txt',sep = ' ',float_format='%d',quoting=csv.QUOTE_NONE,escapechar=' ')#output formatted txt file
print 'The first 6 entries of the toycon1_rxn_info'
print toycon1_rxn_info.head(6) #output first 6 lines


print 'Objective Reaction is: '
print toycon1_rxn_info[toycon1_rxn_info.rxn_id == 'R4'].rxn_id + '  ' + toycon1_rxn_info[toycon1_rxn_info.rxn_id == 'R4'].rxn_formula##Print objective reaction

#### Function to transform positive and negative coefficient values into 1 for positive and .5 for negative.
def sign(x):
    ma = max(x)
    mi = min(x)
    result = []
    for y in x:
        if(y > 0):
            result.append(1)
        else:
            result.append(.5)
    return result
#create indexing for rxn_id and met_name
rxnUniques,indexRxn,invRxn=numpy.unique(toycon1_rxn_decomposition['rxn_id'].values,return_inverse=True,return_index=True,)
metUniques,indexMet,invMet=numpy.unique(toycon1_rxn_decomposition['met_name'].values,return_inverse=True,return_index=True)

#Create dictionaries for formatting table
rxnID2Name = pandas.Series(toycon1_rxn_info.rxn_name.values,index = toycon1_rxn_info.rxn_id).to_dict()
metName2int = pandas.Series([(len(model.metabolites)-1) - x for x in range(len(model.metabolites))],index = [x.name+'['+x.compartment+']' for x in model.metabolites]).to_dict()
int2metName = pandas.Series([x.name+'['+x.compartment+']' for x in model.metabolites],index = [(len(model.metabolites)-1) - x for x in range(len(model.metabolites))]).to_dict()

#create y coordinates for table from metabolic names
yCor = [metName2int[x] for x in toycon1_rxn_decomposition['met_name'].values]
#set coloring scheme
coloring = {.5 : 'dodgerblue',1:'firebrick'}
temp = invRxn
for i in range(len(invRxn)):
    if invRxn[i] < 5:
        invRxn[i] += 4
    else:
        invRxn[i]  -= 5
temp = list(rxnUniques)
rxnUniques = list(rxnUniques)
rxnUniques[0:4] = temp[5:9]
rxnUniques[4:9] = temp[0:5]

#create data
toycon1_rxn_decomposition2 = pandas.DataFrame({
    'w' : sign(toycon1_rxn_decomposition['coeff'].values),
    'y' : yCor,
    'x' : invRxn,
    'l' : toycon1_rxn_decomposition['coeff'].values
})

fig = plt.figure()  ##Create S matrix figure and save as a pdf
#Create 2D histogram from metabolites and reactions
counts, xedge, yedge, imag = plt.hist2d(bins = [len(rxnUniques),len(metUniques)],x=toycon1_rxn_decomposition2['x'].values,y=toycon1_rxn_decomposition2['y'].values,normed = False,weights = toycon1_rxn_decomposition2['w'].values,cmap = matplotlib.colors.LinearSegmentedColormap.from_list("custom",[(0,'White'),(.5,'dodgerblue'),(1,'firebrick')]))
#format axis labels and ticks
plt.yticks(range(len(metUniques)),[int2metName[x] for x in range(len(metUniques))])
plt.xticks([x * .95 for x in range(len(rxnUniques))],[rxnID2Name[x] for x in rxnUniques],rotation = 50)

#add the coefficient as the label
[plt.text(xedge[x]+.5,yedge[y]+.5,l,color = 'White',ha = 'center', va = 'center') for x,y,l in zip(toycon1_rxn_decomposition2['x'].values,toycon1_rxn_decomposition2['y'].values,toycon1_rxn_decomposition2['l'].values)]
plt.subplots_adjust(bottom = .25)
plt.tick_params(axis = u'both',which = u'both',length = 0)
pp = PdfPages('toycon1_smatrix.pdf') #save figure
pp.savefig(fig)
pp.close()

#function to run FVA with inputs of a required percentage, model, and table to add results

def ef_tbl_fva(pct,model,tbl = None):
    tbl = tbl.copy()
    fvaRes = cobra.flux_analysis.flux_variability_analysis(model,model.reactions[:],fraction_of_optimum=pct) #perform FVA
    ub = fvaRes.maximum.values
    lb = fvaRes.minimum.values #gather bounds
    n = len(ub)
    tbl.insert(1,'fva_lb', lb)
    tbl.insert(2,'fva_ub', ub) #insert bounds into the dataframe
    tbl['fva_pct'] = list([int(z+pct*100) for z in numpy.zeros((n,1),numpy.int64)]) #insert percentage of obj. fun. into dataframe
    def fva_req(u,l): #Function to determine if FVA is required(if one of the upper or lower bounds in either greater than 10^9 or -10^9 respectivly
        result = []
        for x,y in zip(u,l):
            if y > 1*10**-9 or x < -1*10**-9:
                result.append(True)
            else:
                result.append(False)
        return result
    tbl['fva_req'] = fva_req(ub,lb) #insert column with the determination
    def fva_on(u, l): #determine if FVA is on (one of the upper or lower bound must have an absolute value greater than 10^-9
        result = []
        for x, y in zip(u, l):
            if abs(y) > 1 * 10 ** -9 or abs(x) > 1 * 10 ** -9:
                result.append(True)
            else:
                result.append(False)
        return result
    tbl['fva_on'] = fva_on(ub,lb)
    return tbl

fva_pct_result = ef_tbl_fva(0,model,toycon1_rxn_info)#perform unconstrained FBA for initial point
for x in [(y+1)*.05 for y in range(20)]:
    fva_pct_result = fva_pct_result.append(ef_tbl_fva(x,model,toycon1_rxn_info),ignore_index = True) #perform FVA for 5-100% of obj. fun.
fva_pct_result = fva_pct_result.sort_values(by = 'rxn_id') #sort results by rxn_id
fva_pct_result = fva_pct_result.reset_index(drop=True)
print fva_pct_result.head() #print first few lines
#output result to text file
fva_pct_result.to_csv('toycon1_fva_result_percentage.txt',sep= ' ',float_format='%.2f', quoting = csv.QUOTE_NONE,escapechar=' ',index = False)#output fva result

#coloring function
def coloring(on,req):
    if on and req:
        return "firebrick"
    if not req and on:
        return "Grey"
    else:
        return "dodgerblue"
#### Make fva_percentage plot

fig = plt.figure()
toPlot = ['R1','R2']#the two reaction to plot the FVA result dependency on the percentage of the objective function
i =1
for z in toPlot:
    plt.subplot(1,2,i)
    req = fva_pct_result.loc[fva_pct_result['rxn_id'] == z]['fva_req'].values#grab fva_req value
    on = fva_pct_result.loc[fva_pct_result['rxn_id'] == z]['fva_on'].values#grab fva_on values
    xcoordl = fva_pct_result.loc[fva_pct_result['rxn_id'] == z]['fva_lb'].values
    xcoordu = fva_pct_result.loc[fva_pct_result['rxn_id'] == z]['fva_ub'].values#Grab upper, lower bounds,  and perctage
    ycoord = fva_pct_result.loc[fva_pct_result['rxn_id'] == z]['fva_pct'].values
    plt.xlabel("Units of Flux Through Reaction")
    plt.ylabel("Required Flux Through Obj. Fun.") #plot results
    plt.xlim(-0.2,2.2)
    plt.title(rxnID2Name[z])
    plt.scatter(xcoordl,ycoord,color = [coloring(x,y) for x,y in zip(on,req)],marker="s")
    plt.scatter(xcoordu,ycoord,color = [coloring(x,y) for x,y in zip(on,req)],marker = "D")
    plt.hlines(ycoord,xcoordl,xcoordu,color = [coloring(x,y) for x,y in zip(on,req)])
    i +=1
fig.tight_layout()#adjust layout
pp = PdfPages('toycon1_fva_percentage.pdf')#save figure
pp.savefig(fig)
pp.close()
### perform FVA with atp incremented requirements
fva_inc_result = ef_tbl_fva(0,model,toycon1_rxn_info) #perform initial simulation
for x in [(y+1)/16. for y in range(16)]:
    fva_inc_result = fva_inc_result.append(ef_tbl_fva(x,model,toycon1_rxn_info),ignore_index = True) #perform rest of simulation incrementing by 2 atp
fva_inc_result = fva_inc_result.sort_values(by = 'rxn_id')
fva_inc_result = fva_inc_result.reset_index(drop=True) #sort results
print fva_inc_result.head() #print first few lines
#save results
fva_inc_result.to_csv('toycon1_fva_result_increment.txt',sep= ' ',float_format='%.2f', quoting = csv.QUOTE_NONE,escapechar=' ',index = False)#output fva result

#### Make fva_inc plot

fig = plt.figure()
toPlot = ['R1','R2']
i =1
for z in toPlot:
    plt.subplot(1,2,i)
    req = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_req'].values
    on = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_on'].values#grab values as above
    xcoordl = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_lb'].values
    xcoordu = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_ub'].values
    ycoord = [y*32/100. for y in fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_pct'].values] #plot results
    plt.xlim(-0.2,2.2)
    plt.xlabel("Units of Flux Through Reaction")
    plt.ylabel("Required # of ATP Produced.")
    plt.title(rxnID2Name[z])
    plt.scatter(xcoordl,ycoord,color = [coloring(x,y) for x,y in zip(on,req)],marker="s")
    plt.scatter(xcoordu,ycoord,color = [coloring(x,y) for x,y in zip(on,req)],marker = "D")
    plt.hlines(ycoord,xcoordl,xcoordu,color = [coloring(x,y) for x,y in zip(on,req)])
    i +=1
fig.tight_layout()
pp = PdfPages('toycon1_fva_increment.pdf')
pp.savefig(fig) #save figure
pp.close()
#create fva_inc plot for all reactions
fig = plt.figure()
toPlot = [y.id for y in model.reactions]#grab all reactions
i =1
matplotlib.rc('xtick', labelsize=4)
matplotlib.rc('ytick', labelsize=4)
for z in toPlot:
    plt.subplot(3,3,i)
    #grab appropriate data, and plot the results
    req = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_req'].values
    on = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_on'].values
    xcoordl = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_lb'].values
    xcoordu = fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_ub'].values
    ycoord = [y*32/100. for y in fva_inc_result.loc[fva_inc_result['rxn_id'] == z]['fva_pct'].values]
    plt.title(rxnID2Name[z],fontsize = 10)
    plt.xlabel("Units of Flux Through Reaction",fontsize  = 5)
    plt.ylabel("Required # of ATP Produced.",fontsize = 5)
    plt.scatter(xcoordl,ycoord,color = [coloring(x,y) for x,y in zip(on,req)],marker="s",s = 3)
    plt.scatter(xcoordu,ycoord,color = [coloring(x,y) for x,y in zip(on,req)],marker = "D",s = 3)
    plt.hlines(ycoord,xcoordl,xcoordu,color = [coloring(x,y) for x,y in zip(on,req)],linewidths = .2)
    i +=1
fig.tight_layout()#format layout
pp = PdfPages('toycon1_fva_increment_all.pdf')#save result
pp.savefig(fig)
pp.close()
######Perform Single Gene Deletion######
res = cobra.flux_analysis.single_gene_deletion(model)
rxnIDs = []
for x in res.index.values:
    for y in model.reactions:
        if y.gene_name_reaction_rule == x:
            rxnIDs.append(y.id) #match reactions and flux changes, to create same ordering
res.insert(2,'rxn_id',rxnIDs) #insert matching rxn IDs
res = res.sort_values('rxn_id') #sort values
toycon1_gene_ko = toycon1_rxn_info.copy() #make copy of rxn_info dataframe
toycon1_gene_ko.insert(0,'gene_ko_atp',res['flux'].values) #insert results into dataframe
toycon1_gene_ko.insert(0,'gene_id',res.index.values)
print toycon1_gene_ko #print dataframe
#save result
toycon1_gene_ko.to_csv('toycon1_gene_knockout_screen.txt',sep= ' ',float_format='%d', quoting = csv.QUOTE_NONE,escapechar=' ',index = False)#output ko result

### Double Gene Deletions#########

res2ko = cobra.flux_analysis.double_gene_deletion(model,return_frame=True,number_of_processes = 1)#perform double gene deletions (# of processes is dependent on system's cpu count
print res2ko#print result
g1 = res2ko.index.values.tolist()#make gene lists
g2 = res2ko.columns.values.tolist()
gene_ko2_rxnsT = pandas.DataFrame(columns = ['genes','rxn1','rxn2','name1','name2','rxns','names','atp'])#create new data frame
i = 0
for x in range(len(g1)):
    for y in range(x):
        temp = list()
        temp.append(g1[x]+'_'+g2[y])#gather genes deleted
        for z in model.reactions:
            if z.gene_name_reaction_rule == g2[y] or z.gene_name_reaction_rule == g1[x]:
                temp.append(z.id) #get matching reaction
        for z in model.reactions:
            if z.gene_name_reaction_rule == g2[y] or z.gene_name_reaction_rule == g1[x]:
                temp.append(z.name) #get matching reaction
        temp.append(temp[1]+'_'+temp[2]) #append combined gene names
        temp.append(temp[3]+' / '+temp[4]) #append combines rxn names
        temp.append(round(res2ko.iloc[x,y],1)) #append result of deletion
        gene_ko2_rxnsT.loc[i] = temp #add to dataframe
        i += 1
gene_ko2_rxns = gene_ko2_rxnsT.copy().drop('atp',1).sort_values("genes").reset_index(drop=True)#sort values
gene_ko2_rxns.to_csv('toycon1_gene_2ko_rxns.txt',sep= ' ',float_format='%d', quoting = csv.QUOTE_NONE,escapechar=' ',index = False)#output ko result

print gene_ko2_rxns.head()#print first few lines of dataframe

gene_ko2_tbl = gene_ko2_rxnsT.copy().drop('atp',1)
gene_ko2_tbl.insert(1,'atp',gene_ko2_rxnsT['atp'].values)#move ordering of dataframe to make new dataframe
atp1 = []
atp2 = []
for x,y in zip(gene_ko2_tbl['rxn1'].values,gene_ko2_tbl['rxn2'].values):
    atp1.append(toycon1_gene_ko.loc[toycon1_gene_ko["rxn_id"]== x]['gene_ko_atp'].values[0])#find matching atp affect for individual ko
    atp2.append(toycon1_gene_ko.loc[toycon1_gene_ko["rxn_id"]== y]['gene_ko_atp'].values[0])
gene_ko2_tbl.insert(8,"atp1",atp1)
gene_ko2_tbl.insert(9,"atp2",atp2)#insert results
gene_ko2_tbl.insert(10,"atp12",gene_ko2_tbl['atp'].values) #insert double ko results

gene_ko2_tbl = gene_ko2_tbl.sort_values("rxns").reset_index(drop = True) #sort dataframe
print gene_ko2_tbl.head() #print first few liens
gene_ko2_tbl.to_csv('toycon1_gene_2ko_tbl.txt',sep= ' ',float_format='%d', quoting = csv.QUOTE_NONE,escapechar= ' ',index = False) #save the result
filtered_ko2 = gene_ko2_tbl.query('atp12 < atp1 and atp12 < atp2').reset_index(drop=True) #filter results to show reactions where the double deletion had a greater effect than either single deletion
print filtered_ko2 #print filtered results

plt.show() #display figures