-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathGO_parseDefinitions.py
More file actions
66 lines (43 loc) · 2.12 KB
/
GO_parseDefinitions.py
File metadata and controls
66 lines (43 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Parse all GO term definitions, subsets and term-to-subset mappings from the gzipped GO XML file and load them into the database
import Config
import sys, string
import MySQLdb
import Database
import gzip
from xml.etree import ElementTree
# Rebuild the go_definitions, go_subsets and go_subset_mappings tables from
# the gzipped GO XML file named by Config.GO_DEFINITIONS, then record the run
# in the update_tracker table.
with Database.db as cursor:

    # Empty the three target tables first so the load replaces earlier rows.
    for tableName in ("go_definitions", "go_subsets", "go_subset_mappings"):
        cursor.execute("TRUNCATE TABLE " + Config.DB_NAME + "." + tableName)
    Database.db.commit()

    # The whole document is parsed into an in-memory tree, so the compressed
    # file can be closed as soon as parsing is done.
    with gzip.open(Config.GO_DEFINITIONS, 'r') as handle:
        ontology = ElementTree.parse(handle).getroot()

    # The statement text never changes between rows; build each one once
    # instead of re-concatenating it on every iteration. Values are always
    # passed separately as query parameters.
    subsetInsert = "INSERT INTO " + Config.DB_NAME + ".go_subsets VALUES( '0', %s, %s, NOW( ), 'active' )"
    definitionInsert = "INSERT INTO " + Config.DB_NAME + ".go_definitions VALUES( %s, %s, %s, %s, %s, NOW( ), 'active' )"
    mappingInsert = "INSERT INTO " + Config.DB_NAME + ".go_subset_mappings VALUES ( '0',%s, %s,'active' )"

    # Insert every subset declared in the header and remember the row id the
    # database assigned to it, keyed by the upper-cased subset id, so terms
    # can be linked to subsets below.
    goSubsets = {}
    for element in ontology.findall('header/subsetdef'):
        subsetID = element.find('id').text.strip()
        subsetDesc = element.find('name').text.strip()
        cursor.execute(subsetInsert, [subsetID, subsetDesc])
        goSubsets[subsetID.upper()] = cursor.lastrowid

    Database.db.commit()

    insertCount = 0
    for element in ontology.findall('term'):

        goFullID = element.find('id').text.strip()
        goName = element.find('name').text.strip()
        goNamespace = element.find('namespace').text.strip()

        # The definition is optional. find() returns None when the element is
        # absent, so test by identity; an element that is present but empty
        # has text None, which is stored as an empty string as well.
        definitionNode = element.find('def/defstr')
        if definitionNode is not None:
            goDefinition = (definitionNode.text or "").strip()
        else:
            goDefinition = ""

        # Drop the first three characters of the full id (presumably the
        # "GO:" prefix -- confirm against the input file).
        goShortID = goFullID[3:]

        cursor.execute(definitionInsert, [goShortID, goFullID, goName, goDefinition, goNamespace])

        # Link the term to each subset it lists. A subset that was not
        # declared in the header raises KeyError rather than being skipped.
        for subset in element.findall('subset'):
            subsetID = goSubsets[subset.text.strip().upper()]
            cursor.execute(mappingInsert, [goShortID, subsetID])

        # Commit in batches of Config.DB_COMMIT_COUNT terms.
        insertCount += 1
        if insertCount % Config.DB_COMMIT_COUNT == 0:
            Database.db.commit()

    # Record this run; the final commit also flushes any partial batch.
    cursor.execute("INSERT INTO " + Config.DB_STATS + ".update_tracker VALUES ( '0', 'GO_parseDefinitions', NOW( ) )")
    Database.db.commit()

sys.exit()