-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpySPL.py
More file actions
124 lines (104 loc) · 5.03 KB
/
pySPL.py
File metadata and controls
124 lines (104 loc) · 5.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from lxml import etree
from datetime import date
# Prefix -> namespace URI map handed to every XPath query in this module.
# NOTE(review): the XPath expressions used here appear to be adapted from the
# FDA SPL stylesheet (confirm):
# http://www.accessdata.fda.gov/spl/stylesheet/spl-common.xsl
namespaces = dict(v3="urn:hl7-org:v3")
def normalize_date(date_string):
    """Convert a ``YYYYMMDD`` date string into the form ``"Jan 15, 2009"``.

    Only the first eight characters are used, so longer timestamps (for
    example ``YYYYMMDDhhmmss``) are accepted and their time part is ignored.

    Args:
        date_string: string whose first eight characters are the digits
            ``YYYYMMDD``.

    Returns:
        The date formatted with ``strftime("%b %d, %Y")``.

    Raises:
        ValueError: if the string does not start with eight digits, or if
            those digits do not form a valid calendar date.
    """
    stamp = date_string[:8]
    # Reject truncated input explicitly: without this check a 7-character
    # value such as "2009011" would silently be read as day 1.
    if len(stamp) != 8 or not stamp.isdigit():
        raise ValueError(
            "expected a date string starting with YYYYMMDD, got %r" % (date_string,)
        )
    year = int(stamp[0:4])
    month = int(stamp[4:6])
    day = int(stamp[6:8])
    return date(year, month, day).strftime("%b %d, %Y")
class DrugLabel(object):
    """Represents a drug label in the SPL format.

    Takes one argument, spl_label, which can be either a URL or a file path.
    The document is parsed once on construction; every accessor method runs
    an XPath query against the parsed tree.

    Most accessor methods carry a ``label`` function attribute holding a short
    human-readable name for the value they return (presumably used by callers
    as a column/field header -- confirm against the calling code).

    Accessors that index ``[0]`` into an XPath result raise ``IndexError``
    when the label does not contain the queried element.
    """

    def __init__(self, spl_label):
        # Kept as given so build_url() can derive the label id from it.
        self.label_data = spl_label
        self.xml = etree.parse(spl_label)

    def actives(self):
        """Return a sorted list of the distinct active compound names."""
        name_nodes = self.xml.xpath(
            "//v3:ingredientSubstance/v3:activeMoiety/v3:activeMoiety/v3:name",
            namespaces=namespaces,
        )
        # Going through a set removes duplicate names.
        return sorted(set(node.text for node in name_nodes))
    actives.label = "active cmpds"

    def start_date(self):
        """Return the start-of-marketing date as a formatted string.

        The raw value is passed through normalize_date().
        """
        date_string = self.xml.xpath(
            "//v3:subjectOf/v3:marketingAct/v3:effectiveTime/v3:low/@value",
            namespaces=namespaces,
        )[0]
        return normalize_date(date_string)
    start_date.label = "marketing start date"

    # NOTE: there is deliberately no end_date() accessor. The marketing end
    # date ("//v3:subjectOf/v3:marketingAct/v3:effectiveTime/v3:high/@value",
    # the expiration date of the last lot released to the market, per
    # http://spl-work-group.wikispaces.com/file/view/creating_otc_sp_documentsl.pdf)
    # was None for all labels, so it was not worth implementing.

    def marketing_category(self):
        """Return the marketing category."""
        return self.xml.xpath(
            "//v3:subjectOf/v3:approval/v3:code/@displayName",
            namespaces=namespaces,
        )[0]
    marketing_category.label = "marketing category"

    def revision_date(self):
        """Return the label revision date as a formatted string.

        The raw value is passed through normalize_date().
        """
        date_string = self.xml.xpath(
            "/v3:document/v3:effectiveTime/@value",
            namespaces=namespaces,
        )[0]
        return normalize_date(date_string)
    revision_date.label = "revision date"

    def label_type(self):
        """Return the drug label type.

        Typically 'HUMAN OTC DRUG LABEL' or 'HUMAN PRESCRIPTION DRUG LABEL'.
        This is the displayName of the first v3:code element in the document.
        """
        return self.xml.xpath("//v3:code/@displayName", namespaces=namespaces)[0]
    label_type.label = "label type"

    def ndc(self):
        """Return the drug's NDC number."""
        # This XPath query is NOT from the SPL xsl file.
        return self.xml.xpath(
            "//v3:manufacturedProduct/v3:manufacturedProduct/v3:code/@code",
            namespaces=namespaces,
        )[0]
    ndc.label = "ndc"

    def name(self):
        """Return the drug's name with tab and newline characters removed."""
        name_node = self.xml.xpath(
            "//v3:manufacturedProduct/v3:manufacturedProduct/v3:name",
            namespaces=namespaces,
        )[0]
        return name_node.text.replace("\t", "").replace("\n", "")
    name.label = "name"

    def distributor(self):
        """Return the drug's distributor."""
        return self.xml.xpath(
            "//v3:author/v3:assignedEntity/v3:representedOrganization/v3:name",
            namespaces=namespaces,
        )[0].text
    distributor.label = "distributor"

    def dosage_form(self):
        """Return the drug's dosage form."""
        return self.xml.xpath(
            "//v3:manufacturedProduct/v3:manufacturedProduct/v3:formCode/@displayName",
            namespaces=namespaces,
        )[0]
    dosage_form.label = "dosage form"

    # Just a helper for the word-search methods below, so it has no label.
    def _get_word_list(self, word):
        """Return the elements whose text contains 'word', 'Word' or 'WORD'.

        Only those three casings are matched (lower, upper, capitalized);
        other mixed-case spellings are not found.
        """
        word = str(word)
        # This query is also NOT from the SPL xsl file.
        # The search terms are bound as XPath variables rather than being
        # pasted into the expression, so a word containing a quote character
        # cannot break or alter the query.
        query = (
            "//*[text()[contains(., $lower) or contains(., $upper)"
            " or contains(., $capitalized)]]"
        )
        return self.xml.xpath(
            query,
            namespaces=namespaces,
            lower=word.lower(),
            upper=word.upper(),
            capitalized=word.capitalize(),
        )

    def test_word(self, word):
        """Return 1 if 'word' occurs anywhere in the label, otherwise 0."""
        return 1 if self._get_word_list(word) else 0
    # TODO: test_word.label = "%s?" % word  (needs the word, unknown here)

    def get_word_section(self, word):
        """Return the names of the sections in which 'word' occurs.

        For every matching element the nearest enclosing v3:section is looked
        up and its code/@displayName collected.

        Returns:
            A sorted list of distinct section display names, or the string
            "n/a" when nothing was found.
        """
        section_names = []
        for element in self._get_word_list(word):
            section_names.extend(
                element.xpath(
                    "ancestor::v3:section[1]/v3:code/@displayName",
                    namespaces=namespaces,
                )
            )
        if not section_names:
            return "n/a"
        # Sorted so the result order is deterministic.
        return sorted(set(section_names))

    def get_word_time(self, word):
        """Return the LATEST section effectiveTime for all mentions of 'word'.

        Collects effectiveTime/@value from every v3:section enclosing a
        matching element.

        Returns:
            The latest date, formatted by normalize_date(), or the string
            "n/a" when nothing was found.
        """
        raw_times = []
        for element in self._get_word_list(word):
            raw_times += element.xpath(
                "ancestor::v3:section/v3:effectiveTime/@value",
                namespaces=namespaces,
            )
        if not raw_times:
            return "n/a"
        # Pick the latest value BEFORE formatting: the raw YYYYMMDD digits
        # sort chronologically, whereas formatted strings such as
        # "Sep 01, 2009" would be compared alphabetically by month name.
        latest = max(raw_times, key=lambda value: value[:8])
        return normalize_date(latest)

    def build_url(self):
        """Build the accessdata.fda.gov URL from the label's file name.

        The id is the last path component of the spl_label given to the
        constructor, cut at its first '.'.
        """
        # Treat backslashes as separators too, so Windows paths work.
        file_name = self.label_data.replace("\\", "/").split("/")[-1]
        label_id = file_name.split(".")[0]
        return "http://www.accessdata.fda.gov/spl/data/%s/%s.xml" % (label_id, label_id)
    build_url.label = "url"