-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxdfetchfiles.py
More file actions
174 lines (131 loc) · 5.13 KB
/
xdfetchfiles.py
File metadata and controls
174 lines (131 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import numpy as np
from astropy.time import Time
import os
def _parse_bat_cell(text, column_index):
    """Return a table cell as ``int`` when it parses as one, else unchanged.

    Cells in the first column (index 0) are always kept as text.
    """
    if column_index > 0:
        try:
            return int(text)
        except ValueError:
            # Not an integer literal: keep the raw text.
            pass
    return text


def _tidy_label(text, strip_nbsp=True):
    """Normalise a name/type cell for the whitespace-separated output table.

    Applies, in order: optional removal of ' \\xa0 ' padding, removal of
    ' ', removal of one trailing ' ' if present, and replacement of any
    remaining ' ' by '_'.  Empty cells are returned as ''.
    """
    # A cell made only of digits has already been turned into an int.
    text = str(text)
    if strip_nbsp:
        text = text.replace(' \xa0 ', '')
    # NOTE(review): as written this removes every single space, which makes
    # the two steps below no-ops.  The literal may originally have been a
    # longer run of spaces -- confirm against the upstream file.
    text = text.replace(' ', '')
    if text.endswith(' '):  # safe on '' (indexing [-1] is not)
        text = text[:-1]
    return text.replace(' ', '_')


def update_BAT_list(fluxmin=20):
    """Download the current BAT transient table and save the bright sources.

    The page at ``BAT_current.html`` is fetched and every ``<tr>`` element is
    read.  The first row supplies the column titles; following rows are
    consumed until one does not have exactly 13 children.  A row is kept when
    its 'Today#' cell contains '(' with a flux value in front of it and that
    value is not below ``fluxmin``.

    The kept rows are written to ``BAT_record_<MJD>.dat`` in the current
    working directory, where ``<MJD>`` is the integer part of today's MJD.
    The flux column is written as the listed value divided by 1000 under the
    heading 'Flux(Crab)' (presumably the page lists mCrab -- TODO confirm).

    Parameters
    ----------
    fluxmin : float, optional
        Lower threshold on the listed 'Today#' flux value, in the units used
        by the web page.  Default 20.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page contains no table rows, or a flux cell is not numeric.
    KeyError
        If an expected column title is missing from the table.
    """
    import requests
    import lxml.html as lh
    import pandas as pd
    import numpy as np
    from astropy.time import Time

    url = 'https://swift.gsfc.nasa.gov/results/transients/BAT_current.html'

    # Fetch the page; fail early instead of parsing an error page.
    page = requests.get(url, timeout=60)
    page.raise_for_status()
    doc = lh.fromstring(page.content)

    # All table rows on the page, header first.
    tr_elements = doc.xpath('//tr')
    if not tr_elements:
        raise ValueError('no table rows found at ' + url)

    # One (title, values) pair per header cell.
    col = []
    for i, t in enumerate(tr_elements[0], start=1):
        name = t.text_content()
        print(i, name)
        col.append((name, []))

    # Data rows follow the header; the first row that is not 13 cells wide
    # marks the end of the table.
    for T in tr_elements[1:]:
        if len(T) != 13:
            break
        for i, t in enumerate(T.iterchildren()):
            col[i][1].append(_parse_bat_cell(t.text_content(), i))

    df = pd.DataFrame({title: column for (title, column) in col})

    #### Construct an outputtable array for today's BAT transient list
    today = np.array(df['Today#'], dtype=object)
    fluxes = np.zeros(len(today), dtype=float)
    selected = np.zeros(len(today), dtype=bool)
    for i, entry in enumerate(today):
        # Cells holding only digits were converted to int above; those
        # carry no '(...)' part and must simply be skipped, not crash.
        entry = str(entry)
        n = entry.find('(') - 1
        if n > 0:
            flux = float(entry[:n])  # Flux lives here
            fluxes[i] = flux
            # Screen to retain only those at F >= fluxmin (definable)
            selected[i] = not (flux < fluxmin)

    def pick(title):
        """Return the selected entries of one table column."""
        return np.array(df[title], dtype=object)[selected]

    FF = fluxes[selected]
    RA = pick('RA J2000 Degs')
    DEC = pick('Dec J2000 Degs')
    src = [_tidy_label(s) for s in pick('Source Name')]
    AltName = [_tidy_label(s, strip_nbsp=False) for s in pick('Alternate Name')]
    stype = [_tidy_label(s) for s in pick('Source Type')]

    # Getting today's date in mjd to label the table name
    dstamp = str(int(Time.now().mjd))
    fname = 'BAT_record_' + dstamp + '.dat'

    # Write out today's table; the context manager closes the file even if
    # formatting a row fails.
    with open(fname, "w+") as fil:
        fil.write('%15a %15a %10a %30a %30a %20a \n'
                  % ('RA', 'DEC', 'Flux(Crab)', 'Source', 'AltName', 'Type'))
        for ra, dec, flux, s, alt, typ in zip(RA, DEC, FF, src, AltName, stype):
            fil.write('%15.3f %15.3f %10.4f %30a %30a %20a \n'
                      % (float(ra), float(dec), flux / 1000., s, alt, typ))
def update_maxi_list():  # INGEST NEW MAXI TOPFLUX TABLES
    """Download MAXI ``fluxtop<MJD>.dat`` tables up to today's MJD.

    The current working directory is scanned for files named
    ``fluxtop<digits>.dat``.  Starting from the largest date found there and
    ending at today's integer MJD (both inclusive), each table is requested
    from ``http://maxi.riken.jp/fluxtop/`` with ``wget``, one after another.
    The function returns once every download attempt has finished; a failed
    download does not stop the remaining ones.

    NOTE(review): the range starts at the newest date already on disk, so
    that file is requested again -- confirm this refresh is intended.

    Raises
    ------
    IndexError
        If no ``fluxtop<digits>.dat`` file exists to supply a start date.
    FileNotFoundError
        If the ``wget`` executable cannot be found.
    """
    import glob
    import re
    import subprocess
    from astropy.time import Time

    # What is today's MJD?
    mjdnow = int(Time.now().mjd)

    # Collect the dates of the fluxtop files already present.  Files that
    # match the glob but do not carry a purely numeric date are ignored.
    dates = []
    for name in glob.glob('fluxtop*dat'):
        match = re.fullmatch(r'fluxtop(\d+)\.dat', name)
        if match:
            dates.append(int(match.group(1)))
    if not dates:
        raise IndexError(
            'no fluxtop<MJD>.dat file found in the current directory; '
            'cannot determine the start date')

    # ID the latest file numerically rather than by filename sort order.
    maxdate = max(dates)

    # Grab any newer fluxtop entries.  An argument list is used instead of a
    # shell string, and each download is waited for.
    for el in range(maxdate, mjdnow + 1):
        print('fetching fluxtop'+str(el)+'.dat')
        subprocess.run(
            ['wget', 'http://maxi.riken.jp/fluxtop/fluxtop' + str(el) + '.dat'],
            check=False,
        )