forked from OpenCLIM/tools-clip
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
430 lines (340 loc) · 16.4 KB
/
main.py
File metadata and controls
430 lines (340 loc) · 16.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
import subprocess
from os import listdir, getenv, mkdir, remove, walk
from os.path import isfile, join, isdir
from pathlib import Path
import logging
import glob
import json
from shutil import rmtree
import rasterio
import geopandas as gpd
import string
import random
def check_output_dir(path):
"""
Check output directory exists and create if not
"""
if isdir(path) is False:
mkdir(path)
else:
files = [f for f in listdir(path) if isfile(join(path, f))]
for file in files:
remove(join(path,file))
return
def output_file_name(input_name, output_name, number_of_input_files):
"""
Set the name for the output file from each clip process
If more than 1 input file to be clipped, default behaviour should be used
If only one input file passed, and output file name not set, use default behavior
If only one input file passed, and the output file name is passed, use output file name
"""
input_name, input_extension = input_name.split('.')
if number_of_input_files > 1 or output_name is None:
output_file = input_name + '_clip.' + input_extension
else:
output_file = output_name
return output_file
def fetch_clip_file():
"""
Check the clip extents directory for a file to clip the input data with.
Return None is no file found.
"""
clip_file = [] # set in case no file is passed
extensions = ['gpkg', 'shp', 'txt']
for extension in extensions:
for file in glob.glob(join(data_path, input_dir, clip_extent_dir, "*.%s" % extension)):
clip_file.append(file)
return clip_file
def get_data_type(file, vector_types, raster_types):
"""
Get the data type, raster or vector, of the clip file
"""
# get the file extension to identify data type
extension_text = file.split('.')[-1]
# set the data type
if extension_text in raster_types:
return 'raster'
elif extension_text in vector_types:
return 'vector'
else:
return None
def filter_input_files(input_file_list, file_extensions):
"""
Get those files from the list of input files where the file extensions is recognised as a raster or vector data type
"""
verified_file_list = []
# loop through the files
for file in input_file_list:
# fetch file extension
file_extension = file.split('.')[-1].lower()
# check if file extension in defined set of usable file types
if file_extension in file_extensions:
verified_file_list.append(file)
return verified_file_list
def find_extents_file(name, path):
for root, dirs, files in walk(path):
if name in files:
return join(root, name)
def get_crs_of_data(file, vector=False):
"""
Find the crs of the file. Checks that it exists and return it for any further required checks.
"""
if vector is False:
info = subprocess.run(["gdalinfo", "-json", file], stdout=subprocess.PIPE)#.stdout.splitlines()
print('**********')
#print(info.stdout.decode("utf-8"))
info = info.stdout.decode("utf-8")
#info = info.replace('\n','')
info_ = json.loads(info)
#print(info_.keys())
if 'coordinateSystem' in info_.keys():
proj = info_['coordinateSystem']['wkt'].split(',')[0].replace('PROJCRS[', '').replace('"', '')
else:
# no projection information available
proj = None
elif vector is True:
info = subprocess.run(["ogrinfo", "-ro", "-so", "-al", file], stdout=subprocess.PIPE)#, "glasgow_city_centre_lad"])
info = info.stdout
info = info.decode("utf-8").split('\n')
for line in info:
if 'PROJCRS' in line:
proj = line.replace('PROJCRS[', '').replace('"', '').replace(',', '')
return proj
# file paths
data_path = '/data'
input_dir = 'inputs'
clip_extent_dir = 'clip_extent'
data_to_clip_dir = 'clip'
output_dir = 'outputs'
# check input directories exist and create if not
#check_output_dir(join(data_path, input_dir))
#check_output_dir(join(data_path, input_dir, clip_extent_dir))
#check_output_dir(join(data_path, input_dir, data_to_clip_dir))
# check output dir exists and create if not
check_output_dir(join(data_path, output_dir))
# chek dir for log file exists
#check_output_dir(join(data_path, output_dir, 'log'))
logger = logging.getLogger('tool-clip')
logger.setLevel(logging.INFO)
log_file_name = 'tool-clip-%s.log' %(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)))
fh = logging.FileHandler( Path(join(data_path, output_dir)) / log_file_name)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
logger.addHandler(fh)
logger.info('Log file established!')
# a list of default options
defaults = {
'output_crs': '27700',
'crs_bng' : 'OSGB 1936 / British National Grid',
'cut_to_bounding_box': True
}
raster_accepted = ['asc', 'tiff', 'geotiff', 'jpeg']
vector_accepted = ['shp', 'gpkg', 'geojson']
# get folder structure
logger.info(glob.glob(join(data_path,'*'), recursive=True))
logger.info(glob.glob('---'))
logger.info(glob.glob(join(data_path,input_dir,'*'), recursive=True))
# get input file(s) - the data to clip
input_files = [f for f in listdir(join(data_path, input_dir, data_to_clip_dir)) if isfile(join(data_path, input_dir, data_to_clip_dir, f))]
if len(input_files) == 0:
print('Error! No input files found! Terminating')
logger.info('Error! No input files found! Terminating!')
exit(2)
logger.info('Input files found: %s' %input_files)
# get input files(s)
input_files = filter_input_files(input_files, vector_accepted+raster_accepted)
if len(input_files) == 0:
print('Error! No input files given specified data format! Terminating!')
logger.info('Error! No input files given specified data format! Terminating!')
exit(2)
logger.info('Verified input files: %s' %input_files)
# get extents for clip - file or defined extents
# clip area file
# checks the dataslot
clip_file = fetch_clip_file()
print('clip files is:', clip_file)
logger.info('Clip file: %s' % clip_file)
# check if file passed from previous step
outcome = [find_extents_file('extents.txt', data_path)]
logger.info(outcome)
print('Outcome is:', outcome)
if outcome[0] is not None:
clip_file = outcome
logger.info('Clip file set to:', clip_file)
# defined extents
extent = None
if len(clip_file) == 0: #if not files passed expect an env to be passed defiing the extents
extent = getenv('extent')
if extent == '' or extent == 'None': # if no extent passed
extent = None
print('Extent: %s' % extent)
logger.info('Extent: %s' % extent)
# if no extent string set no, presume file is passed and read in. if no file, return an error and exit
if extent is None and len(clip_file) == 1 and clip_file != None:
# if a text bounds file passed, convert to extent text so can use that existing method
# xmin,ymin,xmax,ymax
print('reading extents file')
cf_ext = clip_file[0].split('.')[1]
print(cf_ext)
if cf_ext == 'txt':
with open(join(data_path, input_dir, clip_file[0])) as ef:
extent = ef.readline()
clip_file = None
print('Extent set as:', extent)
print('Clip file is:', clip_file)
if extent is None and clip_file is None:
# if neither a clip file set or an extent passed
print('Error! No clip_file var or extent var passed. Terminating!')
logger.info('Error: No clip file found and no extent defined. At least one is required. Terminating!')
exit(2)
# get if cutting to shapefile or bounding box of shapefile (if extent shapefile passed)
clip_to_extent_bbox = getenv('clip_to_extent_bbox')
if clip_to_extent_bbox is None:
cut_to_bounding_box = defaults['cut_to_bounding_box']
elif clip_to_extent_bbox == 'clip-to-bounding-box':
cut_to_bounding_box = True
elif clip_to_extent_bbox == 'clip-to-vector-outline':
cut_to_bounding_box = False
else:
print(clip_to_extent_bbox)
# output file - this is only used if a single input file is passed
output_file = getenv('output_file')
print('Output file: %s' % output_file)
logger.info('Output file: %s' % output_file)
if len(input_files) > 1:
logger.info('Setting output file var as None as more than one input file passed')
output_file = None
elif output_file is None or output_file == 'None':
logger.info('No output file var passed')
output_file = None
elif output_file[0] == '' or output_file == '[]': # needed on DAFNI
print('Warning! Empty output file var passed.')
logger.info('Empty output file var passed')
output_file = None
# get save_logfile status
save_logfile = getenv('save_logfile') # get the type of data to be clipped. raster or vector
if save_logfile is None: # grab the default if the var hasn't been passed
print('Warning! No save_logfile env passed. Default, False, will be used.')
save_logfile = False
elif save_logfile.lower() == 'true':
save_logfile = True
elif save_logfile.lower() == 'false':
save_logfile = False
else:
print('Error! Incorrect setting for save logfile parameter (%s)' %save_logfile)
logger.info('Error! Incorrect setting for save logfile parameter (%s)' % save_logfile)
# END OF PARAMETER FETCHING
# START RUNNING THE PROCESSING
logger.info('Starting to loop through files and running clip process')
# loop through each file to clip
for input_file in input_files:
print('Input file is: %s' % input_file)
# set the data type
data_type = get_data_type(input_file, vector_types=vector_accepted, raster_types=raster_accepted)
logger.info('Data type for file %s to be clipped identified as: %s' % (input_file, data_type))
# get the crs of the input file
input_crs = get_crs_of_data(join(data_path, input_dir, 'clip', input_file))
if input_crs is None:
print('Warning! No projection information could be found for the input file.')
logger.info('Warning! No projection information could be found for the input file.')
input_crs = defaults['crs_bng']
print('Warning! Using default projection (british national grid) for input file.')
logger.info('Warning! Using default projection (british national grid) for input file.')
print('Input CRS is: %s' % input_crs)
logger.info('Input CRS is: %s' % input_crs)
# run clip process
if data_type is None:
logger.info('Data type could no be identified for the found input file %s. Skipping clip process for this file.' % input_file)
print('Error. Data type is None')
elif data_type == 'vector':
print('Running vector clip')
logger.info('Using vector methods')
output_file_name_set = output_file_name(input_file, output_file, len(input_files))
if clip_file is not None:
logger.info('Using clip file method')
logger.info("Running....")
subprocess.run(["ogr2ogr", "-clipsrc", join(data_path, input_dir, clip_file), "-f", "GPKG",
join(data_path, output_dir, output_file_name_set), join(data_path, input_dir, input_file)])
logger.info("....completed processing")
elif extent is not None:
print('Running extent method')
logger.info('Using extent method')
logger.info("Running....")
subprocess.run(["ogr2ogr", "-spat", *extent, "-f", "GPKG", join(data_path, output_dir,output_file_name_set),
join(data_path, input_dir, input_file)])
logger.info("....completed processing")
elif data_type == 'raster':
print('Running raster clip')
print('Running for input:', input_file)
output_file_name_set = output_file_name(input_file, output_file, len(input_files))
logger.info('Using raster methods')
if extent is not None:
logger.info("Using extent method")
print('Using extent method')
logger.info("Running....")
print(join(data_path, input_dir, data_to_clip_dir, input_file))
print(join(data_path, output_dir, output_file_name_set))
print(extent)
print('Running subprocess')
extents = extent.split(",")
subprocess.run(["gdalwarp", "-te", extents[0], extents[1], extents[2], extents[3], join(data_path, input_dir, data_to_clip_dir, input_file), join(data_path, output_dir, output_file_name_set)])
#subprocess.run(["gdalwarp", "-te", *extent, join(data_path, input_dir, data_to_clip_dir, input_file),
# join(data_path, output_dir, output_file_name_set)])
logger.info("....completed processing")
elif clip_file is not None and len(clip_file) > 0:
print("Using clip file method")
logger.info("Using clip file method")
# get crs of clip file
clip_crs = get_crs_of_data(clip_file[0], vector=True)
# if crs could not be found, return error
if clip_crs is None:
print('Error! No projection information could be found for the clip file.')
logger.info('Error! No projection information could be found for the clip file.')
exit()
# need to check crs of clip file is same as that for the data being clipped
if clip_crs != input_crs:
print("Error! CRS of datasets do not match!!! (input: %s ; clip: %s)" %(input_crs, clip_crs))
logger.info("Error! CRS of datasets do not match!!!")
exit()
if cut_to_bounding_box is False:
# crop to the shapefile, not just the bounding box of the shapefile
print('Clipping with cutline flag')
command_output = subprocess.run(["gdalwarp", "-cutline", clip_file[0], "-crop_to_cutline", join(data_path, input_dir, data_to_clip_dir, input_file),
join(data_path, output_dir, output_file_name_set)])
else:
print('clipping with bounding box of vector data')
print(join(data_path, input_dir, data_to_clip_dir, input_file))
print(join(data_path, output_dir, output_file_name_set)
)
# this should work but does not for some reason....
#command_output = subprocess.run(["gdalwarp", "-cutline", clip_file[0], join(data_path, input_dir, data_to_clip_dir, input_file), join(data_path, output_dir, output_file_name_set)])
# so instead using this....
# read in shapefile
t = gpd.read_file(clip_file[0])
# get bounding box for shapefile
bounds = t.geometry.total_bounds
# run clip
subprocess.run(["gdalwarp", "-te", str(bounds[0]), str(bounds[1]), str(bounds[2]), str(bounds[3]), join(data_path, input_dir, data_to_clip_dir, input_file), join(data_path, output_dir, output_file_name_set)])
# check the code returned from GDAL to see if an error occurred or not
#if command_output.returncode == 1:
# print('Error! Clip did not run. Please check for common errors such as missing projection information.')
# logger.info('Error! Clip did not run for %s. Please check for common errors such as missing projection information.' % input_file)
#elif command_output.returncode == 0:
# logger.info('Clip process ran without an error being returned (%s)' % input_file)
# add check to see if file written to directory as expected
if isfile(join(data_path, output_dir, output_file_name_set)):
logger.info("Raster clip method completed. Output saved (%s)" %join(data_path, output_dir, output_file_name_set))
print('Clip completed and file written (%s)' % join(data_path, output_dir, output_file_name_set))
else:
logger.info("Failed. Expected output not found (%s)" % join(data_path, output_dir, output_file_name_set))
# check output file is written...... and if not return an error?
files = [f for f in listdir(join(data_path, output_dir)) if
isfile(join(data_path, output_dir, f))]
logger.info('Files in output dir: %s' % files)
print('Files in output dir: %s' % files)
print('Completed running clip')
logger.info('Completed running clip. Stopping tool.')
# final step - delete the log file is requested by user
if save_logfile is False:
# delete log file dir
remove(join(data_path, output_dir, log_file_name))