This repository was archived by the owner on May 23, 2024. It is now read-only.

Commit d53e0d2

Merge pull request #4 from johntruckenbrodt/feature/data_dir
add data directory option
2 parents 5bcc9fd + 5e9500c commit d53e0d2
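
In short, this merge adds a set_data_dir() option: one or more directories with already-downloaded scenes can be registered, and _filter_existing() now skips any scene whose <title>.zip is present in the download directory or in any registered data directory; the version is bumped to 0.5.1. Below is a minimal usage sketch of the new option, not part of the commit itself: the credentials, geometry, and paths are placeholders, and the import assumes sentinel_api.py is on the Python path.

from sentinel_api import SentinelDownloader

# placeholder ESA Data Hub credentials and a placeholder WKT search polygon
s1 = SentinelDownloader('your_username', 'your_password',
                        api_url='https://scihub.copernicus.eu/apihub/')
s1.set_geometries('POLYGON ((13.50 58.39, 13.62 58.37, 13.62 58.28, '
                  '13.51 58.23, 13.38 58.26, 13.50 58.39))')

s1.set_download_dir('./downloads')            # new scenes are written here
s1.set_data_dir('/path/to/existing/archive')  # scenes already stored here are skipped

s1.search('S1A*', min_overlap=0.8, productType='GRD', sensoroperationalmode='IW')
s1.download_all()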

File tree

1 file changed: +52 -25 lines changed


sentinel_api.py

Lines changed: 52 additions & 25 deletions
@@ -1,5 +1,5 @@
 """
-ESA Sentinel Search & Download API
+Sentinel Search & Download API
 Authors: Jonas Eberle <[email protected]>, Felix Cremer <[email protected]>, John Truckenbrodt <[email protected]>
 
 Libraries needed: Shapely, GDAL/OGR, JSON, Progressbar, Zipfile, Datetime, Requests
@@ -9,13 +9,14 @@
 - Documentation
 """
 
-__version__ = '0.5'
+__version__ = '0.5.1'
 
 ###########################################################
 # imports
 ###########################################################
 
 import os
+import re
 import zlib
 import sys
 import requests
@@ -38,6 +39,7 @@ class SentinelDownloader(object):
     __geometries = []
     __scenes = []
     __download_dir = './'
+    __data_dirs = []
 
     def __init__(self, username, password, api_url='https://scihub.copernicus.eu/apihub/'):
         self.__esa_api_url = api_url
@@ -51,15 +53,22 @@ def set_download_dir(self, download_dir):
             download_dir: Path to directory
 
         """
-        print('Set Download directory to %s' % download_dir)
+        print('Setting download directory to %s' % download_dir)
         if not os.path.exists(download_dir):
            os.makedirs(download_dir)
 
-        if download_dir[-1] != '/':
-            download_dir += '/'
-
         self.__download_dir = download_dir
 
+    def set_data_dir(self, data_dir):
+        """Set directory for check against existing downloaded files; this can be repeated multiple times to create a list of data directories
+
+        Args:
+            data_dir: Path to directory
+
+        """
+        print('Adding data directory {}'.format(data_dir))
+        self.__data_dirs.append(data_dir)
+
     def set_geometries(self, geometries):
         """Manually set one or more geometries for data search
 
@@ -98,7 +107,7 @@ def load_sites(self, input_file, verbose=False):
 
         """
         print('===========================================================')
-        print('Load sites from file %s' % input_file)
+        print('Loading sites from file %s' % input_file)
 
         if not os.path.exists(input_file):
             raise Exception('Input file does not exist: %s' % input_file)
@@ -137,7 +146,7 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
             dataType: Define the type of the given dates (please select from 'beginPosition', 'endPosition', and
                 'ingestionDate') (Default: beginPosition)
             **keywords: Further OpenSearch arguments can be passed to the query according to the ESA Data Hub Handbook
-                (please see https://scihub.esa.int/twiki/do/view/SciHubUserGuide/3FullTextSearch#Search_Keywords)
+                (please see https://scihub.copernicus.eu/twiki/do/view/SciHubUserGuide/3FullTextSearch#Search_Keywords)
 
         Mandatory args:
             platform
@@ -147,7 +156,7 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
 
         """
         print('===========================================================')
-        print('Search data for platform %s' % platform)
+        print('Searching data for platform %s' % platform)
         if platform not in ['S1A*', 'S1B*', 'S2A*', 'S2B*', 'S3A*', 'S3B*']:
             raise Exception('platform parameter has to be S1A*, S1B*, S2A*, S2B*, S3A* or S3B*')
 
@@ -165,12 +174,12 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
             if isinstance(start_date, (datetime, date)):
                 start_date = start_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
             else:
-                start_date = datetime.strptime(start_date, '%Y-%m-%d')\
+                start_date = datetime.strptime(start_date, '%Y-%m-%d') \
                     .strftime('%Y-%m-%dT%H:%M:%S.%fZ')
             if isinstance(end_date, (datetime, date)):
                 end_date = end_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
             else:
-                end_date = datetime.strptime(end_date + ' 23:59:59.999', '%Y-%m-%d %H:%M:%S.%f')\
+                end_date = datetime.strptime(end_date + ' 23:59:59.999', '%Y-%m-%d %H:%M:%S.%f') \
                     .strftime('%Y-%m-%dT%H:%M:%S.%fZ')
             date_filtering = ' AND %s:[%s TO %s]' % (date_type, start_date, end_date)
 
@@ -184,15 +193,16 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
                 print('Search URL: %s' % url)
                 subscenes = self._search_request(url)
                 if len(subscenes) > 0:
-                    print('found %s scenes on page %s' % (len(subscenes), index//100+1))
+                    print('found %s scenes on page %s' % (len(subscenes), index // 100 + 1))
                     scenes += subscenes
                     index += 100
+                    print('=============================')
                 if len(subscenes) < 100:
                     break
 
             print '%s scenes after initial search' % len(scenes)
             if len(scenes) > 0:
-                scenes = self._filter_existing(scenes, self.__download_dir)
+                scenes = self._filter_existing(scenes)
                 scenes = self._filter_overlap(scenes, geom, min_overlap)
                 print '%s scenes after filtering before merging' % len(scenes)
                 self.__scenes = self._merge_scenes(self.__scenes, scenes)
@@ -207,8 +217,11 @@ def get_scenes(self):
 
     def print_scenes(self):
         """Print title of searched and filtered scenes"""
-        for scene in self.__scenes:
-            print(scene['title'])
+
+        def sorter(x): return re.findall('[0-9T]{15}', x)[0]
+
+        titles = sorted([x['title'] for x in self.__scenes], key=sorter)
+        print '\n'.join(titles)
 
     def write_results(self, file_type, filename, output=False):
         """Write results to disk in different kind of formats
@@ -266,7 +279,7 @@ def download_all(self, download_dir=None):
                 continue
             size = int(response.headers['Content-Length'].strip())
             if size < 1000000:
-                print 'The found scene: %s is to small (%s)' % (scene['title'], size)
+                print 'The found scene is to small: %s (%s)' % (scene['title'], size)
                 print url
                 continue
 
@@ -297,7 +310,7 @@ def download_all(self, download_dir=None):
 
             if not valid:
                 downloaded_failed.append(path)
-                print('Invalid file is being deleted.')
+                print('invalid file is being deleted.')
                 os.remove(path)
             else:
                 downloaded.append(path)
@@ -327,7 +340,7 @@ def _is_valid(self, zipfile, minsize=1000000):
             print('The downloaded scene is corrupt: {}'.format(os.path.basename(zipfile)))
             return False
         else:
-            print('File seems to be valid.')
+            print('file seems to be valid.')
             return True
 
     def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keywords):
@@ -394,7 +407,7 @@ def _parse_json(self, obj):
 
         scenes = obj['feed']['entry']
         if not isinstance(scenes, list):
-            scenes = [scenes]
+            scenes = [scenes]
         scenes_dict = []
         for scene in scenes:
             item = {
@@ -416,20 +429,21 @@ def _parse_json(self, obj):
 
         return scenes_dict
 
-    def _filter_existing(self, scenes, outputpath):
-        """Filter scenes based on existing files
+    def _filter_existing(self, scenes):
+        """Filter scenes based on existing files in the define download directory and all further data directories
 
         Args:
-            scenes: List of scenes to filter
-            outputpath: path to directory to check against existing files
+            scenes: List of scenes to be filtered
 
         Returns:
             Filtered list of scenes
 
         """
         filtered = []
+        dirs = self.__data_dirs + [self.__download_dir]
        for scene in scenes:
-            if not os.path.exists(outputpath + '/' + scene['title'] + '.zip'):
+            exist = [os.path.isfile(os.path.join(dir, scene['title'] + '.zip')) for dir in dirs]
+            if not any(exist):
                 filtered.append(scene)
         return filtered
 
@@ -454,7 +468,7 @@ def _filter_overlap(self, scenes, wkt_geometry, min_overlap=0):
             intersect = site.intersection(footprint)
             overlap = intersect.area / site.area
             if overlap > min_overlap or (
-                    site.area / footprint.area > 1 and intersect.area / footprint.area > min_overlap):
+                    site.area / footprint.area > 1 and intersect.area / footprint.area > min_overlap):
                 scene['_script_overlap'] = overlap * 100
                 filtered.append(scene)
 
@@ -529,22 +543,35 @@ def _write_download_urls(self, path):
 def main(username, password):
     """Example use of class:
     Note: please set your own username and password of ESA Data Hub
+
     Args:
         username: Your username of ESA Data Hub
         password: Your password of ESA Data Hub
+
+    api_hub options:
+        'https://scihub.copernicus.eu/apihub/' for fast access to recently acquired imagery in the API HUB rolling archive
+        'https://scihub.copernicus.eu/dhus/' for slower access to the full archive of all acquired imagery
+
     s1 = SentinelDownloader(username, password, api_url='https://scihub.copernicus.eu/apihub/')
     s1.set_geometries('POLYGON ((13.501756184061247 58.390759025092443,13.617310497771715 58.371827474899703,13.620921570075168 58.27891592167088,13.508978328668151 58.233319081414017,13.382590798047325 58.263723491583974,13.382590798047325 58.263723491583974,13.501756184061247 58.390759025092443))')
     s1.set_download_dir('./') # default is current directory
     s1.search('S1A*', 0.8, productType='GRD', sensoroperationalmode='IW')
     s1.write_results(type='wget', file='test.sh.neu') # use wget, urls or json as type
     s1.download_all()
+
     """
 
     s1 = SentinelDownloader(username, password, api_url='https://scihub.copernicus.eu/apihub/')
     # s1.load_sites('wetlands_v8.shp')
     s1.set_geometries(
         'POLYGON ((13.501756184061247 58.390759025092443,13.617310497771715 58.371827474899703,13.620921570075168 58.27891592167088,13.508978328668151 58.233319081414017,13.382590798047325 58.263723491583974,13.382590798047325 58.263723491583974,13.501756184061247 58.390759025092443))')
     s1.set_download_dir('./') # default is current directory
+
+    # set additional directories which contain downloaded scenes.
+    # A scene is only going to be downloaded if it does not yet exist in either of the data directories or the download directory.
+    s1.set_data_dir('/path/to/datadir1')
+    s1.set_data_dir('/path/to/datadir2')
+
     s1.search('S1A*', 0.8, productType='GRD', sensoroperationalmode='IW')
     s1.write_results(file_type='wget', filename='sentinel_api_download.sh') # use wget, urls or json as type
     s1.download_all()
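
A side note on the revised print_scenes() above: the sorter key extracts the first 15-character run of digits and 'T' from each product title, i.e. the acquisition start timestamp, so titles are printed in chronological order. A small illustration with made-up titles (not real products from this commit):

import re

# made-up Sentinel-1 style titles, for illustration only
titles = ['S1A_IW_GRDH_1SDV_20180203T165012_20180203T165037_020456_023A1F_1A2B',
          'S1A_IW_GRDH_1SDV_20180101T054512_20180101T054537_020001_02211F_3C4D']

def sorter(x):
    # first 15 consecutive characters drawn from [0-9T], e.g. '20180101T054512'
    return re.findall('[0-9T]{15}', x)[0]

# prints the 2018-01-01 scene before the 2018-02-03 scene
print('\n'.join(sorted(titles, key=sorter)))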

0 commit comments
