11"""
2- ESA Sentinel Search & Download API
2+ Sentinel Search & Download API
Authors: Jonas Eberle <[email protected]>, Felix Cremer <[email protected]>, John Truckenbrodt <[email protected]>
55Libraries needed: Shapely, GDAL/OGR, JSON, Progressbar, Zipfile, Datetime, Requests
99- Documentation
1010"""
1111
12- __version__ = '0.5'
12+ __version__ = '0.5.1 '
1313
1414###########################################################
1515# imports
1616###########################################################
1717
1818import os
19+ import re
1920import zlib
2021import sys
2122import requests
@@ -38,6 +39,7 @@ class SentinelDownloader(object):
3839 __geometries = []
3940 __scenes = []
4041 __download_dir = './'
42+ __data_dirs = []
4143
4244 def __init__ (self , username , password , api_url = 'https://scihub.copernicus.eu/apihub/' ):
4345 self .__esa_api_url = api_url
@@ -51,15 +53,22 @@ def set_download_dir(self, download_dir):
5153 download_dir: Path to directory
5254
5355 """
54- print ('Set Download directory to %s' % download_dir )
56+ print ('Setting download directory to %s' % download_dir )
5557 if not os .path .exists (download_dir ):
5658 os .makedirs (download_dir )
5759
58- if download_dir [- 1 ] != '/' :
59- download_dir += '/'
60-
6160 self .__download_dir = download_dir
6261
62+ def set_data_dir (self , data_dir ):
63+ """Set directory for check against existing downloaded files; this can be repeated multiple times to create a list of data directories
64+
65+ Args:
66+ data_dir: Path to directory
67+
68+ """
69+ print ('Adding data directory {}' .format (data_dir ))
70+ self .__data_dirs .append (data_dir )
71+
6372 def set_geometries (self , geometries ):
6473 """Manually set one or more geometries for data search
6574
@@ -98,7 +107,7 @@ def load_sites(self, input_file, verbose=False):
98107
99108 """
100109 print ('===========================================================' )
101- print ('Load sites from file %s' % input_file )
110+ print ('Loading sites from file %s' % input_file )
102111
103112 if not os .path .exists (input_file ):
104113 raise Exception ('Input file does not exist: %s' % input_file )
@@ -137,7 +146,7 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
137146 dataType: Define the type of the given dates (please select from 'beginPosition', 'endPosition', and
138147 'ingestionDate') (Default: beginPosition)
139148 **keywords: Further OpenSearch arguments can be passed to the query according to the ESA Data Hub Handbook
140- (please see https://scihub.esa.int /twiki/do/view/SciHubUserGuide/3FullTextSearch#Search_Keywords)
149+ (please see https://scihub.copernicus.eu /twiki/do/view/SciHubUserGuide/3FullTextSearch#Search_Keywords)
141150
142151 Mandatory args:
143152 platform
@@ -147,7 +156,7 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
147156
148157 """
149158 print ('===========================================================' )
150- print ('Search data for platform %s' % platform )
159+ print ('Searching data for platform %s' % platform )
151160 if platform not in ['S1A*' , 'S1B*' , 'S2A*' , 'S2B*' , 'S3A*' , 'S3B*' ]:
152161 raise Exception ('platform parameter has to be S1A*, S1B*, S2A*, S2B*, S3A* or S3B*' )
153162
@@ -165,12 +174,12 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
165174 if isinstance (start_date , (datetime , date )):
166175 start_date = start_date .strftime ('%Y-%m-%dT%H:%M:%S.%fZ' )
167176 else :
168- start_date = datetime .strptime (start_date , '%Y-%m-%d' )\
177+ start_date = datetime .strptime (start_date , '%Y-%m-%d' ) \
169178 .strftime ('%Y-%m-%dT%H:%M:%S.%fZ' )
170179 if isinstance (end_date , (datetime , date )):
171180 end_date = end_date .strftime ('%Y-%m-%dT%H:%M:%S.%fZ' )
172181 else :
173- end_date = datetime .strptime (end_date + ' 23:59:59.999' , '%Y-%m-%d %H:%M:%S.%f' )\
182+ end_date = datetime .strptime (end_date + ' 23:59:59.999' , '%Y-%m-%d %H:%M:%S.%f' ) \
174183 .strftime ('%Y-%m-%dT%H:%M:%S.%fZ' )
175184 date_filtering = ' AND %s:[%s TO %s]' % (date_type , start_date , end_date )
176185
@@ -184,15 +193,16 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en
184193 print ('Search URL: %s' % url )
185194 subscenes = self ._search_request (url )
186195 if len (subscenes ) > 0 :
187- print ('found %s scenes on page %s' % (len (subscenes ), index // 100 + 1 ))
196+ print ('found %s scenes on page %s' % (len (subscenes ), index // 100 + 1 ))
188197 scenes += subscenes
189198 index += 100
199+ print ('=============================' )
190200 if len (subscenes ) < 100 :
191201 break
192202
193203 print '%s scenes after initial search' % len (scenes )
194204 if len (scenes ) > 0 :
195- scenes = self ._filter_existing (scenes , self . __download_dir )
205+ scenes = self ._filter_existing (scenes )
196206 scenes = self ._filter_overlap (scenes , geom , min_overlap )
197207 print '%s scenes after filtering before merging' % len (scenes )
198208 self .__scenes = self ._merge_scenes (self .__scenes , scenes )
@@ -207,8 +217,11 @@ def get_scenes(self):
207217
208218 def print_scenes (self ):
209219 """Print title of searched and filtered scenes"""
210- for scene in self .__scenes :
211- print (scene ['title' ])
220+
221+ def sorter (x ): return re .findall ('[0-9T]{15}' , x )[0 ]
222+
223+ titles = sorted ([x ['title' ] for x in self .__scenes ], key = sorter )
224+ print '\n ' .join (titles )
212225
213226 def write_results (self , file_type , filename , output = False ):
214227 """Write results to disk in different kind of formats
@@ -266,7 +279,7 @@ def download_all(self, download_dir=None):
266279 continue
267280 size = int (response .headers ['Content-Length' ].strip ())
268281 if size < 1000000 :
269- print 'The found scene: %s is to small (%s)' % (scene ['title' ], size )
282+ print 'The found scene is to small: %s (%s)' % (scene ['title' ], size )
270283 print url
271284 continue
272285
@@ -297,7 +310,7 @@ def download_all(self, download_dir=None):
297310
298311 if not valid :
299312 downloaded_failed .append (path )
300- print ('Invalid file is being deleted.' )
313+ print ('invalid file is being deleted.' )
301314 os .remove (path )
302315 else :
303316 downloaded .append (path )
@@ -327,7 +340,7 @@ def _is_valid(self, zipfile, minsize=1000000):
327340 print ('The downloaded scene is corrupt: {}' .format (os .path .basename (zipfile )))
328341 return False
329342 else :
330- print ('File seems to be valid.' )
343+ print ('file seems to be valid.' )
331344 return True
332345
333346 def _format_url (self , startindex , wkt_geometry , platform , date_filtering , ** keywords ):
@@ -394,7 +407,7 @@ def _parse_json(self, obj):
394407
395408 scenes = obj ['feed' ]['entry' ]
396409 if not isinstance (scenes , list ):
397- scenes = [scenes ]
410+ scenes = [scenes ]
398411 scenes_dict = []
399412 for scene in scenes :
400413 item = {
@@ -416,20 +429,21 @@ def _parse_json(self, obj):
416429
417430 return scenes_dict
418431
419- def _filter_existing (self , scenes , outputpath ):
420- """Filter scenes based on existing files
432+ def _filter_existing (self , scenes ):
433+ """Filter scenes based on existing files in the define download directory and all further data directories
421434
422435 Args:
423- scenes: List of scenes to filter
424- outputpath: path to directory to check against existing files
436+ scenes: List of scenes to be filtered
425437
426438 Returns:
427439 Filtered list of scenes
428440
429441 """
430442 filtered = []
443+ dirs = self .__data_dirs + [self .__download_dir ]
431444 for scene in scenes :
432- if not os .path .exists (outputpath + '/' + scene ['title' ] + '.zip' ):
445+ exist = [os .path .isfile (os .path .join (dir , scene ['title' ] + '.zip' )) for dir in dirs ]
446+ if not any (exist ):
433447 filtered .append (scene )
434448 return filtered
435449
@@ -454,7 +468,7 @@ def _filter_overlap(self, scenes, wkt_geometry, min_overlap=0):
454468 intersect = site .intersection (footprint )
455469 overlap = intersect .area / site .area
456470 if overlap > min_overlap or (
457- site .area / footprint .area > 1 and intersect .area / footprint .area > min_overlap ):
471+ site .area / footprint .area > 1 and intersect .area / footprint .area > min_overlap ):
458472 scene ['_script_overlap' ] = overlap * 100
459473 filtered .append (scene )
460474
@@ -529,22 +543,35 @@ def _write_download_urls(self, path):
def main(username, password):
    """Example use of class:
    Note: please set your own username and password of ESA Data Hub

    Args:
        username: Your username of ESA Data Hub
        password: Your password of ESA Data Hub

    api_hub options:
        'https://scihub.copernicus.eu/apihub/' for fast access to recently acquired imagery in the API HUB rolling archive
        'https://scihub.copernicus.eu/dhus/' for slower access to the full archive of all acquired imagery

    s1 = SentinelDownloader(username, password, api_url='https://scihub.copernicus.eu/apihub/')
    s1.set_geometries('POLYGON ((13.501756184061247 58.390759025092443,13.617310497771715 58.371827474899703,13.620921570075168 58.27891592167088,13.508978328668151 58.233319081414017,13.382590798047325 58.263723491583974,13.382590798047325 58.263723491583974,13.501756184061247 58.390759025092443))')
    s1.set_download_dir('./')  # default is current directory
    s1.search('S1A*', 0.8, productType='GRD', sensoroperationalmode='IW')
    s1.write_results(file_type='wget', filename='test.sh.neu')  # use wget, urls or json as file_type
    s1.download_all()

    """

    s1 = SentinelDownloader(username, password, api_url='https://scihub.copernicus.eu/apihub/')
    # s1.load_sites('wetlands_v8.shp')
    s1.set_geometries(
        'POLYGON ((13.501756184061247 58.390759025092443,13.617310497771715 58.371827474899703,13.620921570075168 58.27891592167088,13.508978328668151 58.233319081414017,13.382590798047325 58.263723491583974,13.382590798047325 58.263723491583974,13.501756184061247 58.390759025092443))')
    s1.set_download_dir('./')  # default is current directory

    # set additional directories which contain downloaded scenes.
    # A scene is only going to be downloaded if it does not yet exist in
    # either of the data directories or the download directory.
    s1.set_data_dir('/path/to/datadir1')
    s1.set_data_dir('/path/to/datadir2')

    s1.search('S1A*', 0.8, productType='GRD', sensoroperationalmode='IW')
    s1.write_results(file_type='wget', filename='sentinel_api_download.sh')  # use wget, urls or json as file_type
    s1.download_all()
0 commit comments