@@ -115,22 +115,43 @@ def _atomic_download(url: str, dest: pathlib.Path):
 
 
 def _download_archive(url: str, archive_path: pathlib.Path) -> bool:
-    """Download archive from URL with progress reporting."""
+    """Robust streaming download with retries."""
+
     logger.debug("Archive will be saved to: %s", archive_path)
 
+    session = requests.Session()
+    retries = Retry(
+        total=5,
+        backoff_factor=1.0,
+        status_forcelist=[429, 500, 502, 503, 504],
+        allowed_methods=["GET"],
+    )
+    session.mount("https://", HTTPAdapter(max_retries=retries))
+
     try:
-        urllib.request.urlretrieve(url, archive_path, _make_report_progress())
+        with session.get(url, stream=True) as r:
+            r.raise_for_status()
+
+            total = int(r.headers.get("content-length", 0))
+            downloaded = 0
+            chunk_size = 1024 * 1024  # 1MB
+
+            with open(archive_path, "wb") as f:
+                for chunk in r.iter_content(chunk_size):
+                    if chunk:
+                        f.write(chunk)
+                        downloaded += len(chunk)
+                        _make_report_progress()(downloaded, 1, total)  # reporthook args: (blocks, block_size, total_bytes)
+
         logger.info("Download completed!")
+
     except Exception as e:
-        logger.exception("Error during download: %s", e)
+        logger.error("Error during download: %s", e)
         return False
 
     if archive_path.exists() and archive_path.stat().st_size == 0:
         logger.warning("Downloaded file is empty!")
         return False
-    elif not archive_path.exists():
-        logger.error("File was not downloaded!")
-        return False
     return True
 
 
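The added code references Retry and HTTPAdapter without their imports, which would sit in an earlier hunk of this file. A minimal sketch of the imports the new session setup presumably relies on (the exact placement and import style are assumptions, not shown in this diff):

    # Assumed imports for the retry-enabled session added above (not part of this hunk).
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

With this configuration the session retries connection errors and responses carrying the listed status codes up to five times, sleeping with exponentially increasing delays between attempts. Note that the allowed_methods keyword requires urllib3 1.26 or newer; older releases spell it method_whitelist.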