1313
1414import aiofiles
1515import httpx
16- from httpx_retries import RetryTransport
1716import polars as pl
17+ from httpx_retries import RetryTransport
1818
1919
2020ARCHIVE_URL = "https://farm.cse.ucdavis.edu/~irber"
@@ -75,6 +75,8 @@ async def main(args):
7575
7676 print (to_mirror_df .collect ())
7777
78+ (args .basedir / "sigs" ).mkdir (parents = True , exist_ok = True )
79+
7880 async with httpx .AsyncClient (
7981 timeout = 30.0 ,
8082 # limits=httpx.Limits(max_connections=args.max_downloaders),
@@ -95,22 +97,25 @@ async def main(args):
9597 )
9698 )
9799 except* Exception as eg :
98- print (* [str (e )[:50 ] for e in eg .exceptions ])
99-
100- # copy manifest
101- if args .dry_run :
102- print (f"download: { manifest_url } " )
103- return
104-
105- async with client .stream ("GET" , "SOURMASH-MANIFEST.parquet" ) as response :
106- async with aiofiles .tempfile .NamedTemporaryFile () as f :
107- async for chnk in response .aiter_raw (1024 * 1024 ):
108- await f .write (chnk )
109- await f .flush ()
110-
111- await asyncio .to_thread (
112- shutil .copyfile , f .name , args .basedir / "SOURMASH-MANIFEST.parquet"
113- )
100+ print (* [str (e )[:80 ] for e in eg .exceptions ])
101+ print (len (eg .exceptions ))
102+ else :
103+ # copy manifest
104+ if args .dry_run :
105+ print (f"download: { manifest_url } " )
106+ return
107+
108+ async with client .stream ("GET" , "SOURMASH-MANIFEST.parquet" ) as response :
109+ async with aiofiles .tempfile .NamedTemporaryFile () as f :
110+ async for chnk in response .aiter_raw (1024 * 1024 ):
111+ await f .write (chnk )
112+ await f .flush ()
113+
114+ await asyncio .to_thread (
115+ shutil .copyfile ,
116+ f .name ,
117+ args .basedir / "SOURMASH-MANIFEST.parquet" ,
118+ )
114119
115120
116121async def download_sig (location , sha256 , basedir , client , limiter , dry_run ):
@@ -175,9 +180,17 @@ async def download_sig(location, sha256, basedir, client, limiter, dry_run):
175180 help = "Calculate sha256 for local files, instead of depending only on filename" ,
176181 )
177182 parser .add_argument (
178- "database" , default = "img" , choices = DATABASES , metavar = "database" , help = f"Which database to download. Available databases: { ', ' .join (DATABASES )} "
183+ "database" ,
184+ default = "img" ,
185+ choices = DATABASES ,
186+ metavar = "database" ,
187+ help = f"Which database to download. Available databases: { ', ' .join (DATABASES )} " ,
188+ )
189+ parser .add_argument (
190+ "basedir" ,
191+ type = pathlib .Path ,
192+ help = "base directory for the mirror (existing or new)" ,
179193 )
180- parser .add_argument ("basedir" , type = pathlib .Path , help = "base directory for the mirror (existing or new)" )
181194
182195 args = parser .parse_args ()
183196
0 commit comments