77import time
88from collections import defaultdict
99from functools import wraps
10+ from pprint import pprint
1011from typing import Any
1112from typing import Awaitable
1213from typing import Callable
@@ -65,7 +66,7 @@ def inner(_self: Parser, *args: Any, **kwargs: Any) -> None:
6566class TaskQueue (asyncio .Queue [Task ]):
6667 done : asyncio .Event
6768
68- def __init__ (self , num_worker : int = 16 ):
69+ def __init__ (self , num_worker : int = 64 ):
6970 super ().__init__ ()
7071 self .num_worker = num_worker
7172 self .done = asyncio .Event ()
@@ -163,6 +164,7 @@ class Parser:
163164 task_queue = TaskQueue ()
164165 graph : Graph
165166 parsed_blocks : set [str ]
167+ parsed_blocks_lock = asyncio .Lock ()
166168
167169 root_id : str
168170 max_workers : int
@@ -187,11 +189,6 @@ def parse(self) -> Graph:
187189
188190 @task
189191 async def parse_page (self , page_id : str ) -> None :
190- if page_id in self .parsed_blocks :
191- return
192- else :
193- self .parsed_blocks .add (page_id )
194-
195192 def _parse_title (page_dict : dict [str , Any ]) -> str :
196193 try :
197194 icon = cast (str , page_dict ['icon' ]['emoji' ])
@@ -239,9 +236,17 @@ async def parse_database(self, database_id: str) -> None:
239236 async def parse_children (self , block_id : str , page_id : str ) -> None :
240237 children = (await get_children (block_id = block_id ))['results' ]
241238 for block_dict in children :
242- self ._parse_block (block_dict = block_dict , block_id = block_id , page_id = page_id )
239+ await self ._parse_block (
240+ block_dict = block_dict , block_id = block_dict ['id' ], page_id = page_id
241+ )
242+
243+ async def _parse_block (self , block_dict : dict , block_id : str , page_id : str ) -> None :
244+ async with self .parsed_blocks_lock :
245+ if block_id in self .parsed_blocks :
246+ return
247+ else :
248+ self .parsed_blocks .add (block_id )
243249
244- def _parse_block (self , block_dict : dict , block_id : str , page_id : str ) -> None :
245250 def _get_relations (
246251 block_dict : dict , block_id : str , page_id : str
247252 ) -> list [Relation ]:
@@ -286,11 +291,12 @@ def _get_relations(
286291 return
287292
288293 elif block_dict ['type' ] == 'child_database' :
294+ # logger.warning('child_database not implemented')
289295 return
290296
291297 else :
292298 if block_dict ['has_children' ]:
293- self .parsed_blocks . add (block_id )
299+ self .parse_children (block_id = block_id , page_id = page_id )
294300
295301 relations = _get_relations (block_dict , block_id , page_id )
296302 for relation in relations :
0 commit comments