1919
2020user_agent = {'User-Agent' : 'FinalRecon' }
2121
22- total = []
23- r_total = []
24- sm_total = []
25- js_total = []
26- css_total = []
27- int_total = []
28- ext_total = []
29- img_total = []
30- js_crawl_total = []
31- sm_crawl_total = []
32-
3322
3423def crawler (target , protocol , netloc , output , data ):
35- global r_url , sm_url
24+ r_total = []
25+ sm_total = []
26+ css_total = []
27+ js_total = []
28+ int_total = []
29+ ext_total = []
30+ img_total = []
31+ sm_crawl_total = []
32+ js_crawl_total = []
33+ total = []
34+
3635 print (f'\n { Y } [!] Starting Crawler...{ W } \n ' )
3736
3837 try :
@@ -46,26 +45,27 @@ def crawler(target, protocol, netloc, output, data):
4645 if status == 200 :
4746 page = rqst .content
4847 soup = bs4 .BeautifulSoup (page , 'lxml' )
49-
5048 r_url = f'{ protocol } ://{ netloc } /robots.txt'
5149 sm_url = f'{ protocol } ://{ netloc } /sitemap.xml'
5250 base_url = f'{ protocol } ://{ netloc } '
53-
5451 loop = asyncio .new_event_loop ()
5552 asyncio .set_event_loop (loop )
5653 tasks = asyncio .gather (
57- robots (r_url , base_url , data , output ),
58- sitemap (sm_url , data , output ),
59- css (target , data , soup , output ),
60- js_scan (target , data , soup , output ),
61- internal_links (target , data , soup , output ),
62- external_links (target , data , soup , output ),
63- images (target , data , soup , output ),
64- sm_crawl (data , output ),
65- js_crawl (data , output ))
54+ robots (r_url , r_total , sm_total , base_url , data , output ),
55+ sitemap (sm_url , sm_total , data , output ),
56+ css (target , css_total , data , soup , output ),
57+ js_scan (target , js_total , data , soup , output ),
58+ internal_links (target , int_total , data , soup , output ),
59+ external_links (target , ext_total , data , soup , output ),
60+ images (target , img_total , data , soup , output ),
61+ sm_crawl (data , sm_crawl_total , sm_total , sm_url , output ),
62+ js_crawl (data , js_crawl_total , js_total , output ))
6663 loop .run_until_complete (tasks )
6764 loop .close ()
68- stats (output , data , soup )
65+ stats (output , r_total , sm_total , css_total , js_total ,
66+ int_total , ext_total , img_total , sm_crawl_total ,
67+ js_crawl_total , total , data , soup
68+ )
6969 log_writer ('[crawler] Completed' )
7070 else :
7171 print (f'{ R } [-] { C } Status : { W } { status } ' )
@@ -102,8 +102,7 @@ def url_filter(target, link):
102102 return link
103103
104104
105- async def robots (robo_url , base_url , data , output ):
106- global r_total
105+ async def robots (robo_url , r_total , sm_total , base_url , data , output ):
107106 print (f'{ G } [+] { C } Looking for robots.txt{ W } ' , end = '' , flush = True )
108107
109108 try :
@@ -144,8 +143,7 @@ async def robots(robo_url, base_url, data, output):
144143 log_writer (f'[crawler.robots] Exception = { exc } ' )
145144
146145
147- async def sitemap (target_url , data , output ):
148- global sm_total
146+ async def sitemap (target_url , sm_total , data , output ):
149147 print (f'{ G } [+] { C } Looking for sitemap.xml{ W } ' , end = '' , flush = True )
150148 try :
151149 sm_rqst = requests .get (target_url , headers = user_agent , verify = False , timeout = 10 )
@@ -173,8 +171,7 @@ async def sitemap(target_url, data, output):
173171 log_writer (f'[crawler.sitemap] Exception = { exc } ' )
174172
175173
176- async def css (target , data , soup , output ):
177- global css_total
174+ async def css (target , css_total , data , soup , output ):
178175 print (f'{ G } [+] { C } Extracting CSS Links{ W } ' , end = '' , flush = True )
179176 css_links = soup .find_all ('link' , href = True )
180177
@@ -188,8 +185,7 @@ async def css(target, data, soup, output):
188185 exporter (data , output , css_total , 'css' )
189186
190187
191- async def js_scan (target , data , soup , output ):
192- global js_total
188+ async def js_scan (target , js_total , data , soup , output ):
193189 print (f'{ G } [+] { C } Extracting Javascript Links{ W } ' , end = '' , flush = True )
194190 scr_tags = soup .find_all ('script' , src = True )
195191
@@ -205,8 +201,7 @@ async def js_scan(target, data, soup, output):
205201 exporter (data , output , js_total , 'javascripts' )
206202
207203
208- async def internal_links (target , data , soup , output ):
209- global int_total
204+ async def internal_links (target , int_total , data , soup , output ):
210205 print (f'{ G } [+] { C } Extracting Internal Links{ W } ' , end = '' , flush = True )
211206
212207 ext = tldextract .extract (target )
@@ -224,8 +219,7 @@ async def internal_links(target, data, soup, output):
224219 exporter (data , output , int_total , 'internal_urls' )
225220
226221
227- async def external_links (target , data , soup , output ):
228- global ext_total
222+ async def external_links (target , ext_total , data , soup , output ):
229223 print (f'{ G } [+] { C } Extracting External Links{ W } ' , end = '' , flush = True )
230224
231225 ext = tldextract .extract (target )
@@ -243,8 +237,7 @@ async def external_links(target, data, soup, output):
243237 exporter (data , output , ext_total , 'external_urls' )
244238
245239
246- async def images (target , data , soup , output ):
247- global img_total
240+ async def images (target , img_total , data , soup , output ):
248241 print (f'{ G } [+] { C } Extracting Images{ W } ' , end = '' , flush = True )
249242 image_tags = soup .find_all ('img' )
250243
@@ -258,8 +251,7 @@ async def images(target, data, soup, output):
258251 exporter (data , output , img_total , 'images' )
259252
260253
261- async def sm_crawl (data , output ):
262- global sm_crawl_total
254+ async def sm_crawl (data , sm_crawl_total , sm_total , sm_url , output ):
263255 print (f'{ G } [+] { C } Crawling Sitemaps{ W } ' , end = '' , flush = True )
264256
265257 threads = []
@@ -302,8 +294,7 @@ def fetch(site_url):
302294 exporter (data , output , sm_crawl_total , 'urls_inside_sitemap' )
303295
304296
305- async def js_crawl (data , output ):
306- global js_crawl_total
297+ async def js_crawl (data , js_crawl_total , js_total , output ):
307298 print (f'{ G } [+] { C } Crawling Javascripts{ W } ' , end = '' , flush = True )
308299
309300 threads = []
@@ -347,9 +338,7 @@ def exporter(data, output, list_name, file_name):
347338 export (output , data )
348339
349340
350- def stats (output , data , soup ):
351- global total
352-
341+ def stats (output , r_total , sm_total , css_total , js_total , int_total , ext_total , img_total , sm_crawl_total , js_crawl_total , total , data , soup ):
353342 total .extend (r_total )
354343 total .extend (sm_total )
355344 total .extend (css_total )
0 commit comments