Commit 88c85d2

refactoring
1 parent 31dc268 commit 88c85d2

1 file changed

modules/crawler.py

Lines changed: 34 additions & 45 deletions
@@ -19,20 +19,19 @@
 
 user_agent = {'User-Agent': 'FinalRecon'}
 
-total = []
-r_total = []
-sm_total = []
-js_total = []
-css_total = []
-int_total = []
-ext_total = []
-img_total = []
-js_crawl_total = []
-sm_crawl_total = []
-
 
 def crawler(target, protocol, netloc, output, data):
-    global r_url, sm_url
+    r_total = []
+    sm_total = []
+    css_total = []
+    js_total = []
+    int_total = []
+    ext_total = []
+    img_total = []
+    sm_crawl_total = []
+    js_crawl_total = []
+    total = []
+
     print(f'\n{Y}[!] Starting Crawler...{W}\n')
 
     try:
@@ -46,26 +45,27 @@ def crawler(target, protocol, netloc, output, data):
     if status == 200:
         page = rqst.content
         soup = bs4.BeautifulSoup(page, 'lxml')
-
         r_url = f'{protocol}://{netloc}/robots.txt'
         sm_url = f'{protocol}://{netloc}/sitemap.xml'
         base_url = f'{protocol}://{netloc}'
-
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
         tasks = asyncio.gather(
-            robots(r_url, base_url, data, output),
-            sitemap(sm_url, data, output),
-            css(target, data, soup, output),
-            js_scan(target, data, soup, output),
-            internal_links(target, data, soup, output),
-            external_links(target, data, soup, output),
-            images(target, data, soup, output),
-            sm_crawl(data, output),
-            js_crawl(data, output))
+            robots(r_url, r_total, sm_total, base_url, data, output),
+            sitemap(sm_url, sm_total, data, output),
+            css(target, css_total, data, soup, output),
+            js_scan(target, js_total, data, soup, output),
+            internal_links(target, int_total, data, soup, output),
+            external_links(target, ext_total, data, soup, output),
+            images(target, img_total, data, soup, output),
+            sm_crawl(data, sm_crawl_total, sm_total, sm_url, output),
+            js_crawl(data, js_crawl_total, js_total, output))
         loop.run_until_complete(tasks)
         loop.close()
-        stats(output, data, soup)
+        stats(output, r_total, sm_total, css_total, js_total,
+              int_total, ext_total, img_total, sm_crawl_total,
+              js_crawl_total, total, data, soup
+              )
         log_writer('[crawler] Completed')
     else:
         print(f'{R}[-] {C}Status : {W}{status}')
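For context, a minimal runnable sketch of the pattern the hunk above adopts (hypothetical names collect_css, collect_js, crawl — not the module's code, and it uses asyncio.run rather than the explicit event loop shown in the diff): the result lists are created locally in the caller, handed to each gathered coroutine, and then aggregated explicitly, so no module-level globals are needed.

import asyncio

async def collect_css(css_total):
    # Appends into the list it was handed instead of a module-level global.
    css_total.append('https://example.com/style.css')

async def collect_js(js_total):
    js_total.append('https://example.com/app.js')

async def crawl():
    css_total, js_total, total = [], [], []
    # Run the collectors concurrently, each filling its own list.
    await asyncio.gather(collect_css(css_total), collect_js(js_total))
    # Aggregate explicitly, mirroring the new stats(...) call above.
    total.extend(css_total)
    total.extend(js_total)
    return total

print(asyncio.run(crawl()))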
@@ -102,8 +102,7 @@ def url_filter(target, link):
     return link


-async def robots(robo_url, base_url, data, output):
-    global r_total
+async def robots(robo_url, r_total, sm_total, base_url, data, output):
     print(f'{G}[+] {C}Looking for robots.txt{W}', end='', flush=True)

     try:
@@ -144,8 +143,7 @@ async def robots(robo_url, base_url, data, output):
         log_writer(f'[crawler.robots] Exception = {exc}')


-async def sitemap(target_url, data, output):
-    global sm_total
+async def sitemap(target_url, sm_total, data, output):
     print(f'{G}[+] {C}Looking for sitemap.xml{W}', end='', flush=True)
     try:
         sm_rqst = requests.get(target_url, headers=user_agent, verify=False, timeout=10)
@@ -173,8 +171,7 @@ async def sitemap(target_url, data, output):
         log_writer(f'[crawler.sitemap] Exception = {exc}')


-async def css(target, data, soup, output):
-    global css_total
+async def css(target, css_total, data, soup, output):
     print(f'{G}[+] {C}Extracting CSS Links{W}', end='', flush=True)
     css_links = soup.find_all('link', href=True)

@@ -188,8 +185,7 @@ async def css(target, data, soup, output):
     exporter(data, output, css_total, 'css')


-async def js_scan(target, data, soup, output):
-    global js_total
+async def js_scan(target, js_total, data, soup, output):
     print(f'{G}[+] {C}Extracting Javascript Links{W}', end='', flush=True)
     scr_tags = soup.find_all('script', src=True)

@@ -205,8 +201,7 @@ async def js_scan(target, data, soup, output):
     exporter(data, output, js_total, 'javascripts')


-async def internal_links(target, data, soup, output):
-    global int_total
+async def internal_links(target, int_total, data, soup, output):
     print(f'{G}[+] {C}Extracting Internal Links{W}', end='', flush=True)

     ext = tldextract.extract(target)
@@ -224,8 +219,7 @@ async def internal_links(target, data, soup, output):
     exporter(data, output, int_total, 'internal_urls')


-async def external_links(target, data, soup, output):
-    global ext_total
+async def external_links(target, ext_total, data, soup, output):
     print(f'{G}[+] {C}Extracting External Links{W}', end='', flush=True)

     ext = tldextract.extract(target)
@@ -243,8 +237,7 @@ async def external_links(target, data, soup, output):
     exporter(data, output, ext_total, 'external_urls')


-async def images(target, data, soup, output):
-    global img_total
+async def images(target, img_total, data, soup, output):
     print(f'{G}[+] {C}Extracting Images{W}', end='', flush=True)
     image_tags = soup.find_all('img')

@@ -258,8 +251,7 @@ async def images(target, data, soup, output):
     exporter(data, output, img_total, 'images')


-async def sm_crawl(data, output):
-    global sm_crawl_total
+async def sm_crawl(data, sm_crawl_total, sm_total, sm_url, output):
     print(f'{G}[+] {C}Crawling Sitemaps{W}', end='', flush=True)

     threads = []
@@ -302,8 +294,7 @@ def fetch(site_url):
     exporter(data, output, sm_crawl_total, 'urls_inside_sitemap')


-async def js_crawl(data, output):
-    global js_crawl_total
+async def js_crawl(data, js_crawl_total, js_total, output):
     print(f'{G}[+] {C}Crawling Javascripts{W}', end='', flush=True)

     threads = []
@@ -347,9 +338,7 @@ def exporter(data, output, list_name, file_name):
     export(output, data)


-def stats(output, data, soup):
-    global total
-
+def stats(output, r_total, sm_total, css_total, js_total, int_total, ext_total, img_total, sm_crawl_total, js_crawl_total, total, data, soup):
     total.extend(r_total)
     total.extend(sm_total)
     total.extend(css_total)
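Every one of these signature changes works without `global` because Python passes the list object itself: when a coroutine appends to the r_total / js_total / ... parameter it received, it fills the very list crawler() created and later hands to stats(). A tiny illustration with hypothetical names (not from the module):

def fill(results):
    # Mutates the caller's list in place; no 'global' statement needed.
    results.append('https://example.com/robots.txt')

r_total = []      # local to the caller, like the lists created in crawler()
fill(r_total)
print(r_total)    # ['https://example.com/robots.txt']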
