Create scanner.py
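The module gathers basic reconnaissance for a target URL: page title, Last-Modified header, WHOIS creation date, DNS A records, common subdomains, WAF/CDN header hints, keyword-based technology detection, and TLS certificate details.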
scanner.py
ADDED
@@ -0,0 +1,128 @@
+# scanner.py

import datetime
import json
import socket
import ssl
from urllib.parse import urlparse

import certifi
import dns.resolver
import requests
import whois
from bs4 import BeautifulSoup

COMMON_SUBDOMAINS = ['www', 'mail', 'ftp', 'cpanel', 'webmail', 'admin', 'test', 'blog', 'dev', 'portal', 'shop', 'api']
TECH_KEYWORDS = ['wordpress', 'joomla', 'drupal', 'laravel', 'django', 'angular', 'react', 'vue',
                 'jquery', 'html', 'php', 'css', 'sqlite', 'javascript', 'mysql', 'oracle', 'python', 'c+', 'c#']

def get_page_title(url):
    """Return the page's <title> text, or None if it can't be fetched."""
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.title.string if soup.title else None
    except Exception:
        return None

def get_last_modified(url):
    """Return the Last-Modified response header, if the server sends one."""
    try:
        response = requests.head(url, timeout=5)
        return response.headers.get('Last-Modified')
    except Exception:
        return None

def get_creation_date(domain):
    """Look up the domain's registration date via WHOIS."""
    try:
        whois_info = whois.whois(domain)
        creation_date = whois_info.creation_date
        # Some registrars return a list of dates; keep the first one.
        return creation_date[0] if isinstance(creation_date, list) else creation_date
    except Exception:
        return None

def get_dns_info(domain):
    """Resolve the domain's A records to a list of IPv4 addresses."""
    try:
        answers = dns.resolver.resolve(domain, 'A')
        return [rdata.address for rdata in answers]
    except Exception:
        return []

def get_subdomains(domain):
    """Probe a wordlist of common subdomains and return the ones that resolve."""
    found = []
    for sub in COMMON_SUBDOMAINS:
        subdomain = f"{sub}.{domain}"
        try:
            dns.resolver.resolve(subdomain, 'A')
            found.append(subdomain)
        except Exception:
            continue
    return found

def get_firewall_info(headers):
    """Collect response headers that hint at a WAF or CDN in front of the site."""
    waf_headers = ['x-firewall', 'x-sucuri-id', 'server', 'x-cdn', 'cf-ray']
    found = []
    for key in headers:
        for waf in waf_headers:
            if waf in key.lower():
                found.append(f"{key}: {headers[key]}")
    return found

def get_technologies(url):
    """Guess the site's stack by keyword-matching the page body and asset URLs."""
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        text_blob = response.text.lower()

        found_tech = set()
        for tech in TECH_KEYWORDS:
            if tech in text_blob:
                found_tech.add(tech)

        # Script and stylesheet URLs often name the framework directly.
        for tag in soup.find_all(['script', 'link']):
            src = tag.get('src') or tag.get('href')
            if src:
                for tech in TECH_KEYWORDS:
                    if tech in src.lower():
                        found_tech.add(tech)

        return sorted(found_tech)
    except Exception:
        return []

def get_certificate_info(url):
    """Fetch the TLS certificate and summarise its issuer and validity window."""
    try:
        hostname = urlparse(url).netloc
        context = ssl.create_default_context(cafile=certifi.where())
        with context.wrap_socket(socket.socket(), server_hostname=hostname) as s:
            s.settimeout(5)
            s.connect((hostname, 443))
            cert = s.getpeercert()
            issuer = dict(x[0] for x in cert['issuer'])['organizationName']
            # Certificate timestamps use OpenSSL's fixed format, e.g. "Jun  1 12:00:00 2024 GMT".
            start_date = datetime.datetime.strptime(cert['notBefore'], "%b %d %H:%M:%S %Y %Z")
            end_date = datetime.datetime.strptime(cert['notAfter'], "%b %d %H:%M:%S %Y %Z")
            return {
                'Issuer': issuer,
                'Start Date': str(start_date),
                'Expiration Date': str(end_date),
                'Validity Days': (end_date - start_date).days
            }
    except Exception:
        return None

def get_site_info(url):
    """Run every check against the URL and return the results as one dict."""
    try:
        parsed = urlparse(url)
        domain = parsed.netloc or parsed.path
        headers = requests.get(url, timeout=5).headers
    except Exception:
        return {"error": "Unable to reach the URL."}

    return {
        'Title': get_page_title(url),
        'Last Updated': get_last_modified(url),
        'Domain Creation Date': str(get_creation_date(domain)),
        'DNS A Records': get_dns_info(domain),
        'Subdomains Found': get_subdomains(domain),
        'Firewall Headers': get_firewall_info(headers),
        'Detected Technologies': get_technologies(url),
        'SSL Certificate': get_certificate_info(url)
    }
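For quick testing, a minimal usage sketch, not part of the commit: it assumes scanner.py is on the import path, and the driver file name demo.py and the target URL are placeholders. It calls get_site_info and pretty-prints the report as JSON.

# demo.py — hypothetical driver, not part of this commit
import json

from scanner import get_site_info

if __name__ == "__main__":
    report = get_site_info("https://example.com")  # placeholder target
    # default=str guards against any non-JSON-safe value (e.g. a raw datetime).
    print(json.dumps(report, indent=2, default=str))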