# NOTE: stray extraction artifacts ("Spaces:", "Configuration error") removed.
# scanner.py | |
import datetime | |
import json | |
import socket | |
import ssl | |
from urllib.parse import urlparse | |
import certifi | |
import dns.resolver | |
import requests | |
import whois | |
from bs4 import BeautifulSoup | |
# Subdomain labels probed by get_subdomains() via DNS A-record lookups.
COMMON_SUBDOMAINS = ['www', 'mail', 'ftp', 'cpanel', 'webmail', 'admin', 'test', 'blog', 'dev', 'portal', 'shop', 'api']
# Substrings searched for (lower-cased) in page bodies and asset URLs by
# get_technologies(). Matches are plain substring tests, so short tokens
# such as 'c+' can over-match -- NOTE(review): confirm keyword precision.
TECH_KEYWORDS = ['wordpress', 'joomla', 'drupal', 'laravel', 'django', 'angular', 'react', 'vue',
                 'jquery', 'html', 'php', 'css', 'sqlite', 'javascript', 'mysql', 'oracle', 'python', 'c+', 'c#']
def get_page_title(url):
    """Return the text of *url*'s HTML <title> tag, or None on any failure.

    Best-effort probe: network errors, timeouts and parse failures all
    collapse to None rather than propagating.
    """
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        # soup.title.string is itself None for an empty/nested <title>.
        return soup.title.string if soup.title else None
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; any fetch/parse error yields None.
        return None
def get_last_modified(url):
    """Return the Last-Modified response header for *url*, or None.

    Uses a HEAD request so no body is downloaded; returns None when the
    header is absent or the request fails.
    """
    try:
        response = requests.head(url, timeout=5)
        return response.headers.get('Last-Modified')
    except Exception:
        # Narrowed from a bare `except:`; best-effort lookup -> None.
        return None
def get_creation_date(domain):
    """Return *domain*'s WHOIS creation date, or None on lookup failure.

    python-whois may return either a single datetime or a list of them;
    the first entry is used in the list case.
    """
    try:
        whois_info = whois.whois(domain)
        creation_date = whois_info.creation_date
        return creation_date[0] if isinstance(creation_date, list) else creation_date
    except Exception:
        # Narrowed from a bare `except:`; WHOIS errors -> None.
        return None
def get_dns_info(domain):
    """Return the list of IPv4 addresses (A records) for *domain*.

    Returns an empty list when resolution fails (NXDOMAIN, timeout, ...).
    """
    try:
        answers = dns.resolver.resolve(domain, 'A')
        return [rdata.address for rdata in answers]
    except Exception:
        # Narrowed from a bare `except:`; resolution failure -> [].
        return []
def get_subdomains(domain):
    """Probe COMMON_SUBDOMAINS under *domain* and return those that resolve.

    Each candidate "<label>.<domain>" is checked with a DNS A-record query;
    failures of any kind simply skip that candidate.
    """
    found = []
    for sub in COMMON_SUBDOMAINS:
        subdomain = f"{sub}.{domain}"
        try:
            dns.resolver.resolve(subdomain, 'A')
            found.append(subdomain)
        except Exception:
            # Narrowed from a bare `except:`; unresolved names are skipped.
            continue
    return found
def get_firewall_info(headers):
    """Scan HTTP response *headers* for WAF/CDN fingerprint header names.

    Args:
        headers: a mapping of header name -> value.

    Returns:
        A list of "Name: value" strings, one entry per matching header.
        Matching is a case-insensitive substring test against known
        fingerprints ('x-firewall', 'x-sucuri-id', 'server', 'x-cdn',
        'cf-ray').
    """
    waf_headers = ['x-firewall', 'x-sucuri-id', 'server', 'x-cdn', 'cf-ray']
    found = []
    for key in headers:
        lowered = key.lower()
        # `any` reports each header at most once; the previous nested loop
        # duplicated a header whose name matched several fingerprints
        # (e.g. "X-CDN-Server" matched both 'x-cdn' and 'server').
        if any(waf in lowered for waf in waf_headers):
            found.append(f"{key}: {headers[key]}")
    return found
def get_technologies(url):
    """Guess technologies used by *url* via keyword sniffing.

    Looks for TECH_KEYWORDS as substrings of the page body and of
    <script src> / <link href> asset URLs.

    Returns:
        A sorted list of matched keywords; [] when the page cannot be
        fetched or parsed.
    """
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        text_blob = response.text.lower()
        found_tech = set()
        for tech in TECH_KEYWORDS:
            if tech in text_blob:
                found_tech.add(tech)
        for tag in soup.find_all(['script', 'link']):
            src = tag.get('src') or tag.get('href')
            if src:
                # Hoist the per-tag lowercase out of the keyword loop.
                src_lower = src.lower()
                for tech in TECH_KEYWORDS:
                    if tech in src_lower:
                        found_tech.add(tech)
        return sorted(found_tech)
    except Exception:
        # Narrowed from a bare `except:`; fetch/parse failure -> [].
        return []
def get_certificate_info(url):
    """Fetch and summarize the TLS certificate of *url*'s host (port 443).

    Returns:
        A dict with the issuer organization, validity start/end dates and
        validity length in days, or None when the handshake, connection or
        certificate parsing fails.
    """
    try:
        parsed = urlparse(url)
        # A scheme-less URL ("example.com") parses into .path with an empty
        # .netloc; fall back so bare domains work (matches get_site_info).
        hostname = parsed.netloc or parsed.path
        context = ssl.create_default_context(cafile=certifi.where())
        with context.wrap_socket(socket.socket(), server_hostname=hostname) as s:
            s.settimeout(5)
            s.connect((hostname, 443))
            cert = s.getpeercert()
            issuer = dict(x[0] for x in cert['issuer'])['organizationName']
            # OpenSSL textual timestamps, e.g. "Jun  1 12:00:00 2024 GMT".
            start_date = datetime.datetime.strptime(cert['notBefore'], "%b %d %H:%M:%S %Y %Z")
            end_date = datetime.datetime.strptime(cert['notAfter'], "%b %d %H:%M:%S %Y %Z")
            return {
                'Issuer': issuer,
                'Start Date': str(start_date),
                'Expiration Date': str(end_date),
                'Validity Days': (end_date - start_date).days
            }
    except Exception:
        # Narrowed from a bare `except:`; any TLS/socket/parse error -> None.
        return None
def get_site_info(url):
    """Run every scanner probe against *url* and aggregate the results.

    Returns:
        A dict of probe results (title, WHOIS creation date, DNS records,
        subdomains, firewall headers, technologies, TLS certificate), or
        {"error": ...} when the initial reachability check fails.
    """
    try:
        parsed = urlparse(url)
        # Scheme-less input ends up in .path rather than .netloc.
        domain = parsed.netloc or parsed.path
        headers = requests.get(url, timeout=5).headers
    except Exception:
        # Narrowed from a bare `except:`; unreachable URL short-circuits.
        return {"error": "Unable to reach the URL."}
    return {
        'Title': get_page_title(url),
        'Last Updated': get_last_modified(url),
        'Domain Creation Date': str(get_creation_date(domain)),
        'DNS A Records': get_dns_info(domain),
        'Subdomains Found': get_subdomains(domain),
        'Firewall Headers': get_firewall_info(headers),
        'Detected Technologies': get_technologies(url),
        'SSL Certificate': get_certificate_info(url)
    }