# scanner.py
"""Passive reconnaissance helpers for a single web site.

Collects page metadata (title, Last-Modified), WHOIS creation date,
DNS A records, common subdomains, firewall/CDN hints from response
headers, naive technology fingerprints, and TLS certificate details.

Every helper swallows its own network/parsing errors and returns a
neutral value (None or an empty list) so that get_site_info() can
always assemble a complete report dict on a best-effort basis.
"""

import datetime
import json
import socket
import ssl
from urllib.parse import urlparse

import certifi
import dns.resolver
import requests
import whois
from bs4 import BeautifulSoup

# Subdomain labels probed by get_subdomains() via DNS A-record lookups.
COMMON_SUBDOMAINS = ['www', 'mail', 'ftp', 'cpanel', 'webmail', 'admin',
                     'test', 'blog', 'dev', 'portal', 'shop', 'api']

# Substrings matched case-insensitively against the page body and asset
# URLs by get_technologies().
TECH_KEYWORDS = ['wordpress', 'joomla', 'drupal', 'laravel', 'django',
                 'angular', 'react', 'vue', 'jquery', 'html', 'php',
                 'css', 'sqlite', 'javascript', 'mysql', 'oracle',
                 'python', 'c+', 'c#']


def get_page_title(url):
    """Return the <title> text of *url*, or None on any failure.

    Args:
        url: Full URL to fetch (scheme included).

    Returns:
        The title string, or None when the page has no <title> element
        or the request/parse fails.
    """
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.title.string if soup.title else None
    except Exception:  # was bare except: don't trap KeyboardInterrupt/SystemExit
        return None


def get_last_modified(url):
    """Return the 'Last-Modified' response header of *url*, or None.

    Uses a HEAD request so no body is downloaded.
    """
    try:
        response = requests.head(url, timeout=5)
        return response.headers.get('Last-Modified')
    except Exception:
        return None


def get_creation_date(domain):
    """Return the WHOIS creation date for *domain*, or None on failure.

    Some registries report a list of dates; in that case the first
    entry is returned.
    """
    try:
        whois_info = whois.whois(domain)
        creation_date = whois_info.creation_date
        return creation_date[0] if isinstance(creation_date, list) else creation_date
    except Exception:
        return None


def get_dns_info(domain):
    """Return the list of IPv4 addresses (A records) for *domain*.

    Returns an empty list when resolution fails (NXDOMAIN, timeout, ...).
    """
    try:
        answers = dns.resolver.resolve(domain, 'A')
        return [rdata.address for rdata in answers]
    except Exception:
        return []


def get_subdomains(domain):
    """Probe COMMON_SUBDOMAINS under *domain* and return those that resolve.

    Each candidate is checked with a DNS A-record lookup; lookup
    failures are treated as "subdomain absent".
    """
    found = []
    for sub in COMMON_SUBDOMAINS:
        subdomain = f"{sub}.{domain}"
        try:
            dns.resolver.resolve(subdomain, 'A')
            found.append(subdomain)
        except Exception:
            continue
    return found


def get_firewall_info(headers):
    """Return header lines that hint at a WAF/CDN in front of the site.

    Args:
        headers: A response-header mapping (e.g. requests' Headers).

    Returns:
        A list of "Header-Name: value" strings for every header whose
        name contains one of the known WAF/CDN markers.
    """
    waf_headers = ['x-firewall', 'x-sucuri-id', 'server', 'x-cdn', 'cf-ray']
    found = []
    for key in headers:
        for waf in waf_headers:
            if waf in key.lower():
                found.append(f"{key}: {headers[key]}")
    return found


def get_technologies(url):
    """Guess technologies used by *url* via keyword fingerprinting.

    Scans the raw page text plus the src/href of <script>/<link> tags
    for TECH_KEYWORDS.  This is a heuristic: keyword presence does not
    prove the technology is actually in use.

    Returns:
        A sorted list of matched keywords, or [] on failure.
    """
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        text_blob = response.text.lower()
        found_tech = set()
        for tech in TECH_KEYWORDS:
            if tech in text_blob:
                found_tech.add(tech)
        for tag in soup.find_all(['script', 'link']):
            src = tag.get('src') or tag.get('href')
            if src:
                for tech in TECH_KEYWORDS:
                    if tech in src.lower():
                        found_tech.add(tech)
        return sorted(found_tech)
    except Exception:
        return []


def get_certificate_info(url):
    """Return issuer and validity details of the site's TLS certificate.

    Connects to port 443 with SNI and certifi's CA bundle.

    Returns:
        A dict with 'Issuer', 'Start Date', 'Expiration Date' and
        'Validity Days', or None if the handshake or parsing fails.
    """
    try:
        parsed = urlparse(url)
        # Fix: .hostname (not .netloc) strips any ':port'/userinfo part,
        # which would otherwise break connect() and SNI.  Fall back to
        # .path for scheme-less inputs like "example.com".
        hostname = parsed.hostname or parsed.path
        context = ssl.create_default_context(cafile=certifi.where())
        with context.wrap_socket(socket.socket(), server_hostname=hostname) as s:
            s.settimeout(5)
            s.connect((hostname, 443))
            cert = s.getpeercert()
        issuer = dict(x[0] for x in cert['issuer'])['organizationName']
        # getpeercert() dates look like "Jun  1 12:00:00 2024 GMT".
        start_date = datetime.datetime.strptime(cert['notBefore'], "%b %d %H:%M:%S %Y %Z")
        end_date = datetime.datetime.strptime(cert['notAfter'], "%b %d %H:%M:%S %Y %Z")
        return {
            'Issuer': issuer,
            'Start Date': str(start_date),
            'Expiration Date': str(end_date),
            'Validity Days': (end_date - start_date).days
        }
    except Exception:
        return None


def get_site_info(url):
    """Assemble a best-effort reconnaissance report for *url*.

    Returns:
        A dict of all collected facts, or {"error": ...} when the URL
        is unreachable.  Individual sections degrade to None/[] on
        their own failures.
    """
    try:
        parsed = urlparse(url)
        # Scheme-less inputs ("example.com") land in .path, not .netloc.
        domain = parsed.netloc or parsed.path
        headers = requests.get(url, timeout=5).headers
    except Exception:
        return {"error": "Unable to reach the URL."}
    return {
        'Title': get_page_title(url),
        'Last Updated': get_last_modified(url),
        'Domain Creation Date': str(get_creation_date(domain)),
        'DNS A Records': get_dns_info(domain),
        'Subdomains Found': get_subdomains(domain),
        'Firewall Headers': get_firewall_info(headers),
        'Detected Technologies': get_technologies(url),
        'SSL Certificate': get_certificate_info(url)
    }