# scanner.py
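# Gathers basic recon data for a URL: page title, Last-Modified header, WHOIS creation
# date, DNS A records, common subdomains, WAF/CDN headers, technology keywords, and a
# TLS certificate summary.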

import datetime
import json
import socket
import ssl
from urllib.parse import urlparse

import certifi
import dns.resolver
import requests
import whois
from bs4 import BeautifulSoup

# Wordlists for brute-force subdomain checks and keyword-based technology fingerprinting.
COMMON_SUBDOMAINS = ['www', 'mail', 'ftp', 'cpanel', 'webmail', 'admin', 'test', 'blog', 'dev', 'portal', 'shop', 'api']
TECH_KEYWORDS = ['wordpress', 'joomla', 'drupal', 'laravel', 'django', 'angular', 'react', 'vue',
                 'jquery', 'html', 'php', 'css', 'sqlite', 'javascript', 'mysql', 'oracle', 'python', 'c+', 'c#']

def get_page_title(url):
    # Fetch the page and return the contents of its <title> tag, if present.
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.title.string if soup.title else None
    except Exception:
        return None

def get_last_modified(url):
    # HEAD request; returns the Last-Modified header if the server sends one.
    try:
        response = requests.head(url, timeout=5)
        return response.headers.get('Last-Modified')
    except Exception:
        return None

def get_creation_date(domain):
    # WHOIS lookup; some registrars return a list of creation dates, so take the first.
    try:
        whois_info = whois.whois(domain)
        creation_date = whois_info.creation_date
        return creation_date[0] if isinstance(creation_date, list) else creation_date
    except Exception:
        return None

def get_dns_info(domain):
    # Resolve the domain's A records and return the IP addresses.
    try:
        answers = dns.resolver.resolve(domain, 'A')
        return [rdata.address for rdata in answers]
    except Exception:
        return []

def get_subdomains(domain):
    # Probe a small wordlist of common subdomains for A records.
    found = []
    for sub in COMMON_SUBDOMAINS:
        subdomain = f"{sub}.{domain}"
        try:
            dns.resolver.resolve(subdomain, 'A')
            found.append(subdomain)
        except Exception:
            continue
    return found

def get_firewall_info(headers):
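    # Look for response headers commonly set by WAFs/CDNs (e.g. Sucuri ID, Cloudflare Ray).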
    waf_headers = ['x-firewall', 'x-sucuri-id', 'server', 'x-cdn', 'cf-ray']
    found = []
    for key in headers:
        for waf in waf_headers:
            if waf in key.lower():
                found.append(f"{key}: {headers[key]}")
    return found

def get_technologies(url):
    # Naive fingerprinting: look for known keywords in the page body and in script/link URLs.
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, 'html.parser')
        text_blob = response.text.lower()

        found_tech = set()
        for tech in TECH_KEYWORDS:
            if tech in text_blob:
                found_tech.add(tech)

        for tag in soup.find_all(['script', 'link']):
            src = tag.get('src') or tag.get('href')
            if src:
                for tech in TECH_KEYWORDS:
                    if tech in src.lower():
                        found_tech.add(tech)

        return sorted(found_tech)
    except Exception:
        return []

def get_certificate_info(url):
    # Open a TLS connection on port 443 and summarise the server certificate.
    try:
        hostname = urlparse(url).hostname  # .hostname drops any port, unlike .netloc
        context = ssl.create_default_context(cafile=certifi.where())
        with context.wrap_socket(socket.socket(), server_hostname=hostname) as s:
            s.settimeout(5)
            s.connect((hostname, 443))
            cert = s.getpeercert()
            issuer = dict(x[0] for x in cert['issuer']).get('organizationName')
            start_date = datetime.datetime.strptime(cert['notBefore'], "%b %d %H:%M:%S %Y %Z")
            end_date = datetime.datetime.strptime(cert['notAfter'], "%b %d %H:%M:%S %Y %Z")
            return {
                'Issuer': issuer,
                'Start Date': str(start_date),
                'Expiration Date': str(end_date),
                'Validity Days': (end_date - start_date).days
            }
    except Exception:
        return None

def get_site_info(url):
    # Aggregate all checks for a single URL into one report dictionary.
    try:
        parsed = urlparse(url)
        domain = parsed.hostname or parsed.path  # .hostname drops any port; fall back for schemeless input
        headers = requests.get(url, timeout=5).headers
    except Exception:
        return {"error": "Unable to reach the URL."}

    return {
        'Title': get_page_title(url),
        'Last Updated': get_last_modified(url),
        'Domain Creation Date': str(get_creation_date(domain)),
        'DNS A Records': get_dns_info(domain),
        'Subdomains Found': get_subdomains(domain),
        'Firewall Headers': get_firewall_info(headers),
        'Detected Technologies': get_technologies(url),
        'SSL Certificate': get_certificate_info(url)
    }
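
# Illustrative usage sketch (assumption: a standalone entry point; the target URL below is
# a placeholder and not part of the original module).
if __name__ == "__main__":
    report = get_site_info("https://example.com")
    print(json.dumps(report, indent=2, default=str))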