# CREATED BY NOWMEEE - ECLIPSE SECURITY LABS
# =========================================

import requests
from bs4 import BeautifulSoup
import zipfile
import os

# Landing pages that list the downloadable domain archives.
NRD_URL = "https://www.newlyregistereddomains.com/"
WHOISDS_URL = "https://www.whoisds.com/newly-registered-domains"

# Working directories (relative to the current directory) and merged output.
DOWNLOAD_DIR = "downloads"
EXTRACT_DIR = "extracted"
OUTPUT_FILE = "final.txt"

# Create both working directories up front; existing ones are left untouched.
for _work_dir in (DOWNLOAD_DIR, EXTRACT_DIR):
    os.makedirs(_work_dir, exist_ok=True)


# ================================
# NEWLYREGISTEREDDOMAINS.COM
# ================================
def get_links_nrd():
    """Collect download links from the newlyregistereddomains.com page.

    Fetches NRD_URL, finds the first ``div`` with class ``col-md-6``, takes
    the first table inside it, skips the table's first row, and collects one
    link per remaining row.

    Returns:
        list[str]: Absolute URLs in page order. Empty when the request fails,
        the server answers with an HTTP error, or the expected markup is
        missing.
    """
    # Function-scope import so this block does not rely on a file-level change.
    from urllib.parse import urljoin

    print("\n[+] Checking newlyregistereddomains.com ...")
    try:
        r = requests.get(NRD_URL, timeout=15)
        # Do not parse an error page as if it were the listing.
        r.raise_for_status()
    except requests.RequestException as e:
        # Report and return nothing so the other source can still be scraped.
        print(f"[-] NRD request failed: {e}")
        return []

    soup = BeautifulSoup(r.text, "html.parser")

    links = []
    section = soup.find("div", {"class": "col-md-6"})
    if section is None:
        return links

    # The section may exist without a table; treat that like a missing section.
    table = section.find("table")
    if table is None:
        return links

    # The first row is skipped, as in the original (presumably a header row).
    for row in table.find_all("tr")[1:]:
        # Only consider anchors that actually carry an href attribute.
        a = row.find("a", href=True)
        if a:
            # urljoin resolves relative hrefs against NRD_URL and leaves
            # absolute or protocol-relative hrefs intact.
            links.append(urljoin(NRD_URL, a["href"]))

    print(f"[+] Found {len(links)} NRD links")
    return links


# ================================
# WHOISDS.COM
# ================================
def get_links_whoisds():
    """Collect download links from the whoisds.com newly-registered page.

    Fetches WHOISDS_URL, takes the first table on the page, skips its first
    row, and collects one link per remaining row.

    Returns:
        list[str]: Absolute URLs in page order. Empty when the request fails,
        the server answers with an HTTP error, or no table is found.
    """
    # Function-scope import so this block does not rely on a file-level change.
    from urllib.parse import urljoin

    print("\n[+] Checking whoisds.com ...")
    try:
        r = requests.get(WHOISDS_URL, timeout=15)
        # Do not parse an error page as if it were the listing.
        r.raise_for_status()
    except requests.RequestException as e:
        # Report and return nothing so the other source's links are still used.
        print(f"[-] WHOISDS request failed: {e}")
        return []

    soup = BeautifulSoup(r.text, "html.parser")

    links = []
    table = soup.find("table")
    if table is None:
        return links

    # The first row is skipped, as in the original (presumably a header row).
    for row in table.find_all("tr")[1:]:
        # Only consider anchors that actually carry an href attribute.
        a = row.find("a", href=True)
        if a:
            # urljoin covers the "//host/..." and "/path" cases handled
            # before, and also resolves relative hrefs with no leading slash.
            links.append(urljoin(WHOISDS_URL, a["href"]))

    print(f"[+] Found {len(links)} WHOISDS links")
    return links


# ================================
# DOWNLOAD ZIP
# ================================
def download_zip(url):
    """Download ``url`` into DOWNLOAD_DIR and return the saved file path.

    The file is named after the second-to-last ``/``-separated segment of the
    URL, with ``.zip`` appended. If that segment is missing or empty, the last
    non-empty segment is used instead.

    Args:
        url: Address of the archive to download.

    Returns:
        The path of the saved file, or None when the request fails, the server
        answers with an HTTP error status, or the file cannot be written.
    """
    parts = url.split("/")
    stem = parts[-2] if len(parts) >= 2 else ""
    if not stem:
        # Avoid an IndexError or a bare ".zip" filename for unusual URLs.
        stem = next((p for p in reversed(parts) if p), "download")
    # NOTE(review): URLs sharing the same second-to-last segment map to the
    # same filename and overwrite each other — confirm the link formats.
    save_path = os.path.join(DOWNLOAD_DIR, stem + ".zip")

    print(f"[+] Downloading: {url}")

    try:
        r = requests.get(url, timeout=30)
        # Fail before writing so an HTML error page is never saved as a zip.
        r.raise_for_status()
        with open(save_path, "wb") as f:
            f.write(r.content)
        print(f"[✓] Saved {save_path}")
        return save_path
    except Exception as e:
        # Best-effort: one failed download must not stop the remaining ones.
        print(f"[-] Download failed: {e}")
        return None


# ================================
# EXTRACT ZIP
# ================================
def extract_zip(zip_path):
    """Extract every member of the archive at ``zip_path`` into EXTRACT_DIR.

    Failures are reported on stdout and otherwise ignored, so one bad archive
    does not stop the rest of the run.

    Args:
        zip_path: Path to the archive file.

    Returns:
        None.
    """
    try:
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(EXTRACT_DIR)
        print(f"[✓] Extracted {zip_path}")
    except Exception as e:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still propagate.
        print(f"[-] Failed to extract: {zip_path} ({e})")


# ================================
# CLEANER: Deduplicate + Normalize
# ================================
def clean_domain(domain):
    """Normalize one raw line into a bare, lower-case host name.

    Surrounding whitespace is stripped, the text is lower-cased, any
    ``http://`` / ``https://`` is removed, and everything from the first
    ``/`` onward is dropped.

    Args:
        domain: A raw line of text.

    Returns:
        The normalized string, or None when the line is empty, contains a
        space, or is shorter than three characters after normalization.
    """
    domain = domain.strip().lower()

    # Reject empty lines and lines containing a space.
    if not domain or " " in domain:
        return None

    # Remove the protocol.
    domain = domain.replace("http://", "").replace("https://", "")

    # Keep only the host part: drop the path and any trailing slash.
    domain = domain.split("/")[0]

    # Check the length after normalization, so inputs such as "http://" or
    # "https://ab" are rejected instead of yielding "" or a 2-character name.
    if len(domain) < 3:
        return None

    return domain


# ================================
# MERGE ALL FILES
# ================================
def merge_all():
    """Merge every ``.txt`` file under EXTRACT_DIR into OUTPUT_FILE.

    Each line is passed through ``clean_domain``; falsy results are dropped
    and the remaining values are deduplicated. The unique values are written
    to OUTPUT_FILE in sorted order, one per line.
    """
    unique = set()

    for dirpath, _dirnames, filenames in os.walk(EXTRACT_DIR):
        for name in filenames:
            if not name.endswith(".txt"):
                continue
            source = os.path.join(dirpath, name)
            # Undecodable bytes are skipped rather than raising.
            with open(source, "r", errors="ignore") as handle:
                # filter(None, ...) discards the falsy results of clean_domain.
                unique.update(filter(None, map(clean_domain, handle)))

    print(f"\n[+] Unique domains: {len(unique)}")

    with open(OUTPUT_FILE, "w") as out:
        out.writelines(entry + "\n" for entry in sorted(unique))

    print(f"[✓] Merged output saved → {OUTPUT_FILE}")


# ================================
# MAIN
# ================================
def main():
    """Run the pipeline: collect links, download, extract, then merge."""
    print("=== NRD Scraper (2 sources + auto dedupe) ===\n")

    # Query the sources in a fixed order: NRD first, then WHOISDS.
    nrd_links = get_links_nrd()
    whoisds_links = get_links_whoisds()
    all_links = nrd_links + whoisds_links

    print(f"\n[+] Total downloadable ZIPs found: {len(all_links)}\n")

    # Keep only the truthy results (the saved paths) of each download.
    zip_files = [saved for saved in map(download_zip, all_links) if saved]

    print("\n[+] Extracting ZIP files...")
    for archive in zip_files:
        extract_zip(archive)

    merge_all()
    print("\n[✓] All Done!")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
