From 8c83ea1db9da75adf43d187b35b5c48db941aa79 Mon Sep 17 00:00:00 2001 From: Nester <101259687+Nester420@users.noreply.github.com> Date: Wed, 20 May 2026 18:26:13 -0700 Subject: [PATCH] Add files via upload --- firmware_hunter_pro_v4.py | 1363 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1363 insertions(+) create mode 100644 firmware_hunter_pro_v4.py diff --git a/firmware_hunter_pro_v4.py b/firmware_hunter_pro_v4.py new file mode 100644 index 0000000..89b8739 --- /dev/null +++ b/firmware_hunter_pro_v4.py @@ -0,0 +1,1363 @@ +#!/usr/bin/env python3 + + +import os +import re +import csv +import sys +import json +import math +import time +import html +import shutil +import hashlib +import argparse +import tempfile +import subprocess +import importlib.util +from pathlib import Path +from datetime import datetime +from collections import Counter, defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from threading import Lock + +VERSION = "4.0" +REPORT = defaultdict(list) +REPORT_LOCK = Lock() + +# ============================================================================= +# CONFIG +# ============================================================================= + +TEXT_EXTENSIONS = { + ".txt", ".log", ".conf", ".cfg", ".ini", ".json", ".xml", ".html", ".htm", + ".js", ".php", ".asp", ".cgi", ".lua", ".sh", ".service", ".default", + ".profile", ".passwd", ".shadow", ".pem", ".key", ".crt", ".pub", ".yaml", + ".yml", ".env", ".properties", ".rc", ".rules" +} + +INTERESTING_NAMES = { + "passwd", "shadow", "group", "hosts", "resolv.conf", "inittab", "fstab", + "rcS", "rc.local", "profile", "authorized_keys", "known_hosts", "motd", + "issue", "services", "inetd", "inetd.conf", "udhcpd.conf", "dnsmasq.conf", + "dropbear", "telnetd", "lighttpd.conf", "boa.conf", "httpd.conf", + "mini_httpd.conf", "uhttpd.conf", "nginx.conf", "config.xml", "nvram", + "default.cfg", "wpa_supplicant.conf", "os-release", "version", "release", + "syslog.conf", 
"crontab" +} + +EXECUTABLE_NAMES = { + "busybox", "telnetd", "dropbear", "sshd", "httpd", "boa", "lighttpd", + "mini_httpd", "uhttpd", "nginx", "nc", "netcat", "wget", "curl", "tftp", + "ftpget", "ftpput", "iptables", "ip6tables", "dnsmasq", "udhcpd", "pppd", + "openvpn", "openssl", "sqlite3", "ash", "sh" +} + +SUSPICIOUS_KEYWORDS = { + "telnetd", "dropbear", "busybox telnet", "nc -l", "netcat", "/bin/sh", + "/bin/ash", "reverse shell", "backdoor", "wget http", "curl http", "tftp", + "ftpget", "ftpput", "chmod 777", "0.0.0.0", "admin:admin", "root:root", + "password", "passwd", "shadow", "debug", "test", "factory", "developer", + "enable telnet", "remote shell", "hardcoded", "debug shell", "diagnostic", + "support account", "superuser", "hidden", "maintenance" +} + +MALWARE_IOC_KEYWORDS = { + "mirai": ["mirai", "busybox MIRAI", "/bin/busybox", "report.%s", "scanListen"], + "gafgyt/bashlite": ["gafgyt", "bashlite", "gayfgt", "loligang", "telnet scanner"], + "mozi": ["mozi", "Mozi.m", "dht.transmissionbt.com", "router.bittorrent.com"], + "xorddos": ["xorddos", "x0r", "/tmp/.x", "BB2FA36AAA9541F0"], + "miner": ["stratum+tcp", "xmrig", "minerd", "cryptonight", "monero"], + "generic_bot": ["CNC", "C2", "botnet", "udp flood", "syn flood", "http flood"] +} + +REGEX_PATTERNS = { + "ipv4": r"\b(?:\d{1,3}\.){3}\d{1,3}\b", + "mac_address": r"\b(?:[0-9a-fA-F]{2}[:-]){5}[0-9a-fA-F]{2}\b", + "url": r"https?://[^\s'\"<>]{4,}", + "email": r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", + "domain": r"\b(?:[a-zA-Z0-9-]+\.)+(?:com|net|org|io|cn|ru|info|biz|us|uk|co|dev|cloud|local|lan)\b", + "md5": r"\b[a-fA-F0-9]{32}\b", + "sha1": r"\b[a-fA-F0-9]{40}\b", + "sha256": r"\b[a-fA-F0-9]{64}\b", + "jwt": r"eyJ[a-zA-Z0-9_\-]+\.[a-zA-Z0-9_\-]+\.[a-zA-Z0-9_\-]+", + "api_key": r"(?i)(api[_-]?key|apikey|access[_-]?key|secret[_-]?key|client_secret)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{12,})", + "wifi_psk": 
r"(?i)(passphrase|psk|wpa_pass|wifi_pass|wifi_password|wireless_key|wpakey)\s*[:=]\s*['\"]?([^\s'\";&]{8,63})", + "possible_password": r"(?i)(password|passwd|pwd|pass|admin_pass|root_pass|web_pass|wifi_pass|secret|token)\s*[:=]\s*['\"]?([^\s'\";&]{4,})", + "possible_username": r"(?i)(username|user|login|admin_user|root_user)\s*[:=]\s*['\"]?([^\s'\";&]{3,})", + "mqtt_credential": r"(?i)(mqtt_(?:user|username|pass|password)|mqttUser|mqttPass)\s*[:=]\s*['\"]?([^\s'\";&]{3,})", + "basic_auth": r"(?i)authorization:\s*basic\s+([a-zA-Z0-9+/=]{8,})", +} + +PRIVATE_KEY_MARKERS = { + "-----BEGIN RSA PRIVATE KEY-----", + "-----BEGIN DSA PRIVATE KEY-----", + "-----BEGIN EC PRIVATE KEY-----", + "-----BEGIN OPENSSH PRIVATE KEY-----", + "-----BEGIN PRIVATE KEY-----" +} + +CERT_MARKERS = {"-----BEGIN CERTIFICATE-----"} + +COMPONENT_PATTERNS = { + "busybox": [ + r"BusyBox v([\w.\-]+)", + r"busybox\s+v([\w.\-]+)" + ], + "linux_kernel": [ + r"Linux version ([\w.\-]+)", + r"kernel version[:= ]+([\w.\-]+)" + ], + "openssl": [ + r"OpenSSL\s+([0-9][\w.\-]+[a-z]?)", + r"openssl-([0-9][\w.\-]+)" + ], + "dropbear": [ + r"Dropbear sshd v?([0-9][\w.\-]+)", + r"dropbear[_ -]?([0-9]{4}\.[0-9]{2})" + ], + "dnsmasq": [ + r"dnsmasq-?([0-9][\w.\-]+)", + r"dnsmasq version ([0-9][\w.\-]+)" + ], + "uClibc": [ + r"uClibc-?([0-9][\w.\-]+)" + ], + "musl": [ + r"musl-?([0-9][\w.\-]+)" + ], + "glibc": [ + r"GNU C Library.*?release version ([0-9][\w.\-]+)", + r"GLIBC_([0-9.]+)" + ], + "lighttpd": [ + r"lighttpd/([0-9][\w.\-]+)" + ], + "boa": [ + r"Boa/([0-9][\w.\-]+)" + ], + "uhttpd": [ + r"uhttpd[-/ ]([0-9][\w.\-]+)" + ] +} + +CVE_HINTS = { + "busybox": { + "note": "BusyBox is frequently old in embedded firmware. Verify exact version against NVD/vendor advisories.", + "keywords": ["busybox", "ash", "udhcp", "telnetd"] + }, + "openssl": { + "note": "OpenSSL version detected. 
# Ordered (section title, REPORT category key, output file name) triples.
IMPORTANT_SECTIONS = [
    ("Credential Findings", "credential_findings", "credential_findings.txt"),
    ("Possible Passwords", "possible_password", "possible_passwords.txt"),
    ("Wi-Fi PSKs", "wifi_psk", "wifi_psks.txt"),
    ("Possible Usernames", "possible_username", "possible_usernames.txt"),
    ("API Keys / Secrets", "api_key", "api_keys.txt"),
    ("MQTT Credentials", "mqtt_credential", "mqtt_credentials.txt"),
    ("JWT Tokens", "jwt", "jwt_tokens.txt"),
    ("Private Keys", "private_keys", "private_keys.txt"),
    ("Certificates", "certificates", "certificates.txt"),
    ("IP Addresses", "ipv4", "ip_addresses.txt"),
    ("MAC Addresses", "mac_address", "mac_addresses.txt"),
    ("URLs", "url", "urls.txt"),
    ("Domains", "domain", "domains.txt"),
    ("Emails", "email", "emails.txt"),
    ("Component Versions", "components", "components.txt"),
    ("Version Strings", "version_strings", "version_strings.txt"),
    ("Firmware Identity", "firmware_identity", "firmware_identity.txt"),
    ("Startup Scripts", "startup_scripts", "startup_scripts.txt"),
    ("Interesting Files", "interesting_files", "interesting_files.txt"),
    ("Interesting Binaries", "interesting_binaries", "interesting_binaries.txt"),
    ("ELF Binaries", "elf_binaries", "elf_binaries.txt"),
    ("Architecture Summary", "architecture_summary", "architecture_summary.txt"),
    ("Web Files", "web_files", "web_files.txt"),
    ("Web Routes", "web_routes", "web_routes.txt"),
    ("Web Endpoints", "web_endpoints", "web_endpoints.txt"),
    ("Cron Jobs", "cron_jobs", "cron_jobs.txt"),
    ("Users / Groups", "users_groups", "users_groups.txt"),
    ("SSH Related Files", "ssh_related", "ssh_related.txt"),
    ("Suspicious Keywords", "suspicious_keywords", "suspicious_keywords.txt"),
    ("Malware IOC Matches", "malware_iocs", "malware_iocs.txt"),
    ("High Entropy Files", "high_entropy_files", "high_entropy_files.txt"),
    ("Largest Files", "largest_files", "largest_files.txt"),
    ("BusyBox Findings", "busybox", "busybox.txt"),
    ("CVE References", "cve_references", "cve_references.txt"),
    ("CVE Hints", "cve_hints", "cve_hints.txt"),
    ("YARA Matches", "yara_matches", "yara_matches.txt"),
    ("Plugin Findings", "plugin_findings", "plugin_findings.txt"),
    ("Skipped Large Files", "skipped_large_files", "skipped_large_files.txt"),
    ("Tool Warnings", "tool_warnings", "tool_warnings.txt"),
    ("Scan Errors", "scan_errors", "scan_errors.txt"),
]


# =============================================================================
# BASIC HELPERS
# =============================================================================

def add(category, data):
    """Record *data* under *category* in the shared REPORT, skipping duplicates.

    Both the membership test and the append run while REPORT_LOCK is held, so
    two workers cannot insert the same entry at the same time.
    """
    with REPORT_LOCK:
        bucket = REPORT[category]
        if data in bucket:
            return
        bucket.append(data)


def safe_rel(path, root):
    """Return *path* relative to *root* as a string.

    Both paths are resolved first. If anything goes wrong (for example *path*
    is not located under *root*), ``str(path)`` is returned unchanged.
    """
    try:
        resolved_path = Path(path).resolve()
        resolved_root = Path(root).resolve()
        relative = resolved_path.relative_to(resolved_root)
    except Exception:
        return str(path)
    return str(relative)


def now_iso():
    """Return the current local time in ISO-8601 format, truncated to seconds."""
    current = datetime.now()
    return current.isoformat(timespec="seconds")
def sha256_file(path):
    """Return the SHA-256 hex digest of the file at *path*, or ``None`` on error.

    The file is read in 1 MiB chunks so it is never held in memory whole.
    """
    try:
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                h.update(chunk)
        return h.hexdigest()
    except Exception:
        # Best-effort: an unreadable file simply has no hash.
        return None


def entropy_file(path, max_bytes=1024 * 1024):
    """Return the Shannon entropy (bits per byte) of the first *max_bytes* bytes.

    Returns ``0.0`` for an empty file and ``None`` if the file cannot be read.
    The result is rounded to four decimal places.
    """
    try:
        with open(path, "rb") as f:
            data = f.read(max_bytes)
        if not data:
            return 0.0
        counts = Counter(data)
        length = len(data)
        entropy = -sum((count / length) * math.log2(count / length) for count in counts.values())
        return round(entropy, 4)
    except Exception:
        return None


def is_text_file(path):
    """Heuristically decide whether *path* (a ``Path``) holds text.

    A suffix listed in TEXT_EXTENSIONS is accepted without reading the file.
    Otherwise the first 4096 bytes are sampled: an empty or unreadable file is
    not text, and a sample counts as text when fewer than 8% of its bytes are
    NUL.
    """
    if path.suffix.lower() in TEXT_EXTENSIONS:
        return True
    try:
        with open(path, "rb") as f:
            chunk = f.read(4096)
        if not chunk:
            return False
        return chunk.count(b"\x00") / len(chunk) < 0.08
    except Exception:
        return False


def read_text(path, max_chars=None):
    """Return the decoded text of *path* (a ``Path``), ignoring decode errors.

    Args:
        path: ``pathlib.Path`` of the file to read.
        max_chars: When a positive number, at most that many characters are
            read. ``None`` or ``0`` return the whole file (``0`` is kept as
            "no limit" for backward compatibility).

    Returns:
        The text, or ``""`` if the file cannot be read.
    """
    try:
        with path.open(errors="ignore") as handle:
            if max_chars is not None and max_chars > 0:
                # Bounded read: do not decode a huge file just to slice it.
                return handle.read(max_chars)
            text = handle.read()
    except Exception:
        return ""
    # Preserves the historical slicing semantics for non-positive limits.
    return text[:max_chars] if max_chars else text


def is_elf(path):
    """Return ``True`` when the file at *path* starts with the ELF magic bytes."""
    try:
        with open(path, "rb") as f:
            return f.read(4) == b"\x7fELF"
    except Exception:
        return False


def elf_info(path):
    """Decode class, endianness and machine type from the first 20 header bytes.

    Returns a dict with keys ``class``, ``endian`` and ``machine``, or ``{}``
    when fewer than 20 bytes are available or the file cannot be read. The
    ELF magic itself is not verified here.
    """
    try:
        with open(path, "rb") as f:
            header = f.read(20)

        if len(header) < 20:
            return {}

        elf_class = header[4]
        endian = header[5]
        # An unrecognised endianness byte falls back to little-endian for
        # decoding the machine field; it is still reported as "unknown" below.
        byte_order = "big" if endian == 2 else "little"
        machine = int.from_bytes(header[18:20], byte_order)

        arch_map = {
            3: "x86",
            8: "MIPS",
            20: "PowerPC",
            40: "ARM",
            62: "x86_64",
            183: "AArch64",
            243: "RISC-V"
        }

        return {
            "class": "32-bit" if elf_class == 1 else "64-bit" if elf_class == 2 else "unknown",
            "endian": "little" if endian == 1 else "big" if endian == 2 else "unknown",
            "machine": arch_map.get(machine, f"unknown-{machine}")
        }
    except Exception:
        return {}
def run_file_cmd(path):
    """Return the output of ``file -b`` for *path*, stripped of whitespace.

    Returns ``None`` when the ``file`` utility is not on PATH, or when running
    it fails or exceeds the 5 second timeout.
    """
    if shutil.which("file") is None:
        return None
    try:
        result = subprocess.run(
            ["file", "-b", str(path)],
            capture_output=True,
            text=True,
            timeout=5,
            errors="ignore"
        )
        return result.stdout.strip()
    except Exception:
        return None


# =============================================================================
# EXTRACTION
# =============================================================================

def extract_with_binwalk(firmware_image, output_base):
    """Extract *firmware_image* with binwalk and return the directory to scan.

    A timestamped ``binwalk_extract_*`` directory is created under
    *output_base* and ``binwalk -Me`` is run into it (30 minute timeout).
    Extraction failures are recorded as tool warnings rather than raised.

    Returns:
        The highest-scoring rootfs candidate found in the extraction output,
        or the extraction directory itself when no candidate is found.

    Raises:
        RuntimeError: If binwalk is not available on PATH.
    """
    firmware_image = Path(firmware_image).resolve()

    # Verify the tool first so a missing binwalk does not leave an empty
    # timestamped directory behind.
    if shutil.which("binwalk") is None:
        raise RuntimeError("binwalk not found. Install it or scan an already extracted rootfs directory.")

    extract_dir = Path(output_base).resolve() / f"binwalk_extract_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
    extract_dir.mkdir(parents=True, exist_ok=True)

    print(f"[+] Extracting with binwalk into: {extract_dir}")
    try:
        subprocess.run(
            ["binwalk", "-Me", str(firmware_image), "--directory", str(extract_dir)],
            check=False,
            timeout=1800
        )
    except subprocess.TimeoutExpired:
        add("tool_warnings", {"tool": "binwalk", "warning": "binwalk extraction timed out"})
    except Exception as e:
        add("tool_warnings", {"tool": "binwalk", "warning": f"binwalk extraction failed: {e}"})

    candidates = find_rootfs_candidates(extract_dir)
    if candidates:
        print("[+] Possible rootfs candidates:")
        for idx, cand in enumerate(candidates[:10], 1):
            print(f"    {idx}. {cand}")
        return candidates[0]

    print("[!] No obvious rootfs found. Scanning extraction directory instead.")
    return extract_dir


def find_rootfs_candidates(base_dir):
    """Return directories under *base_dir* that look like a Linux root filesystem.

    Each directory is scored by the names of its immediate children (files or
    sub-directories). Directories scoring at least 5 are returned as ``Path``
    objects, highest score first; ties keep their walk order.
    """
    # (names, weight): the weight is granted once if ANY of the names is present.
    scoring = (
        ({"etc"}, 3),
        ({"bin"}, 2),
        ({"sbin"}, 2),
        ({"www", "web", "htdocs"}, 2),
        ({"passwd", "shadow"}, 2),
        ({"init.d"}, 2),
    )

    candidates = []
    for dirpath, dirnames, filenames in os.walk(Path(base_dir)):
        names = set(filenames) | set(dirnames)
        score = sum(weight for group, weight in scoring if names & group)
        if score >= 5:
            candidates.append((score, Path(dirpath)))

    # list.sort is stable, so equal scores stay in discovery order.
    candidates.sort(key=lambda item: item[0], reverse=True)
    return [directory for _, directory in candidates]


# =============================================================================
# SCANNERS
# =============================================================================

def extract_version_strings(text):
    """Collect version-like strings from *text*.

    Looks for ``key = value`` pairs with version-related keys, BusyBox and
    Linux banners, ``Vx.y.z`` tokens and ``YYYY-MM-DD`` dates. Whitespace in
    each value is collapsed, and values between 3 and 120 characters long are
    kept.

    Returns:
        Up to 40 unique values, sorted alphabetically.
    """
    patterns = [
        r"(?i)(version|build|release|fw_ver|firmware|software|sw_ver|hardware|hw_ver|model|vendor|product)\s*[:=]\s*['\"]?([\w.\-/ ]{3,80})",
        r"(?i)(busybox\s+v\d{1,3}\.\d{1,3}(?:\.\d{1,5})?)",
        r"(?i)(linux\s+version\s+[\w.\-]+)",
        r"\bV(\d{1,3}\.\d{1,3}\.\d{1,5})\b",
        r"\b(\d{4}-\d{2}-\d{2})\b",
    ]

    versions = set()

    for pattern in patterns:
        for match in re.findall(pattern, text):
            # Multi-group patterns yield tuples; the value is the last group.
            value = match[-1] if isinstance(match, tuple) else match
            value = " ".join(value.strip().split())
            if 3 <= len(value) <= 120:
                versions.add(value)

    return sorted(versions)[:40]
def _ioc_indicator_hits(lowered_text, indicators):
    """Return the sorted, unique *indicators* that occur in *lowered_text*.

    Matching is case-insensitive (*lowered_text* must already be lower-cased).
    Indicators of three characters or fewer (e.g. "C2", "CNC") must stand
    alone, i.e. not be surrounded by letters or digits; otherwise they would
    match inside hex strings and ordinary identifiers. Longer indicators keep
    plain substring matching.
    """
    hits = set()
    for indicator in indicators:
        needle = indicator.lower()
        if len(needle) <= 3:
            pattern = rf"(?<![a-z0-9]){re.escape(needle)}(?![a-z0-9])"
            found = re.search(pattern, lowered_text) is not None
        else:
            found = needle in lowered_text
        if found:
            hits.add(indicator)
    return sorted(hits)


def detect_malware_iocs(text, rel_path):
    """Record malware-family indicator matches found in *text*.

    For every family in MALWARE_IOC_KEYWORDS that has at least one matching
    indicator, a ``malware_iocs`` entry is added with the matched indicators.
    Confidence is "medium" for a single indicator and "high" for several.
    """
    low = text.lower()
    for family, indicators in MALWARE_IOC_KEYWORDS.items():
        hits = _ioc_indicator_hits(low, indicators)
        if hits:
            add("malware_iocs", {
                "family_or_category": family,
                "file": rel_path,
                "indicators": hits,
                "confidence": "medium" if len(hits) == 1 else "high"
            })


def scan_text_content(path, content, rel_path):
    """Run every text-based detector over *content* and record the findings.

    Covers the REGEX_PATTERNS table, private-key and certificate markers,
    suspicious keywords, version strings, component versions and malware IOCs.
    *path* is accepted for interface compatibility but not used here; findings
    are attributed to *rel_path*.
    """
    credential_kinds = {"possible_password", "wifi_psk", "api_key", "mqtt_credential"}

    for name, pattern in REGEX_PATTERNS.items():
        for match in re.findall(pattern, content):
            # Two-group patterns yield (key, value) tuples; others a string.
            if isinstance(match, tuple):
                finding = {"file": rel_path, "key": match[0], "value": match[1]}
                candidate = match[1]
            else:
                finding = {"file": rel_path, "value": match}
                candidate = match

            # The ipv4 regex accepts 0-999 per octet; drop out-of-range hits.
            if name == "ipv4" and not all(
                octet.isdigit() and 0 <= int(octet) <= 255
                for octet in candidate.split(".")
            ):
                continue

            add(name, finding)

            if name in credential_kinds:
                add_credential_finding(name, finding)

    for marker in PRIVATE_KEY_MARKERS:
        if marker in content:
            add("private_keys", {"file": rel_path, "marker": marker})
            add("credential_findings", {
                "file": rel_path,
                "type": "private_key",
                "value": marker,
                "confidence": "high",
                "severity": "critical",
                "reason": "Private key material marker found"
            })

    for marker in CERT_MARKERS:
        if marker in content:
            add("certificates", {"file": rel_path, "marker": marker})

    lowered = content.lower()
    for keyword in SUSPICIOUS_KEYWORDS:
        if keyword.lower() in lowered:
            add("suspicious_keywords", {"file": rel_path, "keyword": keyword})

    versions = extract_version_strings(content)
    if versions:
        add("version_strings", {"file": rel_path, "versions": versions})

    detect_components(content, rel_path)
    detect_malware_iocs(content, rel_path)
def classify_path(path, root, size, rel_path):
    """Record *path* under every report category its name or location matches.

    Args:
        path: ``pathlib.Path`` of the file being classified.
        root: Scan root; accepted for interface compatibility, unused here.
        size: File size stored in the recorded entry.
        rel_path: Path string stored in the recorded entry.

    The SHA-256 of the file is computed only when at least one category
    matches, so files that match nothing are not read and hashed.
    """
    lower = str(path).lower()
    name = path.name.lower()

    categories = []

    if name in INTERESTING_NAMES:
        categories.append("interesting_files")

    if any(p in lower for p in ["/etc/init.d", "/etc/rc", "/etc/inittab", "/etc/services", "/lib/systemd"]):
        categories.append("startup_scripts")

    if any(p in lower for p in ["/www", "/web", "/htdocs", "/cgi-bin", "/var/www"]):
        categories.append("web_files")

    if any(x in name for x in ["config", ".conf", ".cfg", ".ini", ".json", ".xml", ".yaml", ".yml"]):
        categories.append("config_files")

    # "crontab" contains "cron", so one substring test covers both.
    if "cron" in lower:
        categories.append("cron_jobs")

    if name in {"passwd", "shadow", "group"}:
        categories.append("users_groups")

    if any(x in lower for x in ["ssh", "dropbear", "authorized_keys", "host_key"]):
        categories.append("ssh_related")

    is_known_binary = name in EXECUTABLE_NAMES

    if not categories and not is_known_binary:
        return

    info = {"file": rel_path, "sha256": sha256_file(path), "size": size}

    for category in categories:
        add(category, info)

    if is_known_binary:
        add("interesting_binaries", {**info, "reason": "Common embedded Linux service or utility"})


def scan_busybox(path, text, rel_path):
    """Record BusyBox-related strings found in *text*.

    Nothing is recorded unless "busybox" appears (case-insensitively) in the
    path or in *text*. The entry holds the first ``BusyBox v...`` banner (or
    ``None``), the applet keywords present anywhere in the text, and up to 100
    unique lines under 250 characters that mention one of those keywords.
    """
    lowered = text.lower()  # computed once; reused by every check below
    if "busybox" not in str(path).lower() and "busybox" not in lowered:
        return

    banner = re.search(r"BusyBox v[\w.\-]+", text)
    applet_keywords = ["telnet", "wget", "tftp", "httpd", "ash", "sh", "nc", "ftpget", "ftpput"]

    applet_hits = set()
    for line in text.splitlines():
        stripped = line.strip()
        if len(stripped) < 250 and any(x in line.lower() for x in applet_keywords):
            applet_hits.add(stripped)

    risky_applets = {app for app in applet_keywords if app in lowered}

    add("busybox", {
        "file": rel_path,
        "banner": banner.group(0) if banner else None,
        "risky_applet_hints": sorted(risky_applets),
        "possible_applets_or_strings": sorted(applet_hits)[:100]
    })