#!/usr/bin/env python3
"""
Firmware Hunter Pro v4.0

A safer offline firmware triage framework for extracted router / IoT firmware.

What it does:
- Scans extracted firmware directories safely without executing firmware binaries.
- Optionally extracts a firmware image with binwalk if binwalk is installed.
- Finds credentials, keys, certs, IPs, domains, URLs, MACs, JWTs, API keys, hashes, CVEs.
- Maps web UI files, CGI handlers, forms, scripts, endpoints, and admin-looking routes.
- Detects ELF binaries, architecture hints, BusyBox, Linux kernel, OpenSSL, Dropbear, dnsmasq, uClibc/musl/glibc.
- Scores findings with severity and confidence.
- Produces TXT, JSON, HTML, Markdown, CSV, and separate evidence files.
- Supports simple external plugins from a plugins directory.

Safe by default:
- Does not execute firmware binaries.
- Only reads files and optionally runs host tools like strings, file, binwalk, yara.

Usage:
    python3 firmware_hunter_pro.py /path/to/squashfs-root
    python3 firmware_hunter_pro.py firmware.bin --extract
    python3 firmware_hunter_pro.py /path/to/rootfs --quick
    python3 firmware_hunter_pro.py /path/to/rootfs --yara rules.yar
    python3 firmware_hunter_pro.py /path/to/rootfs --plugins plugins/
"""

import os
import re
import csv
import sys
import json
import math
import time
import html
import shutil
import hashlib
import argparse
import tempfile
import subprocess
import importlib.util
from pathlib import Path
from datetime import datetime
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock

VERSION = "4.0"

# Global findings store: category name -> list of finding dicts.
# All writes are expected to go through add(), which holds REPORT_LOCK.
REPORT = defaultdict(list)
REPORT_LOCK = Lock()

# =============================================================================
# CONFIG
# =============================================================================

# File suffixes that is_text_file() treats as text without sniffing content.
TEXT_EXTENSIONS = {
    ".txt", ".log", ".conf", ".cfg", ".ini", ".json", ".xml", ".html", ".htm",
    ".js", ".php", ".asp", ".cgi", ".lua", ".sh", ".service",
    ".default", ".profile", ".passwd", ".shadow", ".pem", ".key", ".crt",
    ".pub", ".yaml", ".yml", ".env", ".properties", ".rc", ".rules"
}

# Exact file names recorded under "interesting_files".
INTERESTING_NAMES = {
    "passwd", "shadow", "group", "hosts", "resolv.conf", "inittab", "fstab",
    "rcS", "rc.local", "profile", "authorized_keys", "known_hosts", "motd",
    "issue", "services", "inetd", "inetd.conf", "udhcpd.conf", "dnsmasq.conf",
    "dropbear", "telnetd", "lighttpd.conf", "boa.conf", "httpd.conf",
    "mini_httpd.conf", "uhttpd.conf", "nginx.conf", "config.xml", "nvram",
    "default.cfg", "wpa_supplicant.conf", "os-release", "version", "release",
    "syslog.conf", "crontab"
}

# Exact file names recorded under "interesting_binaries".
EXECUTABLE_NAMES = {
    "busybox", "telnetd", "dropbear", "sshd", "httpd", "boa", "lighttpd",
    "mini_httpd", "uhttpd", "nginx", "nc", "netcat", "wget", "curl", "tftp",
    "ftpget", "ftpput", "iptables", "ip6tables", "dnsmasq", "udhcpd", "pppd",
    "openvpn", "openssl", "sqlite3", "ash", "sh"
}

# Case-insensitive substrings recorded under "suspicious_keywords".
SUSPICIOUS_KEYWORDS = {
    "telnetd", "dropbear", "busybox telnet", "nc -l", "netcat", "/bin/sh",
    "/bin/ash", "reverse shell", "backdoor", "wget http", "curl http", "tftp",
    "ftpget", "ftpput", "chmod 777", "0.0.0.0", "admin:admin", "root:root",
    "password", "passwd", "shadow", "debug", "test", "factory", "developer",
    "enable telnet", "remote shell", "hardcoded", "debug shell", "diagnostic",
    "support account", "superuser", "hidden", "maintenance"
}

# Malware family / category -> indicator strings searched for in file content.
MALWARE_IOC_KEYWORDS = {
    "mirai": ["mirai", "busybox MIRAI", "/bin/busybox", "report.%s", "scanListen"],
    "gafgyt/bashlite": ["gafgyt", "bashlite", "gayfgt", "loligang", "telnet scanner"],
    "mozi": ["mozi", "Mozi.m", "dht.transmissionbt.com", "router.bittorrent.com"],
    "xorddos": ["xorddos", "x0r", "/tmp/.x", "BB2FA36AAA9541F0"],
    "miner": ["stratum+tcp", "xmrig", "minerd", "cryptonight", "monero"],
    "generic_bot": ["CNC", "C2", "botnet", "udp flood", "syn flood", "http flood"]
}

# Finding category -> regex. Patterns with two capture groups yield
# (key, value) pairs; the others yield a single matched value.
REGEX_PATTERNS = {
    "ipv4": r"\b(?:\d{1,3}\.){3}\d{1,3}\b",
    "mac_address": r"\b(?:[0-9a-fA-F]{2}[:-]){5}[0-9a-fA-F]{2}\b",
    "url": r"https?://[^\s'\"<>]{4,}",
    "email": r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+",
    "domain": r"\b(?:[a-zA-Z0-9-]+\.)+(?:com|net|org|io|cn|ru|info|biz|us|uk|co|dev|cloud|local|lan)\b",
    "md5": r"\b[a-fA-F0-9]{32}\b",
    "sha1": r"\b[a-fA-F0-9]{40}\b",
    "sha256": r"\b[a-fA-F0-9]{64}\b",
    "jwt": r"eyJ[a-zA-Z0-9_\-]+\.[a-zA-Z0-9_\-]+\.[a-zA-Z0-9_\-]+",
    "api_key": r"(?i)(api[_-]?key|apikey|access[_-]?key|secret[_-]?key|client_secret)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{12,})",
    "wifi_psk": r"(?i)(passphrase|psk|wpa_pass|wifi_pass|wifi_password|wireless_key|wpakey)\s*[:=]\s*['\"]?([^\s'\";&]{8,63})",
    "possible_password": r"(?i)(password|passwd|pwd|pass|admin_pass|root_pass|web_pass|wifi_pass|secret|token)\s*[:=]\s*['\"]?([^\s'\";&]{4,})",
    "possible_username": r"(?i)(username|user|login|admin_user|root_user)\s*[:=]\s*['\"]?([^\s'\";&]{3,})",
    "mqtt_credential": r"(?i)(mqtt_(?:user|username|pass|password)|mqttUser|mqttPass)\s*[:=]\s*['\"]?([^\s'\";&]{3,})",
    "basic_auth": r"(?i)authorization:\s*basic\s+([a-zA-Z0-9+/=]{8,})",
}

PRIVATE_KEY_MARKERS = {
    "-----BEGIN RSA PRIVATE KEY-----",
    "-----BEGIN DSA PRIVATE KEY-----",
    "-----BEGIN EC PRIVATE KEY-----",
    "-----BEGIN OPENSSH PRIVATE KEY-----",
    "-----BEGIN PRIVATE KEY-----"
}

CERT_MARKERS = {"-----BEGIN CERTIFICATE-----"}

# Component name -> regexes whose last capture group is the version string.
COMPONENT_PATTERNS = {
    "busybox": [
        r"BusyBox v([\w.\-]+)",
        r"busybox\s+v([\w.\-]+)"
    ],
    "linux_kernel": [
        r"Linux version ([\w.\-]+)",
        r"kernel version[:= ]+([\w.\-]+)"
    ],
    "openssl": [
        r"OpenSSL\s+([0-9][\w.\-]+[a-z]?)",
        r"openssl-([0-9][\w.\-]+)"
    ],
    "dropbear": [
        r"Dropbear sshd v?([0-9][\w.\-]+)",
        r"dropbear[_ -]?([0-9]{4}\.[0-9]{2})"
    ],
    "dnsmasq": [
        r"dnsmasq-?([0-9][\w.\-]+)",
        r"dnsmasq version ([0-9][\w.\-]+)"
    ],
    "uClibc": [
        r"uClibc-?([0-9][\w.\-]+)"
    ],
    "musl": [
        r"musl-?([0-9][\w.\-]+)"
    ],
    "glibc": [
        r"GNU C Library.*?release version ([0-9][\w.\-]+)",
        r"GLIBC_([0-9.]+)"
    ],
    "lighttpd": [
        r"lighttpd/([0-9][\w.\-]+)"
    ],
    "boa": [
        r"Boa/([0-9][\w.\-]+)"
    ],
    "uhttpd": [
        r"uhttpd[-/ ]([0-9][\w.\-]+)"
    ]
}

# Component name -> advisory note attached to "cve_hints" findings.
CVE_HINTS = {
    "busybox": {
        "note": "BusyBox is frequently old in embedded firmware. Verify exact version against NVD/vendor advisories.",
        "keywords": ["busybox", "ash", "udhcp", "telnetd"]
    },
    "openssl": {
        "note": "OpenSSL version detected. Check for old TLS/crypto CVEs and weak certificate/key usage.",
        "keywords": ["openssl", "libssl", "libcrypto"]
    },
    "dropbear": {
        "note": "Dropbear SSH detected. Check version against Dropbear security advisories.",
        "keywords": ["dropbear", "sshd"]
    },
    "dnsmasq": {
        "note": "dnsmasq detected. Check version for DNS/DHCP vulnerabilities.",
        "keywords": ["dnsmasq"]
    },
    "boa": {
        "note": "Boa web server is commonly outdated in IoT firmware.",
        "keywords": ["boa"]
    }
}

# (section title, REPORT category, evidence file name)
IMPORTANT_SECTIONS = [
    ("Credential Findings", "credential_findings", "credential_findings.txt"),
    ("Possible Passwords", "possible_password", "possible_passwords.txt"),
    ("Wi-Fi PSKs", "wifi_psk", "wifi_psks.txt"),
    ("Possible Usernames", "possible_username", "possible_usernames.txt"),
    ("API Keys / Secrets", "api_key", "api_keys.txt"),
    ("MQTT Credentials", "mqtt_credential", "mqtt_credentials.txt"),
    ("JWT Tokens", "jwt", "jwt_tokens.txt"),
    ("Private Keys", "private_keys", "private_keys.txt"),
    ("Certificates", "certificates", "certificates.txt"),
    ("IP Addresses", "ipv4", "ip_addresses.txt"),
    ("MAC Addresses", "mac_address", "mac_addresses.txt"),
    ("URLs", "url", "urls.txt"),
    ("Domains", "domain", "domains.txt"),
    ("Emails", "email", "emails.txt"),
    ("Component Versions", "components", "components.txt"),
    ("Version Strings", "version_strings", "version_strings.txt"),
    ("Firmware Identity", "firmware_identity", "firmware_identity.txt"),
    ("Startup Scripts", "startup_scripts", "startup_scripts.txt"),
    ("Interesting Files", "interesting_files", "interesting_files.txt"),
    ("Interesting Binaries", "interesting_binaries", "interesting_binaries.txt"),
    ("ELF Binaries", "elf_binaries", "elf_binaries.txt"),
    ("Architecture Summary", "architecture_summary", "architecture_summary.txt"),
    ("Web Files", "web_files", "web_files.txt"),
    ("Web Routes", "web_routes", "web_routes.txt"),
    ("Web Endpoints", "web_endpoints", "web_endpoints.txt"),
    ("Cron Jobs", "cron_jobs", "cron_jobs.txt"),
    ("Users / Groups", "users_groups", "users_groups.txt"),
    ("SSH Related Files", "ssh_related", "ssh_related.txt"),
    ("Suspicious Keywords", "suspicious_keywords", "suspicious_keywords.txt"),
    ("Malware IOC Matches", "malware_iocs", "malware_iocs.txt"),
    ("High Entropy Files", "high_entropy_files", "high_entropy_files.txt"),
    ("Largest Files", "largest_files", "largest_files.txt"),
    ("BusyBox Findings", "busybox", "busybox.txt"),
    ("CVE References", "cve_references", "cve_references.txt"),
    ("CVE Hints", "cve_hints", "cve_hints.txt"),
    ("YARA Matches", "yara_matches", "yara_matches.txt"),
    ("Plugin Findings", "plugin_findings", "plugin_findings.txt"),
    ("Skipped Large Files", "skipped_large_files", "skipped_large_files.txt"),
    ("Tool Warnings", "tool_warnings", "tool_warnings.txt"),
    ("Scan Errors", "scan_errors", "scan_errors.txt"),
]


# =============================================================================
# BASIC HELPERS
# =============================================================================

def add(category, data):
    """Thread-safe de-duplicated report append.

    Appends *data* to ``REPORT[category]`` unless an equal entry is already
    present. The membership test and the append both run under
    ``REPORT_LOCK``. The membership test is a linear scan of the category
    list.
    """
    with REPORT_LOCK:
        bucket = REPORT[category]
        if data not in bucket:
            bucket.append(data)


def safe_rel(path, root):
    """Return *path* relative to *root* as a string.

    Both paths are resolved first. If the relative path cannot be computed
    (for example *path* is not located under *root*), ``str(path)`` is
    returned unchanged instead of raising.
    """
    try:
        return str(Path(path).resolve().relative_to(Path(root).resolve()))
    except Exception:
        # Best effort: a report path is still useful when it is not relative.
        return str(path)


def now_iso():
    """Return the current local time as an ISO-8601 string with second precision."""
    return datetime.now().isoformat(timespec="seconds")


def check_dependencies(yara_requested=False, extract_requested=False):
    """Warn about host tools that are missing from PATH.

    ``strings`` is always checked; ``yara`` and ``binwalk`` are checked only
    when the corresponding feature was requested. Each missing tool is
    recorded under the ``tool_warnings`` report category and echoed to
    stdout. Nothing is raised.
    """
    dependencies = ["strings"]
    if yara_requested:
        dependencies.append("yara")
    if extract_requested:
        dependencies.append("binwalk")

    for tool in dependencies:
        if shutil.which(tool) is None:
            add("tool_warnings", {
                "tool": tool,
                "warning": f"'{tool}' was not found in PATH. Some features may be limited."
            })
            print(f"[!] Warning: '{tool}' not found. Some features may be limited.")
def sha256_file(path):
    """Return the hex SHA-256 digest of the file at *path*, or None on any error.

    The file is read in 1 MiB chunks so large firmware blobs are not loaded
    into memory at once.
    """
    try:
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                digest.update(chunk)
        return digest.hexdigest()
    except Exception:
        # Best effort: an unreadable file must not abort the scan.
        return None


def entropy_file(path, max_bytes=1024 * 1024):
    """Return the Shannon entropy (bits per byte, rounded to 4 places).

    Only the first *max_bytes* bytes are sampled. An empty file yields 0.0;
    any read error yields None.
    """
    try:
        with open(path, "rb") as f:
            data = f.read(max_bytes)
        if not data:
            return 0.0
        length = len(data)
        entropy = -sum(
            (count / length) * math.log2(count / length)
            for count in Counter(data).values()
        )
        return round(entropy, 4)
    except Exception:
        return None


def is_text_file(path):
    """Heuristically decide whether *path* (a ``Path``) holds text.

    A suffix listed in TEXT_EXTENSIONS is accepted immediately. Otherwise the
    first 4096 bytes are sampled and the file counts as text when fewer than
    8% of them are NUL bytes. Empty or unreadable files are not text.
    """
    if path.suffix.lower() in TEXT_EXTENSIONS:
        return True
    try:
        with open(path, "rb") as f:
            chunk = f.read(4096)
        if not chunk:
            return False
        return chunk.count(b"\x00") / len(chunk) < 0.08
    except Exception:
        return False


def read_text(path, max_chars=None):
    """Return the decoded content of *path*, or "" if it cannot be read.

    *path* may be a ``Path`` or a plain string. Content is decoded as UTF-8
    with undecodable bytes dropped, so results do not depend on the host
    locale. When *max_chars* is a positive number only that many characters
    are read from disk, instead of loading the whole file and slicing it.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            if max_chars and max_chars > 0:
                return f.read(max_chars)
            text = f.read()
        # Preserve slice semantics for unusual (negative) limits.
        return text[:max_chars] if max_chars else text
    except Exception:
        return ""


# First four bytes of every ELF image.
_ELF_MAGIC = b"\x7fELF"

# e_machine values recognised by elf_info().
_ELF_MACHINES = {
    3: "x86",
    8: "MIPS",
    20: "PowerPC",
    40: "ARM",
    62: "x86_64",
    183: "AArch64",
    243: "RISC-V",
}


def is_elf(path):
    """Return True when the file at *path* starts with the ELF magic bytes."""
    try:
        with open(path, "rb") as f:
            return f.read(4) == _ELF_MAGIC
    except Exception:
        return False


def elf_info(path):
    """Return class, endianness and machine name parsed from an ELF header.

    Returns a dict with the keys ``class``, ``endian`` and ``machine``, or an
    empty dict when the file is unreadable, shorter than 20 bytes, or does
    not start with the ELF magic (so non-ELF files no longer produce a
    made-up architecture).
    """
    try:
        with open(path, "rb") as f:
            header = f.read(20)
    except Exception:
        return {}

    if len(header) < 20 or header[:4] != _ELF_MAGIC:
        return {}

    elf_class = header[4]
    endian = header[5]
    # e_machine is stored in the file's own byte order; an unrecognised
    # EI_DATA value is decoded as little-endian.
    byte_order = "big" if endian == 2 else "little"
    machine = int.from_bytes(header[18:20], byte_order)

    return {
        "class": "32-bit" if elf_class == 1 else "64-bit" if elf_class == 2 else "unknown",
        "endian": "little" if endian == 1 else "big" if endian == 2 else "unknown",
        "machine": _ELF_MACHINES.get(machine, f"unknown-{machine}"),
    }


def run_strings(path, limit=1200):
    """Run the host ``strings`` tool on *path* and return up to *limit* lines.

    Returns an empty list when the tool is missing, fails, or exceeds the
    20 second timeout.
    """
    try:
        result = subprocess.run(
            ["strings", str(path)],
            capture_output=True,
            text=True,
            timeout=20,
            errors="ignore"
        )
        return result.stdout.splitlines()[:limit]
    except Exception:
        return []


def run_file_cmd(path):
    """Return the brief output of the host ``file`` tool for *path*.

    Returns None when ``file`` is not installed, fails, or exceeds the
    5 second timeout.
    """
    if shutil.which("file") is None:
        return None
    try:
        result = subprocess.run(
            ["file", "-b", str(path)],
            capture_output=True,
            text=True,
            timeout=5,
            errors="ignore"
        )
        return result.stdout.strip()
    except Exception:
        return None
# =============================================================================
# EXTRACTION
# =============================================================================

def extract_with_binwalk(firmware_image, output_base):
    """Extract *firmware_image* with binwalk and return the directory to scan.

    A timestamped ``binwalk_extract_*`` directory is created under
    *output_base* and ``binwalk -Me`` is run into it with a 30 minute
    timeout. Timeouts, launch failures and non-zero exit codes are recorded
    under the ``tool_warnings`` report category rather than raised.

    Returns the highest-scoring rootfs candidate found in the extraction
    directory, or the extraction directory itself when none is found.

    Raises:
        RuntimeError: if binwalk is not installed. The check runs before any
            directory is created, so a failed call leaves nothing behind.
    """
    if shutil.which("binwalk") is None:
        raise RuntimeError("binwalk not found. Install it or scan an already extracted rootfs directory.")

    firmware_image = Path(firmware_image).resolve()
    stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    extract_dir = Path(output_base).resolve() / f"binwalk_extract_{stamp}"
    extract_dir.mkdir(parents=True, exist_ok=True)

    print(f"[+] Extracting with binwalk into: {extract_dir}")

    try:
        result = subprocess.run(
            ["binwalk", "-Me", str(firmware_image), "--directory", str(extract_dir)],
            check=False,
            timeout=1800
        )
    except subprocess.TimeoutExpired:
        add("tool_warnings", {"tool": "binwalk", "warning": "binwalk extraction timed out"})
    except Exception as e:
        add("tool_warnings", {"tool": "binwalk", "warning": f"binwalk extraction failed: {e}"})
    else:
        # A partial extraction is still worth scanning, so only warn.
        if result.returncode != 0:
            add("tool_warnings", {
                "tool": "binwalk",
                "warning": f"binwalk exited with status {result.returncode}; extraction may be incomplete"
            })

    candidates = find_rootfs_candidates(extract_dir)

    if candidates:
        print("[+] Possible rootfs candidates:")
        for idx, cand in enumerate(candidates[:10], 1):
            print(f"    {idx}. {cand}")
        return candidates[0]

    print("[!] No obvious rootfs found. Scanning extraction directory instead.")
    return extract_dir
Scanning extraction directory instead.") return extract_dir def find_rootfs_candidates(base_dir): base_dir = Path(base_dir) candidates = [] for dirpath, dirnames, filenames in os.walk(base_dir): d = Path(dirpath) names = set(filenames) | set(dirnames) score = 0 if "etc" in names: score += 3 if "bin" in names: score += 2 if "sbin" in names: score += 2 if "www" in names or "web" in names or "htdocs" in names: score += 2 if "passwd" in names or "shadow" in names: score += 2 if "init.d" in names: score += 2 if score >= 5: candidates.append((score, d)) candidates.sort(key=lambda x: x[0], reverse=True) return [c[1] for c in candidates] # ============================================================================= # SCANNERS # ============================================================================= def extract_version_strings(text): patterns = [ r"(?i)(version|build|release|fw_ver|firmware|software|sw_ver|hardware|hw_ver|model|vendor|product)\s*[:=]\s*['\"]?([\w.\-/ ]{3,80})", r"(?i)(busybox\s+v\d{1,3}\.\d{1,3}(?:\.\d{1,5})?)", r"(?i)(linux\s+version\s+[\w.\-]+)", r"\bV(\d{1,3}\.\d{1,3}\.\d{1,5})\b", r"\b(\d{4}-\d{2}-\d{2})\b", ] versions = set() for pattern in patterns: for match in re.findall(pattern, text): if isinstance(match, tuple): value = match[-1] else: value = match value = " ".join(value.strip().split()) if 3 <= len(value) <= 120: versions.add(value) return sorted(versions)[:40] def detect_components(text, rel_path): for component, patterns in COMPONENT_PATTERNS.items(): for pattern in patterns: for match in re.findall(pattern, text, re.I): version = match[-1] if isinstance(match, tuple) else match version = str(version).strip() if version: add("components", { "component": component, "version": version, "file": rel_path }) if component in CVE_HINTS: add("cve_hints", { "component": component, "version": version, "file": rel_path, "note": CVE_HINTS[component]["note"] }) def detect_malware_iocs(text, rel_path): low = text.lower() for family, indicators in 
MALWARE_IOC_KEYWORDS.items(): hits = [] for indicator in indicators: if indicator.lower() in low: hits.append(indicator) if hits: add("malware_iocs", { "family_or_category": family, "file": rel_path, "indicators": sorted(set(hits)), "confidence": "medium" if len(hits) == 1 else "high" }) def scan_text_content(path, content, rel_path): for name, pattern in REGEX_PATTERNS.items(): for match in re.findall(pattern, content): if isinstance(match, tuple): finding = {"file": rel_path, "key": match[0], "value": match[1]} value_for_validation = match[1] else: finding = {"file": rel_path, "value": match} value_for_validation = match if name == "ipv4": parts = value_for_validation.split(".") if not all(p.isdigit() and 0 <= int(p) <= 255 for p in parts): continue add(name, finding) if name in {"possible_password", "wifi_psk", "api_key", "mqtt_credential"}: add_credential_finding(name, finding) for marker in PRIVATE_KEY_MARKERS: if marker in content: add("private_keys", {"file": rel_path, "marker": marker}) add("credential_findings", { "file": rel_path, "type": "private_key", "value": marker, "confidence": "high", "severity": "critical", "reason": "Private key material marker found" }) for marker in CERT_MARKERS: if marker in content: add("certificates", {"file": rel_path, "marker": marker}) lowered = content.lower() for keyword in SUSPICIOUS_KEYWORDS: if keyword.lower() in lowered: add("suspicious_keywords", {"file": rel_path, "keyword": keyword}) versions = extract_version_strings(content) if versions: add("version_strings", {"file": rel_path, "versions": versions}) detect_components(content, rel_path) detect_malware_iocs(content, rel_path) def add_credential_finding(kind, finding): value = str(finding.get("value", "")) key = str(finding.get("key", "")) file = finding.get("file", "") confidence = "medium" severity = "medium" reasons = [] if kind in {"api_key", "wifi_psk"}: confidence = "high" severity = "high" reasons.append(f"{kind} pattern matched") if key.lower() in 
{"password", "passwd", "pwd", "admin_pass", "root_pass", "secret", "token"}: confidence = "high" severity = "high" reasons.append("credential-like key name") if value.lower() in {"admin", "root", "password", "123456", "12345678", "admin123", "root123"}: confidence = "high" severity = "high" reasons.append("common/default credential value") if any(x in file.lower() for x in ["shadow", "passwd", "default", "config", "nvram", "wpa"]): reasons.append("sensitive config path") if confidence == "medium": confidence = "high" add("credential_findings", { "file": file, "type": kind, "key": key, "value": value, "confidence": confidence, "severity": severity, "reason": "; ".join(reasons) if reasons else "credential-like pattern matched" }) def classify_path(path, root, size, rel_path): lower = str(path).lower() name = path.name.lower() info = {"file": rel_path, "sha256": sha256_file(path), "size": size} if name in INTERESTING_NAMES: add("interesting_files", info) if any(p in lower for p in ["/etc/init.d", "/etc/rc", "/etc/inittab", "/etc/services", "/lib/systemd"]): add("startup_scripts", info) if any(p in lower for p in ["/www", "/web", "/htdocs", "/cgi-bin", "/var/www"]): add("web_files", info) if any(x in name for x in ["config", ".conf", ".cfg", ".ini", ".json", ".xml", ".yaml", ".yml"]): add("config_files", info) if "cron" in lower or "crontab" in lower: add("cron_jobs", info) if name in {"passwd", "shadow", "group"}: add("users_groups", info) if any(x in lower for x in ["ssh", "dropbear", "authorized_keys", "host_key"]): add("ssh_related", info) if name in EXECUTABLE_NAMES: add("interesting_binaries", {**info, "reason": "Common embedded Linux service or utility"}) def scan_busybox(path, text, rel_path): if "busybox" not in str(path).lower() and "busybox" not in text.lower(): return banner = re.search(r"BusyBox v[\w.\-]+", text) applet_hits = [] applet_keywords = ["telnet", "wget", "tftp", "httpd", "ash", "sh", "nc", "ftpget", "ftpput"] for line in text.splitlines(): low 
= line.lower() if any(x in low for x in applet_keywords) and len(line.strip()) < 250: applet_hits.append(line.strip()) risky_applets = [] for app in applet_keywords: if app in text.lower(): risky_applets.append(app) add("busybox", { "file": rel_path, "banner": banner.group(0) if banner else None, "risky_applet_hints": sorted(set(risky_applets)), "possible_applets_or_strings": sorted(set(applet_hits))[:100] }) def scan_web_endpoints(path, content, rel_path): lower_path = str(path).lower() if not any(x in lower_path for x in ["/www", "/web", "/htdocs", "/cgi-bin", ".html", ".js", ".php", ".cgi", ".asp"]): return endpoints = re.findall(r"[\"'](/[^\"'\s<>]{2,})[\"']", content) forms = re.findall(r"(?i)