Merge pull request #2 from nerdymark/fix/scan-reliability-and-rdp-screenshots F...

File: backend/database.py
 def cleanup_orphaned_scans(db_path: str) -> int:
         conn.close()
+def cleanup_stuck_scans(db_path: str, stuck_threshold_minutes: int = 30) -> int:
+    """Mark scans stuck in 'running' for longer than threshold as failed."""
+    conn = get_connection(db_path)
+    try:
+        cursor = conn.execute(
+            "UPDATE scans SET status = 'failed', "
+            "error = 'Timed out (exceeded ' || ? || ' min)', "
+            "finished_at = ? "
+            "WHERE status = 'running' AND started_at IS NOT NULL "
+            "AND started_at < datetime('now', '-' || ? || ' minutes')",
+            (stuck_threshold_minutes, _now(), stuck_threshold_minutes),
+        )
+        conn.commit()
+        return cursor.rowcount
+    finally:
+        conn.close()
+
+
 # --- Host CRUD ---
 def upsert_host(db_path: str, scan_id: int, subnet_id: int, ip: str, **kwargs) -> dict:
File: backend/main.py
 from fastapi.responses import FileResponse, StreamingResponse
 from backend.config import Config, load_config
-from backend.database import init_db, cleanup_orphaned_scans
+from backend.database import init_db, cleanup_orphaned_scans, cleanup_stuck_scans
 from backend.geoip_db import init_geoip_db
 from backend.scanner import NetworkScanner
 from backend.atproto_client import BlueskyAnnouncer
 async def lifespan(app: FastAPI):
     logging.getLogger(__name__).info(
         f"RDP Lottery started on {_config.app.host}:{_config.app.port}"
     )
+
+    # Start watchdog that marks stuck scans as failed
+    watchdog_task = asyncio.create_task(_scan_watchdog(_config.app.database_path))
     yield
+    watchdog_task.cancel()
+
+
+# Scans left in 'running' state for longer than this many minutes are considered stuck and marked failed by the watchdog
+WATCHDOG_STUCK_THRESHOLD_MINUTES = 30
+WATCHDOG_CHECK_INTERVAL_SECONDS = 60  # pause between consecutive watchdog sweeps
+
+
+async def _scan_watchdog(db_path: str) -> None:
+    """Periodically check for scans stuck in 'running' state and mark them failed."""
+    log = logging.getLogger(__name__)
+    while True:
+        try:
+            await asyncio.sleep(WATCHDOG_CHECK_INTERVAL_SECONDS)
+            cleaned = cleanup_stuck_scans(db_path, WATCHDOG_STUCK_THRESHOLD_MINUTES)
+            if cleaned:
+                log.warning(f"Watchdog: marked {cleaned} stuck scan(s) as failed")
+        except asyncio.CancelledError:
+            break
+        except Exception as e:
+            log.error(f"Watchdog error: {e}")
 app = FastAPI(title="RDP Lottery", version="0.1.0", lifespan=lifespan)
File: backend/routers/scans.py
 import json
 import logging
 import concurrent.futures
+import time
 import urllib.request
 from typing import Optional
 router = APIRouter(prefix="/api/scans", tags=["scans"])
 MAX_CONCURRENT_SCANS = 4
+# Maximum wall-clock time for a single scan before it bails out (seconds)
+SCAN_MAX_DURATION_SECONDS = 20 * 60  # 20 minutes
 _scan_pool = concurrent.futures.ThreadPoolExecutor(
     max_workers=MAX_CONCURRENT_SCANS,
     thread_name_prefix="scan",
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
     announcer = _get_announcer()
     db.update_scan(db_path, scan_id, status="running", started_at=db._now())
+    scan_start = time.monotonic()
+
+    def _scan_timed_out() -> bool:
+        elapsed = time.monotonic() - scan_start
+        if elapsed > SCAN_MAX_DURATION_SECONDS:
+            logger.warning(f"Scan {scan_id} exceeded {SCAN_MAX_DURATION_SECONDS}s "
+                          f"({elapsed:.0f}s elapsed), stopping early")
+            return True
+        return False
     try:
         # Phase 1: Host discovery (RDP + VNC)
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
         # Phase 2: Full scan on RDP hosts
         full_scanned_ips = set()
         for host_info in rdp_hosts:
+            if _scan_timed_out():
+                # Any RDP hits we never got to validate are unverified — clear
+                # so we don't record tarpit/firewall false positives as HITs.
+                host_info["rdp_open"] = 0
+                continue
             rdp_port = host_info.get("rdp_port", 3389)
             try:
                 full_data = scanner.full_scan(host_info["ip"])
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
             except Exception as e:
                 logger.error(f"NLA check failed for {host_info['ip']}: {e}")
-            # Phase 3: Screenshot non-NLA hosts
-            if host_info.get("nla_required") == 0:
+            # Phase 3: Screenshot hosts where NLA is not explicitly required.
+            # Includes nla_required=0 (confirmed no-NLA) and None (inconclusive —
+            # rdp-enum-encryption gave no output, but a no-NLA login screen may
+            # still render). NLA-required hosts will reject empty creds fast.
+            if host_info.get("nla_required") != 1:
                 try:
                     screenshot = scanner.capture_screenshot(host_info["ip"], "screenshots", port=rdp_port)
                     if screenshot:
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
         # Phase 4: VNC host processing
         for host_info in vnc_hosts:
+            if _scan_timed_out():
+                # Unverified VNC hits — clear to avoid false positives
+                host_info["vnc_open"] = 0
+                continue
             # Full scan VNC-only hosts that weren't already scanned
             if host_info["ip"] not in full_scanned_ips:
                 try:
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
                 except Exception as e:
                     logger.error(f"Full scan failed for VNC host {host_info['ip']}: {e}")
+            # Confirm at least one VNC port is actually open in full scan
+            open_ports = {p["port"] for p in host_info.get("all_ports", [])}
+            vnc_ports_from_discovery = host_info.get("vnc_ports", [5900])
+            confirmed_vnc = [p for p in vnc_ports_from_discovery if p in open_ports]
+            if not confirmed_vnc:
+                logger.info(f"VNC ports {vnc_ports_from_discovery} not confirmed open for "
+                            f"{host_info['ip']} (ports: {open_ports}), clearing vnc_open")
+                host_info["vnc_open"] = 0
+                host_info["vnc_ports"] = []
+                continue
+            host_info["vnc_ports"] = confirmed_vnc
+
             # Check each open VNC port — a host may have multiple displays
             vnc_ports = host_info.get("vnc_ports", [5900])
             for vnc_port in vnc_ports:
                 # VNC auth check
+                vnc_info = {"vnc_auth_required": None, "vnc_desktop_name": ""}
                 try:
                     vnc_info = scanner.check_vnc_auth(host_info["ip"], port=vnc_port)
                     # Merge results — keep the most interesting (no-auth wins over auth)
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
         # Phase 4.5: Web screenshots
         for host_info in discovered:
+            if _scan_timed_out():
+                break
             all_ports = host_info.get("all_ports", [])
             web_ports = scanner.detect_web_ports(all_ports)
             if web_ports:
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
         # Phase 5: Host enrichment (ASN, GeoIP, reverse DNS, IP type)
         for host_info in discovered:
+            if _scan_timed_out():
+                break
             try:
                 enrichment = scanner.enrich_host(host_info["ip"])
                 host_info.update(enrichment)
             except Exception as e:
                 logger.error(f"Enrichment failed for {host_info['ip']}: {e}")
-        # Upsert all discovered hosts
-        for host_info in discovered:
+        # Upsert only hosts with a confirmed open RDP or VNC port. A host whose
+        # discovery hit couldn't be verified (timeout, tarpit, firewall that
+        # SYN-ACKs everything) is dropped rather than saved as a ghost HIT.
+        interesting = [h for h in discovered if h.get("rdp_open") or h.get("vnc_open")]
+        dropped = len(discovered) - len(interesting)
+        if dropped:
+            logger.info(f"Scan {scan_id}: dropping {dropped} unverified hosts")
+        for host_info in interesting:
             host_record = db.upsert_host(
                 db_path, scan_id, subnet["id"], host_info["ip"],
                 hostname=host_info.get("hostname", ""),
 def _run_scan(scan_id: int, subnet: dict, db_path: str) -> None:
                 if announcer.announce_host(host_info, screenshot_path=host_info.get("vnc_screenshot_path"), proto="VNC"):
                     db.mark_host_announced(db_path, host_record["id"])
-        verified_rdp = sum(1 for h in discovered if h.get("rdp_open"))
-        verified_vnc = sum(1 for h in discovered if h.get("vnc_open"))
+        verified_rdp = sum(1 for h in interesting if h.get("rdp_open"))
+        verified_vnc = sum(1 for h in interesting if h.get("vnc_open"))
         db.update_scan(
             db_path, scan_id,
             status="completed",
-            hosts_found=len(discovered),
+            hosts_found=len(interesting),
             rdp_found=verified_rdp,
             vnc_found=verified_vnc,
             finished_at=db._now(),
         )
-        logger.info(f"Scan {scan_id} completed: {len(discovered)} hosts, "
-                     f"{verified_rdp} RDP, {verified_vnc} VNC")
+        logger.info(f"Scan {scan_id} completed: {len(interesting)} hosts "
+                     f"({verified_rdp} RDP, {verified_vnc} VNC); "
+                     f"dropped {dropped} unverified")
     except Exception as e:
         logger.error(f"Scan {scan_id} failed: {e}")
 def active_scans():
     return db.get_active_scans(_get_db_path())
+@router.post("/retry-failed", response_model=list[ScanResponse], status_code=201)
+def retry_failed_scans(hours: int = Query(24, ge=1, le=168)):
+    """Re-queue scans for subnets that had failed scans in the last N hours."""
+    db_path = _get_db_path()
+    conn = db.get_connection(db_path)
+    try:
+        rows = conn.execute(
+            """SELECT DISTINCT s.subnet_id, sub.cidr, sub.label, sub.is_active
+               FROM scans s
+               JOIN subnets sub ON s.subnet_id = sub.id
+               WHERE s.status = 'failed'
+                 AND s.created_at > datetime('now', '-' || ? || ' hours')
+                 AND sub.is_active = 1
+            """,
+            (hours,),
+        ).fetchall()
+    finally:
+        conn.close()
+
+    if not rows:
+        return []
+
+    scans = []
+    for row in rows:
+        subnet = {"id": row["subnet_id"], "cidr": row["cidr"]}
+        scan = db.create_scan(db_path, subnet["id"])
+        scans.append(scan)
+        _scan_pool.submit(_run_scan, scan["id"], subnet, db_path)
+
+    logger.info(f"Retrying {len(scans)} failed scan(s) from last {hours}h")
+    return scans
+
+
 FEED_URL = "https://nerdymark.com/404"
Read more...