#! /usr/bin/python3
# -*- coding: utf-8 -*-

import os
import re
import json
import math
import html
import time
import subprocess
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from collections import Counter, defaultdict
from ipaddress import ip_network

from django.core.management.base import BaseCommand
from django.conf import settings


# -----------------------------
# Defaults (override via settings)
# -----------------------------
# Module-wide configuration table. Each value is read from Django settings once,
# when this module is imported, and falls back to the literal given here when the
# setting is absent.
DEFAULTS = {
    # Input log, persisted state file, and output directory for JSON/HTML reports.
    "LOG_PATH": getattr(settings, "NGINX_ACCESS_LOG", "/var/log/nginx/access.log"),
    "STATE_PATH": getattr(settings, "NOISE_ANALYZER_STATE", "/var/lib/ideolab/noise_analyzer/state.json"),
    "OUT_DIR": getattr(settings, "NOISE_ANALYZER_OUT_DIR", "/var/lib/ideolab/noise_analyzer/reports"),
    # Defaults for the --window and --tail-bytes command-line options.
    "WINDOW_MINUTES": getattr(settings, "NOISE_ANALYZER_WINDOW_MINUTES", 5),
    "TAIL_BYTES": getattr(settings, "NOISE_ANALYZER_TAIL_BYTES", 6_000_000),  # ~ last few MB
    # Maximum number of entries kept in the report's top_ips / top_subnets lists.
    "TOP_N": getattr(settings, "NOISE_ANALYZER_TOP_N", 50),

    # Heuristics thresholds
    # MAX_REQ_PER_IP and MAX_IPS_PER_24 feed the ALERT decision in Command._run_once.
    # NOTE(review): MAX_404_PER_IP and MAX_4XX_PER_IP are not read anywhere in the
    # visible part of this module.
    "MAX_REQ_PER_IP": getattr(settings, "NOISE_ANALYZER_MAX_REQ_PER_IP", 80),
    "MAX_404_PER_IP": getattr(settings, "NOISE_ANALYZER_MAX_404_PER_IP", 30),
    "MAX_4XX_PER_IP": getattr(settings, "NOISE_ANALYZER_MAX_4XX_PER_IP", 40),
    "MAX_IPS_PER_24": getattr(settings, "NOISE_ANALYZER_MAX_IPS_PER_24", 25),
    # Substrings passed to ua_is_suspect(); that function treats "" and "-" as
    # "empty User-Agent" markers rather than as substrings to search for.
    "SUSPECT_UA_KEYWORDS": getattr(
        settings,
        "NOISE_ANALYZER_SUSPECT_UA_KEYWORDS",
        ["", "-", "curl", "wget", "python", "aiohttp", "httpclient", "bot", "scan", "spider", "scrapy"],
    ),

    # Actions (optional)
    "MODE": getattr(settings, "NOISE_ANALYZER_MODE", "nft"),  # "nft" or "ipset"
    # NOTE: your server shows rules in "table ip filter" / "chain INPUT" (Docker/iptables-nft style)
    # NFT_TABLE is "<family> <table>". Auto-detection only replaces NFT_TABLE/NFT_CHAIN
    # for the --tcp drop-counter read; nft_init() and nft_add_ips() are called with
    # these configured values as-is.
    "NFT_TABLE": getattr(settings, "NOISE_ANALYZER_NFT_TABLE", "inet filter"),   # fallback for --tcp auto-detect
    "NFT_SET": getattr(settings, "NOISE_ANALYZER_NFT_SET", "ideo_noise_block4"),
    "NFT_CHAIN": getattr(settings, "NOISE_ANALYZER_NFT_CHAIN", "input"),        # fallback for --tcp auto-detect
    # Comment attached to the drop rule; nft_init() also searches the chain listing
    # for this text to decide whether the rule already exists.
    "NFT_RULE_COMMENT": getattr(settings, "NOISE_ANALYZER_NFT_RULE_COMMENT", "IDEO-Lab noise drop (443)"),

    "IPSET_NAME": getattr(settings, "NOISE_ANALYZER_IPSET_NAME", "ideo_noise_block4"),
    # Destination ports covered by the drop rule and summed by nft_read_drop_counters().
    "BLOCK_PORTS": getattr(settings, "NOISE_ANALYZER_BLOCK_PORTS", [443]),

    # Blocking policy (defaults for --ttl, --score-threshold and --max-block)
    "BLOCK_TTL_SECONDS": getattr(settings, "NOISE_ANALYZER_BLOCK_TTL_SECONDS", 6 * 3600),
    "BLOCK_SCORE_THRESHOLD": getattr(settings, "NOISE_ANALYZER_BLOCK_SCORE_THRESHOLD", 80),
    "BLOCK_MAX_PER_RUN": getattr(settings, "NOISE_ANALYZER_BLOCK_MAX_PER_RUN", 200),

    # TCP noise heuristics
    # TCP_PORT is the port whose socket states are counted via ss_syn_counts().
    "TCP_PORT": getattr(settings, "NOISE_ANALYZER_TCP_PORT", 443),
    "TCP_ALERT_DELTA_DROPS": getattr(settings, "NOISE_ANALYZER_TCP_ALERT_DELTA_DROPS", 500),  # drops since last run
    "TCP_ALERT_SYN_RECV": getattr(settings, "NOISE_ANALYZER_TCP_ALERT_SYN_RECV", 200),        # syn_recv count
}


# -----------------------------
# LOG PARSERS
# -----------------------------
# Custom access.log format, one request per line, for example:
#   54.37.118.66 - GET /portfolio/domaine/AI_ML/ HTTP/2.0 status=200 rt=0.390 urt=0.390 cache=MISS ru=PASSED
# Captures ip, method, url, httpver and status. The pattern is anchored at the
# start of the line only, so whatever follows "status=NNN" is ignored.
# Only dotted-digit (IPv4-looking) addresses and the six listed methods match,
# and no timestamp or User-Agent is captured.
LOG_RE_CUSTOM = re.compile(
    r'^(?P<ip>\d+\.\d+\.\d+\.\d+)\s+-\s+'
    r'(?P<method>GET|POST|HEAD|PUT|DELETE|OPTIONS)\s+'
    r'(?P<url>\S+)\s+'
    r'HTTP/(?P<httpver>[\d.]+)\s+'
    r'status=(?P<status>\d{3})\b'
)

# Fallback: standard "combined" access-log format.
# Captures ip, ts (the bracketed timestamp), method, url, httpver, status, size,
# ref (referer) and ua (User-Agent). The two fields between the address and the
# timestamp are matched but not captured. As in the custom pattern, the address
# must be dotted digits; octet ranges are not validated here.
LOG_RE_COMBINED = re.compile(
    r'^(?P<ip>\d+\.\d+\.\d+\.\d+)\s+\S+\s+\S+\s+\[(?P<ts>[^\]]+)\]\s+'
    r'"(?P<method>[A-Z]+)\s+(?P<url>\S+)\s+HTTP/(?P<httpver>[^"]+)"\s+'
    r'(?P<status>\d{3})\s+(?P<size>\S+)\s+"(?P<ref>[^"]*)"\s+"(?P<ua>[^"]*)"(?:\s|$)'
)

# nft chain parsing: matches a rule fragment of the form
#   tcp dport 443 counter packets X bytes Y drop
# and captures port, packets and bytes. Only a single literal port directly
# followed by a counter and a drop verdict matches; rules without a counter
# statement are therefore not seen by nft_read_drop_counters().
NFT_DROP_RE = re.compile(
    r'tcp dport (?P<port>\d+)\s+counter packets (?P<packets>\d+)\s+bytes (?P<bytes>\d+)\s+drop'
)


def parse_nginx_ts(ts: str) -> datetime | None:
    """Parse combined timestamp like: 29/Dec/2025:11:22:33 +0000"""
    try:
        dt_part, tz_part = ts.rsplit(" ", 1)
        dt = datetime.strptime(dt_part, "%d/%b/%Y:%H:%M:%S")
        sign = 1 if tz_part.startswith("+") else -1
        hours = int(tz_part[1:3])
        mins = int(tz_part[3:5])
        offset = timezone(sign * timedelta(hours=hours, minutes=mins))
        return dt.replace(tzinfo=offset).astimezone(timezone.utc)
    except Exception:
        return None


def ensure_dirs(path: str) -> None:
    """Create the parent directory of *path* (and any missing ancestors).

    Args:
        path: Path of a file that is about to be written.

    A bare file name has no directory component; in that case there is nothing
    to create and the call is a no-op (``os.makedirs("")`` would raise).
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def tail_file_bytes(path: str, tail_bytes: int) -> list[str]:
    """Return the complete lines found in the last *tail_bytes* bytes of a file.

    Args:
        path: File to read (opened in binary mode).
        tail_bytes: Maximum number of trailing bytes to read.

    Returns:
        Decoded lines (undecodable bytes are ignored). When the read window
        starts in the middle of a line, that truncated first line is dropped;
        when it starts exactly on a line boundary, the first line is kept.
        An empty list is returned if the file cannot be opened or read.
    """
    try:
        with open(path, "rb") as f:
            f.seek(0, os.SEEK_END)
            size = f.tell()
            start = max(0, size - tail_bytes)

            starts_mid_line = False
            if start > 0:
                # Peek at the byte just before the window: anything other than a
                # newline means the window begins inside a line.
                f.seek(start - 1)
                starts_mid_line = f.read(1) != b"\n"
            else:
                f.seek(0)
            data = f.read()
    except OSError:
        # Missing file, permission problem, directory instead of file, ...
        return []

    lines = data.decode(errors="ignore").splitlines()
    if starts_mid_line and lines:
        lines = lines[1:]  # drop partial first line
    return lines


@dataclass
class IpStats:
    """Per-address request counters accumulated while scanning log lines."""

    ip: str           # client address exactly as captured from the log line
    req: int = 0      # total requests seen
    s404: int = 0     # responses with status 404
    s4xx: int = 0     # responses with status 400-499
    s5xx: int = 0     # responses with status 500-599
    ua_sus: int = 0   # requests whose User-Agent was judged suspect
    methods: Counter = None  # per-HTTP-method request counts

    def __post_init__(self):
        # None stands for "no counter supplied": give this instance its own
        # Counter so that instances never share one mutable default.
        self.methods = Counter() if self.methods is None else self.methods


def ua_is_suspect(ua: str, keywords: list[str]) -> bool:
    """Tell whether a User-Agent string looks like a script or scanner.

    Args:
        ua: Raw User-Agent value; ``None`` is treated like an empty string.
        keywords: Substrings that mark a User-Agent as suspect. Matching is
            case-insensitive on both sides. Entries that are empty or ``"-"``
            are ignored here because an absent User-Agent is handled separately.

    Returns:
        ``True`` when the User-Agent is empty or ``"-"`` after stripping, or
        when it contains any of the keywords; ``False`` otherwise.
    """
    ua_low = (ua or "").strip().lower()
    if ua_low in ("", "-"):
        return True
    # Lower-case the keywords too: the User-Agent has been lower-cased, so a
    # configured keyword such as "Curl" would otherwise never match.
    needles = (k.lower() for k in keywords if k and k != "-")
    return any(needle in ua_low for needle in needles)


def score_ip(st: IpStats) -> int:
    """Turn one address's counters into a single noise score (higher is noisier).

    The score is the sum of capped per-signal contributions plus a flat bonus
    for heavy HEAD or OPTIONS use, clamped to at most 999.

    Args:
        st: Counters for a single address.

    Returns:
        The integer score.
    """
    contributions = [
        int(10 * math.log1p(st.req)),   # request volume, logarithmic
        min(40, st.s404 * 2),           # 404s (path scanning)
        min(40, st.s4xx * 1),           # 4xx responses as counted in s4xx
        min(50, st.s5xx * 5),           # 5xx responses weigh the most per hit
        min(30, st.ua_sus * 3),         # suspect User-Agents
    ]

    # Flat +10 for each probing-style method used more than 10 times.
    for verb in ("HEAD", "OPTIONS"):
        if st.methods.get(verb, 0) > 10:
            contributions.append(10)

    return min(999, sum(contributions))


# -----------------------------
# Firewall integration (optional)
# -----------------------------
def run(cmd: list[str]) -> tuple[int, str]:
    """Run an external command and return ``(exit_code, combined_output)``.

    stderr is merged into stdout and the text is stripped of surrounding
    whitespace.

    Args:
        cmd: Program and arguments as an argv list (no shell is involved).

    Returns:
        The process exit code and its output. If the program cannot be started
        at all (not installed, not executable), ``(127, <error text>)`` is
        returned instead of raising, so callers that branch on a non-zero exit
        code also cover that case.
    """
    try:
        p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    except OSError as exc:
        # 127 is the conventional "command not found" exit status.
        return 127, str(exc)
    return p.returncode, (p.stdout or "").strip()


def is_root() -> bool:
    """Return True when the effective user id of this process is 0.

    On platforms where ``os`` has no ``geteuid`` the answer is always False.
    """
    geteuid = getattr(os, "geteuid", None)
    if geteuid is None:
        return False
    return geteuid() == 0


def nft_init(table: str, set_name: str, chain: str, ports: list[int], comment: str) -> dict:
    """Make sure the nftables block set and its drop rule exist.

    Args:
        table: ``"<family> <table>"``, e.g. ``"inet filter"``.
        set_name: Name of the IPv4 set (created with the ``timeout`` flag).
        chain: Chain that receives the drop rule.
        ports: TCP destination ports the rule applies to.
        comment: Rule comment; its presence in the chain listing is used to
            detect that the rule was already installed.

    Returns:
        ``{"actions": [...], "errors": [...]}`` describing what was done. No
        exception is raised for nft failures; they are reported in ``errors``.
    """
    out = {"actions": [], "errors": []}

    spec = table.split()
    if len(spec) != 2:
        out["errors"].append(f"nft: invalid table spec {table!r}, expected '<family> <table>'")
        return out
    family, tbl = spec

    rc, _ = run(["nft", "list", "set", family, tbl, set_name])
    if rc != 0:
        rc2, o2 = run(["nft", "add", "set", family, tbl, set_name, "{", "type", "ipv4_addr", ";", "flags", "timeout", ";", "}"])
        if rc2 == 0:
            out["actions"].append(f"nft: created set {family} {tbl} {set_name}")
        else:
            out["errors"].append(f"nft: failed to create set: {o2}")

    rc3, rules = run(["nft", "list", "chain", family, tbl, chain])
    if rc3 != 0:
        out["errors"].append(f"nft: cannot list chain {family} {tbl} {chain}: {rules}")
        return out

    # A double quote cannot appear inside an nft quoted string, so replace it;
    # the same sanitized text is used for both the lookup and the new rule.
    safe_comment = comment.replace('"', "'")
    if safe_comment in rules:
        out["actions"].append("nft: rule already present")
        return out

    ports_expr = "{" + ",".join(str(p) for p in ports) + "}"
    # nft re-parses its command-line words as one command string, so a comment
    # containing spaces or parentheses must carry its own double quotes.
    cmd = [
        "nft", "add", "rule", family, tbl, chain,
        "ip", "saddr", f"@{set_name}", "tcp", "dport", ports_expr,
        "drop", "comment", f'"{safe_comment}"',
    ]
    rc4, o4 = run(cmd)
    if rc4 == 0:
        out["actions"].append("nft: added drop rule")
    else:
        out["errors"].append(f"nft: failed to add rule: {o4}")
    return out


def nft_add_ips(table: str, set_name: str, ips: list[str], ttl_seconds: int) -> dict:
    """Add addresses to an nftables set, each with a timeout.

    Args:
        table: ``"<family> <table>"``.
        set_name: Target set.
        ips: Addresses to add, one nft call per address.
        ttl_seconds: Element timeout in seconds.

    Returns:
        ``{"added": [...], "errors": [...]}``. An address whose failure output
        mentions "exists" is neither counted as added nor reported as an error.
    """
    family, tbl = table.split()
    timeout = f"{ttl_seconds}s"
    added = []
    errors = []

    for addr in ips:
        status, output = run(["nft", "add", "element", family, tbl, set_name, "{", addr, "timeout", timeout, "}"])
        if status == 0:
            added.append(addr)
        elif "exists" not in output.lower():
            errors.append(f"{addr}: {output}")

    return {"added": added, "errors": errors}


def ipset_init(set_name: str, ports: list[int]) -> dict:
    """Make sure the ipset and one iptables DROP rule per port exist.

    Args:
        set_name: Name of the ``hash:ip`` set.
        ports: TCP destination ports to protect.

    Returns:
        ``{"actions": [...], "errors": [...]}`` describing what was done.
    """
    result = {"actions": [], "errors": []}

    listed_rc, _ = run(["ipset", "list", set_name])
    if listed_rc != 0:
        create_rc, create_out = run(["ipset", "create", set_name, "hash:ip", "timeout", "0", "-exist"])
        if create_rc == 0:
            result["actions"].append(f"ipset: created {set_name}")
        else:
            result["errors"].append(f"ipset: create failed: {create_out}")

    for port in ports:
        # Same rule specification for the existence check (-C) and the insert (-I).
        rule_spec = ["INPUT", "-p", "tcp", "--dport", str(port), "-m", "set", "--match-set", set_name, "src", "-j", "DROP"]
        check_rc, _ = run(["iptables", "-C", *rule_spec])
        if check_rc == 0:
            continue
        insert_rc, insert_out = run(["iptables", "-I", *rule_spec])
        if insert_rc == 0:
            result["actions"].append(f"iptables: inserted DROP for port {port} + set {set_name}")
        else:
            result["errors"].append(f"iptables: insert failed (port {port}): {insert_out}")

    return result


def ipset_add_ips(set_name: str, ips: list[str], ttl_seconds: int) -> dict:
    """Add addresses to an ipset, each with a timeout.

    Args:
        set_name: Target set.
        ips: Addresses to add, one ipset call per address.
        ttl_seconds: Entry timeout in seconds.

    Returns:
        ``{"added": [...], "errors": [...]}``; each error is ``"<ip>: <output>"``.
    """
    added = []
    errors = []
    for addr in ips:
        status, output = run(["ipset", "add", set_name, addr, "timeout", str(ttl_seconds), "-exist"])
        if status != 0:
            errors.append(f"{addr}: {output}")
            continue
        added.append(addr)
    return {"added": added, "errors": errors}


# -----------------------------
# NEW: TCP noise helpers (nft counters + ss)
# -----------------------------
def nft_autodetect_table_chain(preferred_table: str, preferred_chain: str) -> tuple[str, str, list[str]]:
    """Find the first filter chain hooked on input in the live nft ruleset.

    The output of ``nft list ruleset`` is scanned once: ``table <family> <name>``
    and ``chain <name> {`` lines set the current position, and the first
    ``type filter hook input`` line seen inside a chain selects that chain.
    No further nft calls are made per chain.

    Args:
        preferred_table: ``"<family> <table>"`` returned when nothing is found.
        preferred_chain: Chain name returned when nothing is found.

    Returns:
        ``(table, chain, debug_actions)``. ``debug_actions`` holds one message
        when a chain was detected and is empty when the preferred values are
        returned (ruleset unreadable, empty, or without such a chain).
    """
    actions: list[str] = []
    rc, ruleset = run(["nft", "list", "ruleset"])
    if rc != 0 or not ruleset:
        return preferred_table, preferred_chain, actions

    current_table = None
    current_chain = None

    for raw in ruleset.splitlines():
        tokens = raw.split()
        if not tokens:
            continue

        if tokens[0] == "table":
            # "table ip filter {" or "table inet filter {"
            if len(tokens) >= 3:
                current_table = f"{tokens[1]} {tokens[2]}"
            current_chain = None  # a chain never spans two tables
        elif tokens[0] == "chain" and len(tokens) >= 2 and tokens[-1] == "{":
            # "chain INPUT {"
            current_chain = tokens[1]
        elif current_table and current_chain and "type filter hook input" in raw:
            actions.append(f"autodetect: using nft table='{current_table}' chain='{current_chain}'")
            return current_table, current_chain, actions

    # fallback
    return preferred_table, preferred_chain, actions


def nft_read_drop_counters(table: str, chain: str, ports: list[int]) -> dict:
    """Sum the packet/byte counters of counted drop rules for the given ports.

    Every line of the chain listing matching NFT_DROP_RE (``tcp dport <port>
    counter packets X bytes Y drop``) whose port is in *ports* contributes.

    Args:
        table: ``"<family> <table>"``.
        chain: Chain to list.
        ports: Destination ports of interest.

    Returns:
        On success ``{"ok": True, "ports", "packets", "bytes", "rules",
        "table", "chain"}``; when the chain cannot be listed
        ``{"ok": False, "error", "ports", "packets": 0, "bytes": 0, "rules": 0}``.
    """
    family, tbl = table.split()
    rc, listing = run(["nft", "list", "chain", family, tbl, chain])
    if rc != 0:
        return {"ok": False, "error": listing, "ports": ports, "packets": 0, "bytes": 0, "rules": 0}

    wanted = {int(p) for p in ports}
    hits = [
        m
        for m in map(NFT_DROP_RE.search, listing.splitlines())
        if m and int(m.group("port")) in wanted
    ]

    return {
        "ok": True,
        "ports": ports,
        "packets": sum(int(m.group("packets")) for m in hits),
        "bytes": sum(int(m.group("bytes")) for m in hits),
        "rules": len(hits),
        "table": table,
        "chain": chain,
    }


def ss_syn_counts(port: int) -> dict:
    """Count TCP sockets in SYN-RECV / SYN-SENT / ESTAB state for one port.

    Runs ``ss -tan`` and looks at each row's local and peer endpoint. A row is
    counted when the port of either endpoint is exactly *port*; comparing the
    parsed port avoids counting unrelated ports that merely start with the same
    digits (``:44312`` when looking for 443).

    Args:
        port: TCP port number to look for.

    Returns:
        ``{"ok": True, "port", "syn_recv", "syn_sent", "estab"}`` on success,
        ``{"ok": False, "error", "port"}`` when ``ss`` fails.
    """
    rc, out = run(["ss", "-tan"])
    if rc != 0:
        return {"ok": False, "error": out, "port": port}

    wanted = str(port)
    syn_recv = 0
    syn_sent = 0
    estab = 0

    for line in out.splitlines():
        # State Recv-Q Send-Q Local Address:Port Peer Address:Port
        cols = line.split()
        if len(cols) < 5:
            continue
        state, local, peer = cols[0], cols[3], cols[4]

        # The port is whatever follows the last ':' (works for "[::]:443" too).
        if wanted not in (local.rpartition(":")[2], peer.rpartition(":")[2]):
            continue

        if state.startswith(("SYN-RECV", "SYN_RECV")):
            syn_recv += 1
        elif state.startswith(("SYN-SENT", "SYN_SENT")):
            syn_sent += 1
        elif state.startswith("ESTAB"):
            estab += 1

    return {"ok": True, "port": port, "syn_recv": syn_recv, "syn_sent": syn_sent, "estab": estab}


# -----------------------------
# Reporting (JSON + simple HTML)
# -----------------------------
def write_json(path: str, data: dict) -> None:
    """Serialize *data* as indented UTF-8 JSON to *path*.

    The parent directory is created first. Non-ASCII characters are written
    as-is rather than escaped.
    """
    ensure_dirs(path)
    with open(path, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)


def write_html(path: str, report: dict) -> None:
    """Render *report* as a single self-contained HTML page and write it to *path*.

    Args:
        path: Destination file; its parent directory is created if missing.
        report: Report dictionary. The keys ``window_minutes``, ``timestamp_utc``,
            ``lines_parsed``, ``lines_matched``, ``unique_ips``,
            ``unique_subnets_24`` and ``status`` are read with ``[...]`` and must
            be present; ``top_ips``, ``top_subnets``, ``blocking``, ``tcp_noise``
            and ``note`` are optional.

    Raises:
        KeyError: If one of the required report keys (or a required per-row key
            such as ``ip`` / ``req`` / ``subnet``) is missing.
    """
    ensure_dirs(path)

    # HTML-escape any value after converting it to text.
    def esc(x): return html.escape(str(x))

    top_ips = report.get("top_ips", [])
    top_subnets = report.get("top_subnets", [])
    blocks = report.get("blocking", {})
    tcp = report.get("tcp_noise", {})

    # Table rows: only the ip / subnet cells go through esc(); the remaining
    # cells are interpolated as-is.
    rows_ip = "\n".join(
        f"<tr><td>{esc(i['ip'])}</td><td>{i['req']}</td><td>{i['s404']}</td><td>{i['s4xx']}</td><td>{i['ua_sus']}</td><td><b>{i['score']}</b></td></tr>"
        for i in top_ips
    )
    rows_sn = "\n".join(
        f"<tr><td>{esc(s['subnet'])}</td><td>{s['unique_ips']}</td><td>{s['total_req']}</td></tr>"
        for s in top_subnets
    )

    # The TCP section is shown as an escaped JSON dump.
    tcp_html = esc(json.dumps(tcp, indent=2, ensure_ascii=False))

    # Page template. Doubled braces are literal CSS braces inside the f-string;
    # empty row sets are replaced by a single placeholder row.
    html_doc = f"""<!doctype html>
<html><head><meta charset="utf-8"/>
<title>IDEO-Lab Noise Analyzer</title>
<style>
body{{font-family:system-ui,Segoe UI,Roboto,Arial;background:#0f1320;color:#e8ecff;margin:0;padding:24px}}
.card{{background:#141a2a;border:1px solid rgba(255,255,255,.12);border-radius:14px;padding:16px;margin-bottom:16px;box-shadow:0 10px 30px rgba(0,0,0,.25)}}
h1{{margin:0 0 8px 0;font-size:20px}}
.meta{{opacity:.8}}
table{{width:100%;border-collapse:collapse;margin-top:10px}}
th,td{{border-bottom:1px solid rgba(255,255,255,.10);padding:8px;text-align:left;font-size:13px}}
th{{opacity:.85}}
.badge{{display:inline-block;padding:2px 10px;border-radius:999px;border:1px solid rgba(255,255,255,.18);margin-left:8px;font-size:12px}}
.ok{{color:#62ffb6}}
.bad{{color:#ff6b7a}}
</style></head>
<body>
<div class="card">
  <h1>Noise Report <span class="badge">{esc(report['window_minutes'])} min</span></h1>
  <div class="meta">UTC: {esc(report['timestamp_utc'])} • lines_parsed={esc(report['lines_parsed'])} • lines_matched={esc(report['lines_matched'])}</div>
  <div class="meta">unique_ips={esc(report['unique_ips'])} • unique_/24={esc(report['unique_subnets_24'])}</div>
  <div class="meta">status: <b class="{ 'bad' if report['status']=='ALERT' else 'ok' }">{esc(report['status'])}</b></div>
  <div class="meta">note: {esc(report.get('note',''))}</div>
</div>

<div class="card">
  <h1>TCP noise (kernel-level)</h1>
  <pre style="white-space:pre-wrap;opacity:.85">{tcp_html}</pre>
</div>

<div class="card">
  <h1>Top IPs (HTTP logs)</h1>
  <table>
    <thead><tr><th>IP</th><th>Req</th><th>404</th><th>4xx</th><th>UA sus</th><th>Score</th></tr></thead>
    <tbody>{rows_ip or "<tr><td colspan='6'>No data (possible TCP-only noise)</td></tr>"}</tbody>
  </table>
</div>

<div class="card">
  <h1>Top /24 subnets (HTTP logs)</h1>
  <table>
    <thead><tr><th>/24</th><th>Unique IPs</th><th>Total req</th></tr></thead>
    <tbody>{rows_sn or "<tr><td colspan='3'>No data</td></tr>"}</tbody>
  </table>
</div>

<div class="card">
  <h1>Blocking</h1>
  <div class="meta">mode={esc(blocks.get('mode','-'))} • apply={esc(blocks.get('apply',False))} • ttl={esc(blocks.get('ttl_seconds','-'))}</div>
  <div class="meta">blocked_count={esc(blocks.get('blocked_count',0))} • errors={esc(len(blocks.get('errors',[])))}</div>
  <pre style="white-space:pre-wrap;opacity:.85">{esc(json.dumps(blocks, indent=2, ensure_ascii=False))}</pre>
</div>
</body></html>
"""
    with open(path, "w", encoding="utf-8") as f:
        f.write(html_doc)


# -----------------------------
# State diff
# -----------------------------
def load_state(path: str) -> dict:
    """Load the state saved by the previous run.

    Args:
        path: Location of the JSON state file.

    Returns:
        The stored dictionary. An empty dictionary is returned when the file is
        missing or unreadable, is not valid UTF-8 JSON, or holds a JSON value
        that is not an object (callers use ``.get`` on the result).
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return state if isinstance(state, dict) else {}


def save_state(path: str, data: dict) -> None:
    """Persist *data* as JSON at *path*, replacing any previous state file.

    The JSON is written to ``<path>.tmp`` first and then moved over *path* with
    ``os.replace``; the parent directory is created beforehand.
    """
    ensure_dirs(path)
    staging_path = path + ".tmp"
    with open(staging_path, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
    os.replace(staging_path, path)


# -----------------------------
# Command
# -----------------------------
class Command(BaseCommand):
    """Analyze nginx access logs (and optionally TCP-level counters) for noise.

    Each run writes a JSON and an HTML report, stores a small state file used
    for run-to-run comparison, prints a one-line summary, and can optionally
    add the noisiest addresses to an nftables/ipset block set.
    """

    help = "Detect network noise/scanners from Nginx logs (custom or combined), report + optional TTL blocking (nftables/ipset). Adds TCP-noise view + verbose/watch."

    def add_arguments(self, parser):
        """Register the command-line options; defaults come from DEFAULTS."""
        parser.add_argument("--window", type=int, default=DEFAULTS["WINDOW_MINUTES"], help="Window in minutes (best-effort if no timestamps)")
        parser.add_argument("--tail-bytes", type=int, default=DEFAULTS["TAIL_BYTES"], help="Read last N bytes of access log")
        parser.add_argument("--apply", action="store_true", help="Actually apply blocking (requires root)")
        parser.add_argument("--init-firewall", action="store_true", help="Ensure nft/ipset set + drop rule exists (requires root)")
        parser.add_argument("--mode", choices=["nft", "ipset"], default=DEFAULTS["MODE"], help="Blocking backend")
        parser.add_argument("--ttl", type=int, default=DEFAULTS["BLOCK_TTL_SECONDS"], help="Block TTL seconds")
        parser.add_argument("--score-threshold", type=int, default=DEFAULTS["BLOCK_SCORE_THRESHOLD"], help="Block if score >= threshold")
        parser.add_argument("--max-block", type=int, default=DEFAULTS["BLOCK_MAX_PER_RUN"], help="Max IPs to block per run")

        # NEW UX
        parser.add_argument("--verbose", action="store_true", help="Print top findings to console")
        parser.add_argument("--watch", type=int, default=0, help="Run in loop every N seconds (debug, not for cron)")
        parser.add_argument("--tcp", action="store_true", help="Also analyze TCP-level noise (nft drop counters + ss states)")
        parser.add_argument("--no-http", action="store_true", help="Skip HTTP log analysis (useful if you're only looking at TCP noise)")

    def handle(self, *args, **opts):
        """Run one analysis, or loop forever every ``--watch`` seconds."""
        watch = int(opts["watch"])
        if watch and watch > 0:
            while True:
                self._run_once(opts)
                time.sleep(watch)
        else:
            self._run_once(opts)

    def _run_once(self, opts):
        """Perform one complete analysis pass.

        Steps: parse the log tail, score addresses, pick block candidates,
        optionally read TCP-level counters, optionally touch the firewall,
        then write the reports, save state and print the summary.

        Args:
            opts: Parsed command options as passed to ``handle``.
        """
        log_path = DEFAULTS["LOG_PATH"]
        state_path = DEFAULTS["STATE_PATH"]
        out_dir = DEFAULTS["OUT_DIR"]

        window_minutes = int(opts["window"])
        tail_bytes = int(opts["tail_bytes"])
        mode = opts["mode"]
        apply = bool(opts["apply"])
        init_firewall = bool(opts["init_firewall"])
        ttl = int(opts["ttl"])
        score_threshold = int(opts["score_threshold"])
        max_block = int(opts["max_block"])
        verbose = bool(opts["verbose"])
        tcp_mode = bool(opts["tcp"])
        no_http = bool(opts["no_http"])

        now_utc = datetime.now(timezone.utc)
        since = now_utc - timedelta(minutes=window_minutes)

        prev = load_state(state_path)

        # -----------------------------
        # HTTP analysis (access.log)
        # -----------------------------
        lines = [] if no_http else tail_file_bytes(log_path, tail_bytes)

        ip_stats: dict[str, IpStats] = {}
        subnet_unique_ips = defaultdict(set)
        subnet_req = Counter()

        lines_parsed = 0
        lines_matched = 0
        used_combined_ts = 0

        sus_keywords = DEFAULTS["SUSPECT_UA_KEYWORDS"]

        if not no_http:
            for line in lines:
                lines_parsed += 1
                line = line.strip()
                if not line:
                    continue

                # Try custom first
                m = LOG_RE_CUSTOM.match(line)
                ua = ""   # custom format doesn't contain UA
                ts = None

                if not m:
                    # Try combined
                    m2 = LOG_RE_COMBINED.match(line)
                    if not m2:
                        continue
                    m = m2
                    ua = m.groupdict().get("ua", "") or ""
                    ts = parse_nginx_ts(m.groupdict().get("ts", "") or "")
                    used_combined_ts += 1

                lines_matched += 1

                # If we have timestamps (combined), apply window filter
                if ts is not None and ts < since:
                    continue

                ip = m.group("ip")
                # Both log patterns accept any dotted digits (e.g. 999.1.1.1).
                # Resolve the /24 before touching any counter and skip lines
                # whose address is not a real IPv4 address, instead of letting
                # ip_network() abort the whole run.
                try:
                    sn = str(ip_network(ip + "/24", strict=False))
                except ValueError:
                    continue

                status = int(m.group("status"))
                method = m.group("method")

                st = ip_stats.get(ip)
                if not st:
                    st = IpStats(ip=ip)
                    ip_stats[ip] = st

                st.req += 1
                st.methods[method] += 1

                # Note: a 404 increments both s404 and s4xx.
                if status == 404:
                    st.s404 += 1
                if 400 <= status <= 499:
                    st.s4xx += 1
                if 500 <= status <= 599:
                    st.s5xx += 1

                if ua and ua_is_suspect(ua, sus_keywords):
                    st.ua_sus += 1

                subnet_unique_ips[sn].add(ip)
                subnet_req[sn] += 1

        scored = [(score_ip(st), st) for st in ip_stats.values()]
        scored.sort(key=lambda x: x[0], reverse=True)

        top_ips = []
        for sc, st in scored[: DEFAULTS["TOP_N"]]:
            top_ips.append({
                "ip": st.ip,
                "req": st.req,
                "s404": st.s404,
                "s4xx": st.s4xx,
                "s5xx": st.s5xx,
                "ua_sus": st.ua_sus,
                "score": sc,
                "methods": dict(st.methods),
            })

        subnets_rank = []
        for sn, ips in subnet_unique_ips.items():
            subnets_rank.append({"subnet": sn, "unique_ips": len(ips), "total_req": subnet_req[sn]})
        subnets_rank.sort(key=lambda x: (x["unique_ips"], x["total_req"]), reverse=True)
        top_subnets = subnets_rank[: DEFAULTS["TOP_N"]]

        noisy_subnets = [s for s in subnets_rank if s["unique_ips"] >= DEFAULTS["MAX_IPS_PER_24"]]
        noisy_ips = [t for t in top_ips if t["req"] >= DEFAULTS["MAX_REQ_PER_IP"] or t["score"] >= score_threshold]
        status = "ALERT" if (noisy_subnets or noisy_ips) else "OK"

        # Compare the current top-20 addresses with those stored by the last run.
        diff = {}
        if not no_http:
            prev_top = set(prev.get("top_ips", []))
            curr_top = [x["ip"] for x in top_ips[:20]]
            curr_top_set = set(curr_top)
            diff = {
                "new_top_ips": sorted(curr_top_set - prev_top),
                "gone_top_ips": sorted(prev_top - curr_top_set),
            }
        else:
            # HTTP analysis skipped: carry the stored list over unchanged.
            curr_top = prev.get("top_ips", [])

        # -----------------------------
        # Blocking candidates (HTTP-based)
        # -----------------------------
        candidates = []
        for item in top_ips:
            if item["score"] >= score_threshold:
                candidates.append(item)
        if noisy_subnets:
            # Inside a noisy /24 a lower score is enough to become a candidate.
            noisy_set = {s["subnet"] for s in noisy_subnets}
            for item in top_ips:
                sn = str(ip_network(item["ip"] + "/24", strict=False))
                if sn in noisy_set and item["score"] >= max(40, score_threshold // 2):
                    candidates.append(item)

        # De-duplicate, highest score first, capped at max_block.
        seen = set()
        block_ips = []
        for item in sorted(candidates, key=lambda x: x["score"], reverse=True):
            ip = item["ip"]
            if ip in seen:
                continue
            seen.add(ip)
            block_ips.append(ip)
            if len(block_ips) >= max_block:
                break

        blocking = {
            "mode": mode,
            "apply": apply,
            "ttl_seconds": ttl,
            "score_threshold": score_threshold,
            "max_block_per_run": max_block,
            "would_block": block_ips,
            "blocked_count": 0,
            "actions": [],
            "errors": [],
        }

        # -----------------------------
        # TCP noise (kernel-level)
        # -----------------------------
        tcp_noise = {}
        if tcp_mode:
            # Autodetect the real INPUT chain/table (your prod likely: "ip filter" / "INPUT")
            detected_table, detected_chain, det_actions = nft_autodetect_table_chain(DEFAULTS["NFT_TABLE"], DEFAULTS["NFT_CHAIN"])
            tcp_noise["autodetect"] = det_actions
            tcp_noise["nft_table"] = detected_table
            tcp_noise["nft_chain"] = detected_chain

            nft_stats = nft_read_drop_counters(detected_table, detected_chain, DEFAULTS["BLOCK_PORTS"])
            tcp_noise["nft_drop"] = nft_stats

            # Delta vs previous run. A delta only makes sense against a
            # successful previous reading of the same table/chain; without such
            # a baseline (first --tcp run, previous read failed, state saved by
            # a run without --tcp) the cumulative counter must not be reported
            # as "drops since last run".
            prev_noise = prev.get("tcp_noise")
            prev_tcp = prev_noise.get("nft_drop") if isinstance(prev_noise, dict) else None
            has_baseline = (
                isinstance(prev_tcp, dict)
                and bool(prev_tcp.get("ok"))
                and prev_tcp.get("table") == nft_stats.get("table")
                and prev_tcp.get("chain") == nft_stats.get("chain")
            )
            if nft_stats.get("ok"):
                if has_baseline:
                    prev_packets = int(prev_tcp.get("packets", 0))
                    prev_bytes = int(prev_tcp.get("bytes", 0))
                    # max(0, ...) absorbs counter resets (e.g. ruleset reload).
                    delta_packets = max(0, int(nft_stats.get("packets", 0)) - prev_packets)
                    delta_bytes = max(0, int(nft_stats.get("bytes", 0)) - prev_bytes)
                else:
                    delta_packets = 0
                    delta_bytes = 0
                tcp_noise["nft_drop"]["delta_packets"] = delta_packets
                tcp_noise["nft_drop"]["delta_bytes"] = delta_bytes

                if delta_packets >= DEFAULTS["TCP_ALERT_DELTA_DROPS"]:
                    status = "ALERT"

            # ss view (SYN_RECV/SYN_SENT/ESTAB) on 443
            tcp_noise["ss"] = ss_syn_counts(DEFAULTS["TCP_PORT"])
            if tcp_noise["ss"].get("ok") and tcp_noise["ss"].get("syn_recv", 0) >= DEFAULTS["TCP_ALERT_SYN_RECV"]:
                status = "ALERT"

        # -----------------------------
        # Firewall actions (only if root)
        # -----------------------------
        if (init_firewall or apply) and not is_root():
            blocking["actions"].append("skip firewall: not running as root (use sudo for --init-firewall/--apply)")
        else:
            if init_firewall:
                if mode == "nft":
                    res = nft_init(DEFAULTS["NFT_TABLE"], DEFAULTS["NFT_SET"], DEFAULTS["NFT_CHAIN"], DEFAULTS["BLOCK_PORTS"], DEFAULTS["NFT_RULE_COMMENT"])
                    blocking["actions"] += res.get("actions", [])
                    blocking["errors"] += res.get("errors", [])
                else:
                    res = ipset_init(DEFAULTS["IPSET_NAME"], DEFAULTS["BLOCK_PORTS"])
                    blocking["actions"] += res.get("actions", [])
                    blocking["errors"] += res.get("errors", [])

            if apply and block_ips:
                if mode == "nft":
                    res = nft_add_ips(DEFAULTS["NFT_TABLE"], DEFAULTS["NFT_SET"], block_ips, ttl)
                    blocking["blocked_count"] = len(res.get("added", []))
                    blocking["errors"] += res.get("errors", [])
                else:
                    res = ipset_add_ips(DEFAULTS["IPSET_NAME"], block_ips, ttl)
                    blocking["blocked_count"] = len(res.get("added", []))
                    blocking["errors"] += res.get("errors", [])

        note = (
            "Custom access.log supported. Important: if noise is TCP-only (SYN flood / handshake), it may NOT appear in access.log. "
            "Use --tcp + --verbose/--watch to see kernel-level drops/states. "
            f"combined_ts_lines={used_combined_ts}"
        )

        report = {
            "timestamp_utc": now_utc.isoformat(),
            "window_minutes": window_minutes,
            "log_path": log_path,
            "lines_parsed": lines_parsed,
            "lines_matched": lines_matched,
            "unique_ips": len(ip_stats),
            "unique_subnets_24": len(subnet_unique_ips),
            "status": status,
            "thresholds": {
                "max_req_per_ip": DEFAULTS["MAX_REQ_PER_IP"],
                "max_ips_per_24": DEFAULTS["MAX_IPS_PER_24"],
                "score_threshold": score_threshold,
                "tcp_alert_delta_drops": DEFAULTS["TCP_ALERT_DELTA_DROPS"],
                "tcp_alert_syn_recv": DEFAULTS["TCP_ALERT_SYN_RECV"],
            },
            "diff": diff,
            "top_ips": top_ips,
            "top_subnets": top_subnets,
            "noisy_subnets": noisy_subnets[:50],
            "blocking": blocking,
            "tcp_noise": tcp_noise,
            "note": note,
        }

        # Report files are named after the run's UTC timestamp (second resolution).
        ts = now_utc.strftime("%Y%m%d_%H%M%S")
        json_path = os.path.join(out_dir, f"noise_report_{ts}.json")
        html_path = os.path.join(out_dir, f"noise_report_{ts}.html")
        write_json(json_path, report)
        write_html(html_path, report)

        save_state(state_path, {"timestamp_utc": now_utc.isoformat(), "top_ips": curr_top, "tcp_noise": tcp_noise})

        # -----------------------------
        # Console output (cron-friendly + verbose)
        # -----------------------------
        self.stdout.write(
            f"[cron_noise_analyzer] status={status} window={window_minutes}m "
            f"ips={len(ip_stats)} /24={len(subnet_unique_ips)} matched={lines_matched}/{lines_parsed}"
        )
        self.stdout.write(f"[report] json={json_path} html={html_path}")

        if verbose:
            # Print TCP summary first (this is what you want when tcptrack explodes)
            if tcp_mode:
                nftd = tcp_noise.get("nft_drop", {})
                ssd = tcp_noise.get("ss", {})
                if isinstance(nftd, dict) and nftd.get("ok"):
                    self.stdout.write(
                        f"[tcp:nft] table={nftd.get('table')} chain={nftd.get('chain')} "
                        f"ports={nftd.get('ports')} rules={nftd.get('rules')} "
                        f"packets={nftd.get('packets')} delta={nftd.get('delta_packets', 0)}"
                    )
                else:
                    self.stdout.write(f"[tcp:nft] not available: {nftd.get('error') if isinstance(nftd, dict) else nftd}")

                if isinstance(ssd, dict) and ssd.get("ok"):
                    self.stdout.write(
                        f"[tcp:ss] port={ssd.get('port')} syn_recv={ssd.get('syn_recv')} syn_sent={ssd.get('syn_sent')} estab={ssd.get('estab')}"
                    )
                else:
                    self.stdout.write(f"[tcp:ss] not available: {ssd.get('error') if isinstance(ssd, dict) else ssd}")

            # Then HTTP top IPs
            if top_ips:
                self.stdout.write("[http:top_ips]")
                for i, row in enumerate(top_ips[:10], start=1):
                    self.stdout.write(
                        f"  #{i:02d} {row['ip']} req={row['req']} 4xx={row['s4xx']} 404={row['s404']} score={row['score']}"
                    )
            else:
                self.stdout.write("[http] no top_ips (possible TCP-only noise — use --tcp)")

        if blocking["would_block"]:
            self.stdout.write(f"[blocking] candidates={len(blocking['would_block'])} apply={apply} mode={mode} ttl={ttl}s")
            if not apply:
                self.stdout.write("[blocking] DRY-RUN: add --apply to enforce (sudo required)")
        if blocking["errors"]:
            self.stdout.write("[blocking] errors:")
            for e in blocking["errors"][:20]:
                self.stdout.write(f" - {e}")
