# ai-lawyer-agent/testing/reports/charts.py

import json
from pathlib import Path
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Select the non-interactive Agg backend: this module only writes figures to
# image files, so no GUI toolkit or display is required.
matplotlib.use("Agg")

# Input and output locations, both resolved two directory levels above this file.
_BASE_DIR = Path(__file__).parent.parent
RESULTS_FILE = _BASE_DIR / "results.json"
CHARTS_DIR = _BASE_DIR / "charts"

# Fill colour used for each test outcome.
STATUS_COLORS = dict(
    passed="#3ddc84",
    failed="#ff5c5c",
    skipped="#e0c84a",
    error="#ff8a8a",
)

# Dark-theme palette and font family shared by every chart.
BG_COLOR = "#1a1a1a"
PANEL_COLOR = "#242424"
TEXT_COLOR = "#e0e0e0"
GRID_COLOR = "#333333"
FONT_MONO = "monospace"


def _style_ax(ax, title: str):
    """Apply the shared dark theme to *ax* and give it a bold title.

    Sets the panel background, the title, tick colour and label size (8 on
    both axes), spine colour and a dashed horizontal grid drawn beneath the
    plotted artists, then recolours every current tick label and switches it
    to the monospace font.

    Args:
        ax: Axes to style.
        title: Text shown above the axes.
    """
    ax.set_facecolor(PANEL_COLOR)
    ax.set_title(
        title,
        color=TEXT_COLOR,
        fontsize=11,
        fontweight="bold",
        pad=10,
    )

    ax.tick_params(colors=TEXT_COLOR, labelsize=8)
    ax.spines[:].set_color(GRID_COLOR)

    # Horizontal guide lines only, kept behind the data.
    ax.yaxis.grid(True, color=GRID_COLOR, linewidth=0.5, linestyle="--")
    ax.set_axisbelow(True)

    for axis_labels in (ax.get_xticklabels(), ax.get_yticklabels()):
        for tick_label in axis_labels:
            tick_label.set_color(TEXT_COLOR)
            tick_label.set_fontfamily(FONT_MONO)


def _module_name(file_path: str) -> str:
    """Turn the file part of a test node id into a dotted module name."""
    *directories, filename = file_path.split("/")
    # Drop whole "tests" directory components only, so a directory such as
    # "unit_tests" keeps its full name.
    kept = [directory for directory in directories if directory != "tests"]
    # Strip just the trailing ".py": removing every ".py" would also mangle a
    # package whose name starts with "py" once "/" has been turned into ".".
    kept.append(filename.removesuffix(".py"))
    return ".".join(kept)


def load_data() -> tuple[pd.DataFrame, dict]:
    """Load the JSON test report and flatten it into one row per test.

    Each entry of the report's ``tests`` list is split on ``::`` into a
    module (derived from the file path), a class (``"unknown"`` when the node
    id has fewer than three parts) and a test name (the last part).

    Returns:
        A ``(df, summary)`` pair. ``df`` always has the columns ``module``,
        ``class``, ``name``, ``outcome`` and ``duration`` -- even when the
        report lists no tests -- and ``summary`` is the report's ``summary``
        mapping (empty when absent).

    Raises:
        FileNotFoundError: If ``RESULTS_FILE`` does not exist.
    """
    if not RESULTS_FILE.exists():
        raise FileNotFoundError(f"results.json not found at {RESULTS_FILE}")

    raw = json.loads(RESULTS_FILE.read_text(encoding="utf-8"))

    rows = []
    for test in raw.get("tests", []):
        parts = test["nodeid"].split("::")
        # A missing or empty "call" section counts as zero duration.
        call = test.get("call") or {}
        rows.append({
            "module":   _module_name(parts[0]),
            "class":    parts[1] if len(parts) >= 3 else "unknown",
            "name":     parts[-1],
            "outcome":  test["outcome"],
            "duration": call.get("duration", 0.0),
        })

    # Explicit columns keep the frame's schema stable when there are no rows,
    # so column lookups downstream do not fail with KeyError.
    columns = ["module", "class", "name", "outcome", "duration"]
    df = pd.DataFrame(rows, columns=columns)
    return df, raw.get("summary", {})


def chart_overall_status(df: pd.DataFrame, ax: plt.Axes):
    """Draw a pie chart of how many tests ended in each outcome.

    Args:
        df: Test results with an ``outcome`` column.
        ax: Axes to draw on.
    """
    outcome_counts = df["outcome"].value_counts()
    # Outcomes without a configured colour fall back to neutral grey.
    slice_colors = [
        STATUS_COLORS.get(outcome, "#888") for outcome in outcome_counts.index
    ]

    pie_options = {
        "labels": outcome_counts.index,
        "colors": slice_colors,
        "autopct": "%1.1f%%",
        "startangle": 90,
        "pctdistance": 0.78,
        "wedgeprops": {"edgecolor": BG_COLOR, "linewidth": 2},
    }
    _wedges, outcome_labels, pct_labels = ax.pie(outcome_counts.values, **pie_options)

    # Outcome names sit outside the pie, on the dark panel.
    for outcome_label in outcome_labels:
        outcome_label.set_color(TEXT_COLOR)
        outcome_label.set_fontsize(9)
        outcome_label.set_fontfamily(FONT_MONO)

    # Percentages sit on the coloured wedges, so they use dark bold text.
    for pct_label in pct_labels:
        pct_label.set_color("#111")
        pct_label.set_fontsize(8)
        pct_label.set_fontweight("bold")

    ax.set_facecolor(PANEL_COLOR)
    ax.set_title(
        "Overall Results",
        color=TEXT_COLOR,
        fontsize=11,
        fontweight="bold",
        pad=10,
    )


def chart_by_module(df: pd.DataFrame, ax: plt.Axes):
    """Draw grouped bars with the number of tests per outcome in each module.

    The four standard outcomes always get a bar slot and a legend entry, in a
    fixed order. Any other outcome present in *df* is appended after them, in
    the fallback grey, instead of being dropped from the chart.

    Args:
        df: Test results with ``module`` and ``outcome`` columns.
        ax: Axes to draw on.
    """
    standard_outcomes = ["passed", "failed", "skipped", "error"]
    counts = df.groupby(["module", "outcome"]).size().unstack(fill_value=0)
    extra_outcomes = [col for col in counts.columns if col not in standard_outcomes]
    pivot = counts.reindex(columns=standard_outcomes + extra_outcomes, fill_value=0)

    x = np.arange(len(pivot))
    # Every module's group spans 0.8 of its slot: 0.2 per bar with the four
    # standard outcomes, narrower when extra outcomes are present so that
    # neighbouring groups never overlap.
    width = 0.8 / len(pivot.columns)
    offset = -(len(pivot.columns) - 1) / 2 * width

    for i, col in enumerate(pivot.columns):
        ax.bar(
            x + offset + i * width,
            pivot[col],
            width,
            label=col,
            color=STATUS_COLORS.get(col, "#888"),
            edgecolor=BG_COLOR,
            linewidth=0.8,
        )

    ax.set_xticks(x)
    ax.set_xticklabels(pivot.index, rotation=25, ha="right", fontsize=7, fontfamily=FONT_MONO)
    ax.set_ylabel("Tests", color=TEXT_COLOR, fontsize=9)
    ax.legend(
        fontsize=7,
        labelcolor=TEXT_COLOR,
        facecolor=PANEL_COLOR,
        edgecolor=GRID_COLOR,
    )
    _style_ax(ax, "Results by Module")
    # _style_ax sets the tick label size to 8 on both axes; restore the
    # smaller size requested for the (often long) module names.
    ax.tick_params(axis="x", labelsize=7)


def chart_duration_histogram(df: pd.DataFrame, ax: plt.Axes):
    """Draw a histogram of test durations with mean, median and P95 markers.

    Skipped tests are excluded and the ``duration`` column is multiplied by
    1000 for display in milliseconds. When nothing is left to plot, a
    "No data" placeholder is shown instead.

    Args:
        df: Test results with ``outcome`` and ``duration`` columns.
        ax: Axes to draw on.
    """
    panel_title = "Test Duration (ms)"
    not_skipped = df["outcome"] != "skipped"
    durations_ms = df.loc[not_skipped, "duration"].values * 1000

    if len(durations_ms) == 0:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", color=TEXT_COLOR)
        _style_ax(ax, panel_title)
        return

    # (legend name, value in ms, line colour, line style)
    markers = [
        ("Mean", float(np.mean(durations_ms)), "#3ddc84", "--"),
        ("Median", float(np.median(durations_ms)), "#e0c84a", ":"),
        ("P95", float(np.percentile(durations_ms, 95)), "#ff5c5c", "-."),
    ]

    ax.hist(
        durations_ms,
        bins=20,
        color="#5cb8ff",
        edgecolor=BG_COLOR,
        linewidth=0.6,
        alpha=0.85,
    )
    for name, value_ms, line_color, line_style in markers:
        ax.axvline(
            value_ms,
            color=line_color,
            linewidth=1.5,
            linestyle=line_style,
            # Names are padded to a common width so the values line up.
            label=f"{name:<6} {value_ms:.1f} ms",
        )

    ax.set_xlabel("ms", color=TEXT_COLOR, fontsize=9)
    ax.set_ylabel("Tests", color=TEXT_COLOR, fontsize=9)
    ax.legend(
        fontsize=7,
        labelcolor=TEXT_COLOR,
        facecolor=PANEL_COLOR,
        edgecolor=GRID_COLOR,
    )
    _style_ax(ax, panel_title)


def chart_slowest_tests(df: pd.DataFrame, ax: plt.Axes):
    """Draw a horizontal bar chart of the ten longest-running tests.

    Skipped tests are excluded. Bars are labelled ``class::name``, coloured
    by outcome, and ordered with the slowest test at the top. When no test is
    left to plot, a "No data" placeholder is shown instead.

    Args:
        df: Test results with ``class``, ``name``, ``outcome`` and
            ``duration`` columns.
        ax: Axes to draw on.
    """
    title = "Top 10 Slowest Tests"
    ran = df[df["outcome"] != "skipped"]

    if ran.empty:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", color=TEXT_COLOR)
        _style_ax(ax, title)
        return

    top = ran.nlargest(10, "duration")
    labels = (top["class"] + "::" + top["name"]).tolist()
    durations_ms = top["duration"] * 1000
    colors = [STATUS_COLORS.get(outcome, "#888") for outcome in top["outcome"]]

    # Plot against numeric positions and label the ticks afterwards. Passing
    # the label strings as y values makes matplotlib treat them as
    # categories, so two tests sharing a label (same class and name in
    # different modules) would be drawn on top of each other.
    positions = np.arange(len(top))
    ax.barh(positions, durations_ms, color=colors, edgecolor=BG_COLOR, linewidth=0.6)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)

    ax.set_xlabel("ms", color=TEXT_COLOR, fontsize=9)
    ax.invert_yaxis()
    _style_ax(ax, title)
    # _style_ax sets the tick label size to 8 on both axes, so the smaller
    # size for the long test labels has to be applied after it.
    ax.tick_params(axis="y", labelsize=7)


def chart_stats_table(df: pd.DataFrame, summary: dict, ax: plt.Axes):
    """Render headline counts and timing statistics as a two-column table.

    Passed, failed and skipped counts are read from *summary* (0 when a key
    is absent). The total count and "Total time" come from every row of
    *df*; mean, median and P95 are computed over non-skipped rows only.
    Values that cannot be computed are shown as a dash.

    Args:
        df: Test results with ``outcome`` and ``duration`` columns.
        summary: Mapping of outcome name to count.
        ax: Axes to draw on; its axis decorations are switched off.
    """
    placeholder = "—"

    ax.set_facecolor(PANEL_COLOR)
    ax.axis("off")

    n_tests = len(df)
    n_passed, n_failed, n_skipped = (
        summary.get(key, 0) for key in ("passed", "failed", "skipped")
    )
    total_ms = df["duration"].sum() * 1000
    ran_ms = df.loc[df["outcome"] != "skipped", "duration"].values * 1000
    has_timings = len(ran_ms) > 0

    def timing(statistic) -> str:
        """Format one statistic of the non-skipped durations, or a dash."""
        if not has_timings:
            return placeholder
        return f"{statistic(ran_ms):.1f} ms"

    if n_tests:
        pass_rate = f"{n_passed / n_tests * 100:.1f}%"
    else:
        pass_rate = placeholder

    metrics = [
        ["Total tests", str(n_tests)],
        ["Passed", str(n_passed)],
        ["Failed", str(n_failed)],
        ["Skipped", str(n_skipped)],
        ["Pass rate", pass_rate],
        ["Total time", f"{total_ms:.0f} ms"],
        ["Mean duration", timing(np.mean)],
        ["Median", timing(np.median)],
        ["P95", timing(lambda values: np.percentile(values, 95))],
    ]

    table = ax.table(
        cellText=metrics,
        colLabels=["Metric", "Value"],
        cellLoc="left",
        loc="center",
        colWidths=[0.6, 0.4],
    )
    table.auto_set_font_size(False)
    table.set_fontsize(9)

    # Alternate the background by row index for a striped look.
    for (row_idx, _col_idx), cell in table.get_celld().items():
        stripe = "#2e2e2e" if row_idx % 2 == 0 else PANEL_COLOR
        cell.set_facecolor(stripe)
        cell.set_edgecolor(GRID_COLOR)
        cell.set_text_props(color=TEXT_COLOR, fontfamily=FONT_MONO)

    ax.set_title(
        "Summary",
        color=TEXT_COLOR,
        fontsize=11,
        fontweight="bold",
        pad=10,
    )


def generate(output_path: Path = None) -> Path:
    """Render the test-report dashboard to an image file and return its path.

    The figure is a 2x3 grid: overall outcome pie, per-module bars (spanning
    two columns), duration histogram, slowest tests and a summary table.

    Args:
        output_path: Destination file. Defaults to ``report.png`` inside
            ``CHARTS_DIR`` when omitted.

    Returns:
        The path the figure was written to.

    Raises:
        FileNotFoundError: If the results file is missing (raised by
            ``load_data``).
    """
    CHARTS_DIR.mkdir(parents=True, exist_ok=True)
    output_path = output_path or CHARTS_DIR / "report.png"
    # A caller-supplied destination may live outside CHARTS_DIR; make sure its
    # directory exists before saving.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    df, summary = load_data()

    fig = plt.figure(figsize=(18, 14), facecolor=BG_COLOR)
    try:
        fig.suptitle(
            "Legal AI Assistant — Test Report",
            fontsize=16, fontweight="bold", color=TEXT_COLOR,
            y=0.98, fontfamily=FONT_MONO,
        )

        gs = fig.add_gridspec(2, 3, hspace=0.45, wspace=0.35,
                              top=0.93, bottom=0.05, left=0.06, right=0.97)

        chart_overall_status(df,         fig.add_subplot(gs[0, 0]))
        chart_by_module(df,              fig.add_subplot(gs[0, 1:]))
        chart_duration_histogram(df,     fig.add_subplot(gs[1, 0]))
        chart_slowest_tests(df,          fig.add_subplot(gs[1, 1]))
        chart_stats_table(df, summary,   fig.add_subplot(gs[1, 2]))

        fig.savefig(output_path, dpi=150, bbox_inches="tight", facecolor=BG_COLOR)
    finally:
        # pyplot keeps a reference to every figure it creates; close this one
        # even when a chart function or the save fails, so it is not leaked.
        plt.close(fig)
    return output_path


# Script entry point: build the report at the default location and say where it went.
if __name__ == "__main__":
    print(f"Chart saved: {generate()}")