ai-lawyer-agent/testing/reports/charts.py
2026-03-23 02:55:42 +01:00

245 lines
7.9 KiB
Python

import json
from pathlib import Path
from collections import Counter, defaultdict
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
matplotlib.use("Agg")
# Input report and default output directory, both located two directory
# levels above this file.
RESULTS_FILE = Path(__file__).parent.parent.joinpath("results.json")
CHARTS_DIR = Path(__file__).parent.parent.joinpath("charts")

# Colour used for each test outcome; the chart functions fall back to a
# neutral grey ("#888") for outcomes that are not listed here.
STATUS_COLORS = {
    "passed": "#3ddc84",
    "failed": "#ff5c5c",
    "skipped": "#e0c84a",
    "error": "#ff8a8a",
}

# Dark theme shared by every panel of the report.
BG_COLOR = "#1a1a1a"     # figure background and bar/wedge edge colour
PANEL_COLOR = "#242424"  # background of each individual axes
TEXT_COLOR = "#e0e0e0"   # titles, axis labels and tick labels
GRID_COLOR = "#333333"   # grid lines, spines and table/legend borders
FONT_MONO = "monospace"  # font family applied to labels
def _style_ax(ax, title: str):
    """Apply the shared dark-theme look to *ax* and set its title.

    Sets the panel background, the bold title, tick colour and label size,
    spine colour and a dashed y-axis grid placed below the plotted artists.
    Tick labels that exist at call time are then recoloured and switched to
    the monospace font.
    """
    title_style = {"color": TEXT_COLOR, "fontsize": 11, "fontweight": "bold", "pad": 10}
    grid_style = {"color": GRID_COLOR, "linewidth": 0.5, "linestyle": "--"}

    ax.set_facecolor(PANEL_COLOR)
    ax.set_title(title, **title_style)
    # NOTE: labelsize=8 is applied to both axes, including existing tick labels.
    ax.tick_params(colors=TEXT_COLOR, labelsize=8)
    ax.spines[:].set_color(GRID_COLOR)
    ax.yaxis.grid(True, **grid_style)
    ax.set_axisbelow(True)

    tick_labels = [*ax.get_xticklabels(), *ax.get_yticklabels()]
    for tick_label in tick_labels:
        tick_label.set_color(TEXT_COLOR)
        tick_label.set_fontfamily(FONT_MONO)
def load_data(results_file: "Path | None" = None) -> tuple[pd.DataFrame, dict]:
    """Load the JSON test report and flatten it into one row per test.

    Args:
        results_file: Report file to read. When omitted, the module-level
            ``RESULTS_FILE`` is used.

    Returns:
        A ``(df, summary)`` pair. ``df`` has the columns ``module``,
        ``class``, ``name``, ``outcome`` and ``duration``, built from the
        report's ``"tests"`` list; the columns are present even when that
        list is empty or missing. ``summary`` is the report's ``"summary"``
        mapping, or ``{}`` when the report has none.

    Raises:
        FileNotFoundError: If the report file does not exist.
        KeyError: If a test entry has no ``nodeid`` or ``outcome`` key.
    """
    columns = ["module", "class", "name", "outcome", "duration"]

    path = RESULTS_FILE if results_file is None else Path(results_file)
    if not path.exists():
        raise FileNotFoundError(f"results.json not found at {path}")
    raw = json.loads(path.read_text(encoding="utf-8"))

    rows = []
    for t in raw.get("tests", []):
        parts = t["nodeid"].split("::")

        # Build a dotted module name from the file part of the node id.
        # Only whole "tests" directory segments and a trailing ".py" are
        # removed, so names such as "unit_tests" or "pytools" stay intact.
        *dirs, filename = parts[0].split("/")
        kept_dirs = [d for d in dirs if d != "tests"]
        module = ".".join([*kept_dirs, filename.removesuffix(".py")])

        # Entries may have no "call" section (or a null one); their
        # duration is reported as 0.0.
        call = t.get("call") or {}

        rows.append({
            "module": module,
            # "file::Class::test" has three parts; plain functions have two.
            "class": parts[1] if len(parts) >= 3 else "unknown",
            "name": parts[-1],
            "outcome": t["outcome"],
            "duration": call.get("duration", 0.0),
        })

    # Passing explicit columns keeps the schema stable for an empty report,
    # so callers can index df["outcome"] / df["duration"] unconditionally.
    df = pd.DataFrame(rows, columns=columns)
    df["duration"] = df["duration"].astype(float)
    summary = raw.get("summary", {})
    return df, summary
def chart_overall_status(df: pd.DataFrame, ax: plt.Axes):
    """Draw a pie chart of the number of tests per outcome on *ax*.

    Wedges are coloured from ``STATUS_COLORS`` (grey for outcomes not
    listed there) and annotated with their percentage share.
    """
    outcome_counts = df["outcome"].value_counts()
    wedge_colors = [STATUS_COLORS.get(outcome, "#888") for outcome in outcome_counts.index]

    _wedges, label_texts, pct_texts = ax.pie(
        outcome_counts.values,
        labels=outcome_counts.index,
        colors=wedge_colors,
        autopct="%1.1f%%",
        startangle=90,
        pctdistance=0.78,
        wedgeprops={"edgecolor": BG_COLOR, "linewidth": 2},
    )

    # Outcome names placed around the pie.
    for label_text in label_texts:
        label_text.set_color(TEXT_COLOR)
        label_text.set_fontsize(9)
        label_text.set_fontfamily(FONT_MONO)

    # Percentages sit on the coloured wedges, hence the dark text colour.
    for pct_text in pct_texts:
        pct_text.set_color("#111")
        pct_text.set_fontsize(8)
        pct_text.set_fontweight("bold")

    ax.set_facecolor(PANEL_COLOR)
    ax.set_title("Overall Results", color=TEXT_COLOR, fontsize=11, fontweight="bold", pad=10)
def chart_by_module(df: pd.DataFrame, ax: plt.Axes):
    """Draw grouped bars of test counts per module, one bar per outcome.

    The four standard outcomes (passed, failed, skipped, error) always get
    a bar slot, in that order. Any other outcome present in *df* is appended
    after them instead of being dropped, so this chart accounts for the same
    tests as the overall pie chart. Such outcomes use the grey fallback
    colour.
    """
    standard = ["passed", "failed", "skipped", "error"]
    pivot = df.groupby(["module", "outcome"]).size().unstack(fill_value=0)
    extras = sorted((col for col in pivot.columns if col not in standard), key=str)
    pivot = pivot.reindex(columns=standard + extras, fill_value=0)

    x = np.arange(len(pivot))
    # Each module group spans 0.8 of its slot; with the four standard
    # outcomes this gives the usual bar width of 0.2.
    width = 0.8 / len(pivot.columns)
    # Shift so the group of bars is centred on the module's tick.
    offset = -(len(pivot.columns) - 1) / 2 * width
    for i, col in enumerate(pivot.columns):
        ax.bar(
            x + offset + i * width,
            pivot[col],
            width,
            label=col,
            color=STATUS_COLORS.get(col, "#888"),
            edgecolor=BG_COLOR,
            linewidth=0.8,
        )

    ax.set_xticks(x)
    ax.set_xticklabels(pivot.index, rotation=25, ha="right", fontsize=7, fontfamily=FONT_MONO)
    ax.set_ylabel("Tests", color=TEXT_COLOR, fontsize=9)
    ax.legend(
        fontsize=7,
        labelcolor=TEXT_COLOR,
        facecolor=PANEL_COLOR,
        edgecolor=GRID_COLOR,
    )
    _style_ax(ax, "Results by Module")
    # _style_ax() sets every tick label to size 8, which would override the
    # smaller module labels requested above; re-apply the size afterwards.
    ax.tick_params(axis="x", labelsize=7)
def chart_duration_histogram(df: pd.DataFrame, ax: plt.Axes):
    """Draw a histogram of durations for all tests that were not skipped.

    Durations are multiplied by 1000 and labelled as milliseconds. Vertical
    lines mark the mean, median and 95th percentile. When no test qualifies,
    a "No data" note is shown instead.
    """
    panel_title = "Test Duration (ms)"
    not_skipped = df["outcome"] != "skipped"
    durations = df.loc[not_skipped, "duration"].values * 1000

    if len(durations) == 0:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", color=TEXT_COLOR)
        _style_ax(ax, panel_title)
        return

    ax.hist(durations, bins=20, color="#5cb8ff", edgecolor=BG_COLOR, linewidth=0.6, alpha=0.85)

    # Summary statistics, each drawn as a labelled vertical marker.
    mean_ms = float(np.mean(durations))
    ax.axvline(mean_ms, color="#3ddc84", linewidth=1.5, linestyle="--", label=f"Mean {mean_ms:.1f} ms")
    median_ms = float(np.median(durations))
    ax.axvline(median_ms, color="#e0c84a", linewidth=1.5, linestyle=":", label=f"Median {median_ms:.1f} ms")
    p95_ms = float(np.percentile(durations, 95))
    ax.axvline(p95_ms, color="#ff5c5c", linewidth=1.5, linestyle="-.", label=f"P95 {p95_ms:.1f} ms")

    ax.set_xlabel("ms", color=TEXT_COLOR, fontsize=9)
    ax.set_ylabel("Tests", color=TEXT_COLOR, fontsize=9)
    ax.legend(fontsize=7, labelcolor=TEXT_COLOR, facecolor=PANEL_COLOR, edgecolor=GRID_COLOR)
    _style_ax(ax, panel_title)
def chart_slowest_tests(df: pd.DataFrame, ax: plt.Axes):
    """Draw a horizontal bar chart of the ten longest-running tests.

    Skipped tests are excluded. Bars are labelled ``class::name``, coloured
    by outcome, scaled by 1000 (labelled as milliseconds) and listed with
    the slowest test at the top.
    """
    top = df[df["outcome"] != "skipped"].nlargest(10, "duration")
    labels = (top["class"] + "::" + top["name"]).tolist()
    durations_ms = (top["duration"] * 1000).to_numpy()
    colors = [STATUS_COLORS.get(o, "#888") for o in top["outcome"]]

    # Plot against numeric positions rather than the label strings: on a
    # categorical axis, two tests sharing the same "class::name" (e.g. in
    # different modules) would be drawn on top of each other.
    positions = np.arange(len(top))
    ax.barh(positions, durations_ms, color=colors, edgecolor=BG_COLOR, linewidth=0.6)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)

    ax.set_xlabel("ms", color=TEXT_COLOR, fontsize=9)
    ax.invert_yaxis()  # largest duration first, reading top to bottom
    _style_ax(ax, "Top 10 Slowest Tests")
    # Must come after _style_ax(), which sets all tick labels to size 8.
    ax.tick_params(axis="y", labelsize=7)
def chart_stats_table(df: pd.DataFrame, summary: dict, ax: plt.Axes):
    """Render a two-column table of headline statistics on *ax*.

    The total is the number of rows in *df*. The passed/failed/skipped
    counts are read from *summary* (0 when a key is absent). Timing
    statistics are computed over non-skipped tests and left blank when
    there are none.
    """
    ax.set_facecolor(PANEL_COLOR)
    ax.axis("off")

    total = len(df)
    passed, failed, skipped = (summary.get(key, 0) for key in ("passed", "failed", "skipped"))
    duration = df["duration"].sum() * 1000
    durations = df.loc[df["outcome"] != "skipped", "duration"].values * 1000

    # Format the derived values up front; empty strings stand in for
    # statistics that cannot be computed.
    pass_rate_txt = f"{passed / total * 100:.1f}%" if total else ""
    if len(durations):
        mean_txt = f"{np.mean(durations):.1f} ms"
        median_txt = f"{np.median(durations):.1f} ms"
        p95_txt = f"{np.percentile(durations, 95):.1f} ms"
    else:
        mean_txt = median_txt = p95_txt = ""

    table_rows = [
        ["Total tests", str(total)],
        ["Passed", str(passed)],
        ["Failed", str(failed)],
        ["Skipped", str(skipped)],
        ["Pass rate", pass_rate_txt],
        ["Total time", f"{duration:.0f} ms"],
        ["Mean duration", mean_txt],
        ["Median", median_txt],
        ["P95", p95_txt],
    ]

    stats_table = ax.table(
        cellText=table_rows,
        colLabels=["Metric", "Value"],
        cellLoc="left",
        loc="center",
        colWidths=[0.6, 0.4],
    )
    stats_table.auto_set_font_size(False)
    stats_table.set_fontsize(9)

    # Alternate the row background (even row indices get the lighter shade).
    for (row_idx, _col_idx), cell in stats_table.get_celld().items():
        shade = "#2e2e2e" if row_idx % 2 == 0 else PANEL_COLOR
        cell.set_facecolor(shade)
        cell.set_edgecolor(GRID_COLOR)
        cell.set_text_props(color=TEXT_COLOR, fontfamily=FONT_MONO)

    ax.set_title("Summary", color=TEXT_COLOR, fontsize=11, fontweight="bold", pad=10)
def generate(output_path: Path = None) -> Path:
    """Render the full test report figure and save it as an image.

    The figure is a 2x3 grid: outcome pie chart and per-module bars on the
    top row (the bars span two columns); duration histogram, slowest tests
    and the summary table on the bottom row.

    Args:
        output_path: Destination image file (``Path`` or ``str``). When
            omitted, ``CHARTS_DIR / "report.png"`` is used.

    Returns:
        The path the image was written to.

    Raises:
        FileNotFoundError: Propagated from ``load_data()`` when the results
            file is missing.
    """
    output_path = Path(output_path) if output_path else CHARTS_DIR / "report.png"
    # Create the directory the image is actually written to; for the default
    # path this is CHARTS_DIR.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    df, summary = load_data()

    fig = plt.figure(figsize=(18, 14), facecolor=BG_COLOR)
    try:
        fig.suptitle(
            "Legal AI Assistant — Test Report",
            fontsize=16, fontweight="bold", color=TEXT_COLOR,
            y=0.98, fontfamily=FONT_MONO,
        )
        gs = fig.add_gridspec(2, 3, hspace=0.45, wspace=0.35,
                              top=0.93, bottom=0.05, left=0.06, right=0.97)
        chart_overall_status(df, fig.add_subplot(gs[0, 0]))
        chart_by_module(df, fig.add_subplot(gs[0, 1:]))
        chart_duration_histogram(df, fig.add_subplot(gs[1, 0]))
        chart_slowest_tests(df, fig.add_subplot(gs[1, 1]))
        chart_stats_table(df, summary, fig.add_subplot(gs[1, 2]))
        fig.savefig(output_path, dpi=150, bbox_inches="tight", facecolor=BG_COLOR)
    finally:
        # pyplot keeps figures alive until closed; release this one even
        # when a chart function or savefig raises.
        plt.close(fig)
    return output_path
if __name__ == "__main__":
    # Script entry point: render the report to its default location and
    # print where it was saved.
    report_path = generate()
    print(f"Chart saved: {report_path}")