mirror of https://github.com/ghostfolio/ghostfolio
3 changed files with 139 additions and 0 deletions
@ -0,0 +1,22 @@ |
|||||
|
[ |
||||
|
{ |
||||
|
"timestamp": "2026-02-28T14:47:17.398436Z", |
||||
|
"passed": 0, |
||||
|
"failed": 0, |
||||
|
"errors": 0, |
||||
|
"total": 0, |
||||
|
"pass_rate_pct": 0, |
||||
|
"status": "FAIL", |
||||
|
"regression": false |
||||
|
}, |
||||
|
{ |
||||
|
"timestamp": "2026-02-28T14:48:25.385529Z", |
||||
|
"passed": 182, |
||||
|
"failed": 0, |
||||
|
"errors": 0, |
||||
|
"total": 182, |
||||
|
"pass_rate_pct": 100.0, |
||||
|
"status": "PASS", |
||||
|
"regression": false |
||||
|
} |
||||
|
] |
||||
@ -0,0 +1,10 @@ |
|||||
|
{ |
||||
|
"timestamp": "2026-02-28T14:48:25.385529Z", |
||||
|
"passed": 182, |
||||
|
"failed": 0, |
||||
|
"errors": 0, |
||||
|
"total": 182, |
||||
|
"pass_rate_pct": 100.0, |
||||
|
"status": "PASS", |
||||
|
"regression": false |
||||
|
} |
||||
@ -0,0 +1,107 @@ |
|||||
|
""" |
||||
|
Runs the eval suite and saves results to a JSON history file for regression tracking. |
||||
|
""" |
||||
|
import json |
||||
|
import re |
||||
|
import subprocess |
||||
|
import sys |
||||
|
from datetime import datetime |
||||
|
from pathlib import Path |
||||
|
|
||||
|
|
||||
|
def _run_pytest(project_root):
    """Run the eval suite under pytest and return its combined stdout + stderr."""
    completed = subprocess.run(
        [
            sys.executable,
            "-m",
            "pytest",
            "agent/evals",
            # No "-q" here: it would cancel "-v" and hide the per-test
            # PASSED/FAILED lines that the fallback parser relies on.
            "-v",
            "--tb=short",
        ],
        capture_output=True,
        text=True,
        cwd=project_root,
    )
    return completed.stdout + completed.stderr


def _parse_pytest_counts(output):
    """Return ``(passed, failed, errors)`` extracted from captured pytest output."""
    # Preferred source: pytest's final summary line, e.g.
    # "=== 2 failed, 180 passed, 1 warning in 30.32s ===". It is printed at
    # every verbosity level and is not confused by log lines that merely
    # contain the word "ERROR".
    for line in reversed(output.splitlines()):
        if not re.search(r"\bin\s+\d+(?:\.\d+)?s\b", line):
            continue
        counts = {
            label: int(number)
            for number, label in re.findall(
                r"(\d+)\s+(passed|failed|error)", line
            )
        }
        if counts or "no tests ran" in line:
            return (
                counts.get("passed", 0),
                counts.get("failed", 0),
                counts.get("error", 0),
            )

    # Fallback (no summary line, e.g. truncated output): count the per-test
    # result lines emitted in verbose mode.
    lines = output.splitlines()
    passed = sum(1 for line in lines if " PASSED" in line)
    failed = sum(1 for line in lines if " FAILED" in line)
    errors = sum(1 for line in lines if " ERROR" in line)
    return passed, failed, errors


def _load_history(history_file):
    """Return the list of previous run records, or ``[]`` if none is usable."""
    if not history_file.exists():
        return []
    try:
        loaded = json.loads(history_file.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # Best effort: a broken history must not block recording a new run,
        # but say so instead of discarding it silently.
        print(
            f"Warning: ignoring unreadable eval history {history_file}: {exc}",
            file=sys.stderr,
        )
        return []
    if not isinstance(loaded, list):
        print(
            f"Warning: ignoring eval history {history_file}: expected a JSON list",
            file=sys.stderr,
        )
        return []
    return loaded


def run_and_save_evals(*, results_dir=None, pytest_output=None):
    """Run the eval suite and append the outcome to a JSON history file.

    The run record is appended to ``eval_history.json`` and also written on
    its own to ``latest_run.json``; a short report is printed to stdout.

    Args:
        results_dir: Directory for the two JSON files. Defaults to a
            ``results`` directory next to this file.
        pytest_output: Already-captured pytest output to record. When
            ``None`` (the default), pytest is run on ``agent/evals`` from the
            directory three levels above this file.

    Returns:
        The run record dict with keys ``timestamp``, ``passed``, ``failed``,
        ``errors``, ``total``, ``pass_rate_pct``, ``status``, ``regression``
        and, when the pass rate dropped versus the previous record,
        ``regression_detail``.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    if results_dir is None:
        results_dir = Path(__file__).parent / "results"
    results_dir = Path(results_dir)
    results_dir.mkdir(parents=True, exist_ok=True)

    if pytest_output is None:
        # Run pytest from project root (parent of agent)
        pytest_output = _run_pytest(Path(__file__).parent.parent.parent)

    passed, failed, errors = _parse_pytest_counts(pytest_output)
    total = passed + failed + errors
    pass_rate = round(passed / total * 100, 1) if total > 0 else 0

    run_record = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the "+00:00" suffix is rewritten so the stored format stays "...Z".
        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "passed": passed,
        "failed": failed,
        "errors": errors,
        "total": total,
        "pass_rate_pct": pass_rate,
        "status": "PASS" if pass_rate >= 80 else "FAIL",
        "regression": False,
    }

    history_file = results_dir / "eval_history.json"
    history = _load_history(history_file)

    # Check for regression against the most recent record. A record without
    # a pass rate is treated as 100%, and that same value is used in the
    # message so a missing key cannot raise KeyError.
    last = history[-1] if history else None
    previous_rate = last.get("pass_rate_pct", 100) if isinstance(last, dict) else None
    if isinstance(previous_rate, (int, float)) and pass_rate < previous_rate:
        run_record["regression"] = True
        run_record["regression_detail"] = (
            f"Pass rate dropped from {previous_rate}% to {pass_rate}%"
        )

    history.append(run_record)
    history_file.write_text(json.dumps(history, indent=2), encoding="utf-8")

    # Also save latest run separately
    latest_file = results_dir / "latest_run.json"
    latest_file.write_text(json.dumps(run_record, indent=2), encoding="utf-8")

    print(f"\n{'='*50}")
    print(f"EVAL RUN: {run_record['timestamp']}")
    print(f"Passed: {passed}/{total} ({pass_rate}%)")
    print(f"Status: {run_record['status']}")
    if run_record.get("regression"):
        print(f"⚠️ REGRESSION: {run_record['regression_detail']}")
    print(f"History: {len(history)} runs saved")
    print(f"{'='*50}\n")

    return run_record
||||
|
|
||||
|
|
||||
|
# Script entry point: perform one eval run only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_and_save_evals()
||||
Loading…
Reference in new issue