fix(retrieval): enforce project-scoped context boundaries
This commit is contained in:
@@ -44,6 +44,7 @@ import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
DEFAULT_BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://dalidou:8100")
|
||||
@@ -52,6 +53,13 @@ DEFAULT_BUDGET = 3000
|
||||
DEFAULT_FIXTURES = Path(__file__).parent / "retrieval_eval_fixtures.json"
|
||||
|
||||
|
||||
def request_json(base_url: str, path: str, timeout: int) -> dict:
    """Issue a GET request to ``base_url + path`` and decode the JSON body.

    Args:
        base_url: URL prefix; it is concatenated with ``path`` verbatim, so
            the caller is responsible for slashes.
        path: Request path appended to ``base_url``.
        timeout: Timeout in seconds handed to ``urllib.request.urlopen``.

    Returns:
        The decoded JSON object, or ``{}`` when the response body is empty
        or contains only whitespace.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen``.
        UnicodeDecodeError: If the body is not valid UTF-8.
        json.JSONDecodeError: If the body is not valid JSON, or is valid
            JSON whose top-level value is not an object.
    """
    req = urllib.request.Request(f"{base_url}{path}", method="GET")
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        body = resp.read().decode("utf-8")
    if not body.strip():
        return {}
    payload = json.loads(body)
    # The signature promises a dict and callers chain ``.get`` on the result;
    # reject lists/scalars with the same exception type used for malformed
    # JSON so existing ``except json.JSONDecodeError`` handlers cover it.
    if not isinstance(payload, dict):
        raise json.JSONDecodeError("expected a JSON object", body, 0)
    return payload
|
||||
|
||||
|
||||
@dataclass
|
||||
class Fixture:
|
||||
name: str
|
||||
@@ -60,6 +68,7 @@ class Fixture:
|
||||
budget: int = DEFAULT_BUDGET
|
||||
expect_present: list[str] = field(default_factory=list)
|
||||
expect_absent: list[str] = field(default_factory=list)
|
||||
known_issue: bool = False
|
||||
notes: str = ""
|
||||
|
||||
|
||||
@@ -70,8 +79,13 @@ class FixtureResult:
|
||||
missing_present: list[str]
|
||||
unexpected_absent: list[str]
|
||||
total_chars: int
|
||||
known_issue: bool = False
|
||||
error: str = ""
|
||||
|
||||
@property
|
||||
def blocking_failure(self) -> bool:
    """Return True when this result did not pass and is not a known issue."""
    # A passing result can never block, regardless of the known-issue flag.
    if self.ok:
        return False
    return not self.known_issue
|
||||
|
||||
|
||||
def load_fixtures(path: Path) -> list[Fixture]:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
@@ -89,6 +103,7 @@ def load_fixtures(path: Path) -> list[Fixture]:
|
||||
budget=int(raw.get("budget", DEFAULT_BUDGET)),
|
||||
expect_present=list(raw.get("expect_present", [])),
|
||||
expect_absent=list(raw.get("expect_absent", [])),
|
||||
known_issue=bool(raw.get("known_issue", False)),
|
||||
notes=raw.get("notes", ""),
|
||||
)
|
||||
)
|
||||
@@ -117,6 +132,7 @@ def run_fixture(fixture: Fixture, base_url: str, timeout: int) -> FixtureResult:
|
||||
missing_present=list(fixture.expect_present),
|
||||
unexpected_absent=[],
|
||||
total_chars=0,
|
||||
known_issue=fixture.known_issue,
|
||||
error=f"http_error: {exc}",
|
||||
)
|
||||
|
||||
@@ -129,16 +145,26 @@ def run_fixture(fixture: Fixture, base_url: str, timeout: int) -> FixtureResult:
|
||||
missing_present=missing,
|
||||
unexpected_absent=unexpected,
|
||||
total_chars=len(formatted),
|
||||
known_issue=fixture.known_issue,
|
||||
)
|
||||
|
||||
|
||||
def print_human_report(results: list[FixtureResult]) -> None:
|
||||
def print_human_report(results: list[FixtureResult], metadata: dict) -> None:
|
||||
total = len(results)
|
||||
passed = sum(1 for r in results if r.ok)
|
||||
known = sum(1 for r in results if not r.ok and r.known_issue)
|
||||
blocking = sum(1 for r in results if r.blocking_failure)
|
||||
print(f"Retrieval eval: {passed}/{total} fixtures passed")
|
||||
print(
|
||||
"Target: "
|
||||
f"{metadata.get('base_url', 'unknown')} "
|
||||
f"build={metadata.get('health', {}).get('build_sha', 'unknown')}"
|
||||
)
|
||||
if known or blocking:
|
||||
print(f"Blocking failures: {blocking} Known issues: {known}")
|
||||
print()
|
||||
for r in results:
|
||||
marker = "PASS" if r.ok else "FAIL"
|
||||
marker = "PASS" if r.ok else ("KNOWN" if r.known_issue else "FAIL")
|
||||
print(f"[{marker}] {r.fixture.name} project={r.fixture.project} chars={r.total_chars}")
|
||||
if r.error:
|
||||
print(f" error: {r.error}")
|
||||
@@ -150,15 +176,21 @@ def print_human_report(results: list[FixtureResult]) -> None:
|
||||
print(f" notes: {r.fixture.notes}")
|
||||
|
||||
|
||||
def print_json_report(results: list[FixtureResult]) -> None:
|
||||
def print_json_report(results: list[FixtureResult], metadata: dict) -> None:
|
||||
payload = {
|
||||
"generated_at": metadata.get("generated_at"),
|
||||
"base_url": metadata.get("base_url"),
|
||||
"health": metadata.get("health", {}),
|
||||
"total": len(results),
|
||||
"passed": sum(1 for r in results if r.ok),
|
||||
"known_issues": sum(1 for r in results if not r.ok and r.known_issue),
|
||||
"blocking_failures": sum(1 for r in results if r.blocking_failure),
|
||||
"fixtures": [
|
||||
{
|
||||
"name": r.fixture.name,
|
||||
"project": r.fixture.project,
|
||||
"ok": r.ok,
|
||||
"known_issue": r.known_issue,
|
||||
"total_chars": r.total_chars,
|
||||
"missing_present": r.missing_present,
|
||||
"unexpected_absent": r.unexpected_absent,
|
||||
@@ -179,15 +211,26 @@ def main() -> int:
|
||||
parser.add_argument("--json", action="store_true", help="emit machine-readable JSON")
|
||||
args = parser.parse_args()
|
||||
|
||||
base_url = args.base_url.rstrip("/")
|
||||
try:
|
||||
health = request_json(base_url, "/health", args.timeout)
|
||||
except (urllib.error.URLError, TimeoutError, OSError, json.JSONDecodeError) as exc:
|
||||
health = {"error": str(exc)}
|
||||
metadata = {
|
||||
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||
"base_url": base_url,
|
||||
"health": health,
|
||||
}
|
||||
|
||||
fixtures = load_fixtures(args.fixtures)
|
||||
results = [run_fixture(f, args.base_url, args.timeout) for f in fixtures]
|
||||
results = [run_fixture(f, base_url, args.timeout) for f in fixtures]
|
||||
|
||||
if args.json:
|
||||
print_json_report(results)
|
||||
print_json_report(results, metadata)
|
||||
else:
|
||||
print_human_report(results)
|
||||
print_human_report(results, metadata)
|
||||
|
||||
return 0 if all(r.ok for r in results) else 1
|
||||
return 0 if not any(r.blocking_failure for r in results) else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user