#!/usr/bin/env python3
"""
Verify pending (unconfirmed) engrams using lightweight external checks.

Policy (conservative):
- `openclaw-memory` is treated as internal ground-truth and is auto-confirmed
  by the review job (see `cron_tasks/review_brain.py` in the workspace runtime).
- For `source=web`, confirm if the grounded URL responds with HTTP 2xx,
  reject on 4xx/5xx, and keep pending on timeouts/unknown.
- Reject obvious low-signal placeholders (e.g. session summary stubs).
"""

import json
import os
import sys
import urllib.error
import urllib.request
from pathlib import Path
from datetime import datetime, timezone
from typing import Any, Optional

WORKSPACE = Path(os.environ.get("SECOND_BRAIN_WORKSPACE", "/root/.openclaw/workspace/second-brain"))
DB_PATH = Path(os.environ.get("BRAIN_DB", str(WORKSPACE / "data" / "brain.sqlite"))).resolve()

# The workspace root must be importable before the project-local imports below.
sys.path.insert(0, str(WORKSPACE))

from src.store import EngramStore
from src.engram import ReviewEntry

OUTPUT_FILE = os.environ.get("CRON_OUTPUT_FILE", "/tmp/verify_pending_external.json")

# Content prefixes that mark a `source=session` engram as a low-signal stub.
_SESSION_PLACEHOLDER_PREFIXES = ("Session Summary (sess_", "Please remember ")


def _now() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _get_url(meta: dict[str, Any]) -> Optional[str]:
    """Return the http(s) URL grounding an engram, or None if there is none.

    `meta["url"]` is preferred; `meta["grounding"]["url"]` is the fallback
    when `grounding` is a dict. Values that are not strings or do not start
    with `http://` / `https://` are ignored.
    """
    candidates = [meta.get("url")]
    grounding = meta.get("grounding")
    if isinstance(grounding, dict):
        candidates.append(grounding.get("url"))
    for candidate in candidates:
        if isinstance(candidate, str) and candidate.startswith(("http://", "https://")):
            return candidate
    return None


def _http_status(url: str, timeout_s: float = 6.0) -> Optional[int]:
    """Issue a GET for *url* and return the HTTP status code.

    Returns the numeric status whenever the server produced an HTTP response,
    including error responses. Returns None when no status could be obtained
    (timeout, DNS/connection/TLS failure, malformed URL, ...).
    """
    req = urllib.request.Request(
        url,
        method="GET",
        headers={"User-Agent": "openclaw-secondbrain/verify_pending_external"},
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
            return int(getattr(resp, "status", 200))
    except urllib.error.HTTPError as exc:
        # urlopen raises for non-2xx responses; the code is still a definite
        # answer from the server and must reach the caller so that 4xx/5xx
        # can be rejected instead of being treated as "unknown".
        return int(exc.code)
    except Exception:
        # Deliberate best-effort: any other failure means "unknown", which
        # the caller keeps pending rather than rejecting.
        return None


def _write_report(report: dict[str, Any]) -> None:
    """Write the JSON run report to OUTPUT_FILE."""
    Path(OUTPUT_FILE).write_text(json.dumps(report, indent=2))


def _record_review(store: Any, eg: Any, *, action: str, note: str) -> None:
    """Apply a confirm/reject decision to *eg*, log it and persist it.

    `action` is either "confirm" or "reject". One timestamp is shared by
    `last_reviewed` and the appended review-history entry so the two agree.
    """
    stamp = _now()
    if action == "confirm":
        eg.correctness.confirmed = True
        eg.correctness.confirmations += 1
    else:
        eg.correctness.rejections += 1
    eg.correctness.last_reviewed = stamp
    eg.correctness.review_history.append(
        ReviewEntry(
            by="verify-pending",
            action=action,
            at=stamp,
            note=note,
        )
    )
    store.save(eg)


def main() -> int:
    """Review every pending engram and write a JSON summary to OUTPUT_FILE.

    An engram is pending when it is neither confirmed nor has any rejection.
    Session placeholders are rejected; web engrams are confirmed on HTTP 2xx,
    rejected on HTTP 4xx/5xx and left pending otherwise; everything else is
    left pending.

    Returns:
        0 on success, 1 when the database file does not exist.
    """
    if not DB_PATH.exists():
        out = {"success": False, "error": f"db missing: {DB_PATH}", "time": _now()}
        _write_report(out)
        print(out["error"])
        return 1

    store = EngramStore(str(DB_PATH))

    # Page through the store until an empty batch signals the end.
    all_egs = []
    offset = 0
    while batch := store.get_all(limit=2000, offset=offset):
        all_egs.extend(batch)
        offset += len(batch)

    pending = [
        eg
        for eg in all_egs
        if (not eg.correctness.confirmed and eg.correctness.rejections == 0)
    ]

    confirmed = 0
    rejected = 0
    still_pending = 0
    checked = 0

    for eg in pending:
        checked += 1
        src = eg.metadata.get("source")
        content = (eg.content or "").strip()

        if src == "session" and content.startswith(_SESSION_PLACEHOLDER_PREFIXES):
            _record_review(
                store,
                eg,
                action="reject",
                note="Auto-reject: session placeholder",
            )
            rejected += 1
            continue

        if src == "web":
            url = _get_url(eg.metadata)
            status = _http_status(url) if url else None
            if status is None:
                # No grounded URL, or no HTTP answer: not enough evidence.
                still_pending += 1
            elif 200 <= status < 300:
                _record_review(
                    store,
                    eg,
                    action="confirm",
                    note=f"Auto-confirm: web url ok ({status}) {url}",
                )
                confirmed += 1
            elif status >= 400:
                _record_review(
                    store,
                    eg,
                    action="reject",
                    note=f"Auto-reject: web url status={status} {url}",
                )
                rejected += 1
            else:
                # 1xx/3xx is neither success nor a definite failure; the
                # policy only rejects on 4xx/5xx, so stay conservative.
                still_pending += 1
            continue

        still_pending += 1

    out = {
        "success": True,
        "time": _now(),
        "total": len(all_egs),
        "pending_before": len(pending),
        "checked": checked,
        "confirmed": confirmed,
        "rejected": rejected,
        "still_pending": still_pending,
    }
    _write_report(out)
    print(
        f"VERIFY: pending_before={out['pending_before']} confirmed={confirmed} rejected={rejected} still_pending={still_pending}"
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())