From ec8870ea4078e0fdac79ddcc5493a65ff41dc510 Mon Sep 17 00:00:00 2001
From: Otto
Date: Wed, 27 May 2026 00:05:51 +0200
Subject: [PATCH] feat(verify): add pending external verifier

---
 RUNBOOK.md                                    |   8 +
 cron_tasks/verify_pending_external.py         | 178 ++++++++++++++++++
 docs/RELEASE_CHECKLIST.md                     |   8 +
 ...penclaw-secondbrain-verify-pending.service |   9 +
 .../openclaw-secondbrain-verify-pending.timer |  11 ++
 5 files changed, 214 insertions(+)
 create mode 100755 cron_tasks/verify_pending_external.py
 create mode 100644 systemd/openclaw-secondbrain-verify-pending.service
 create mode 100644 systemd/openclaw-secondbrain-verify-pending.timer

diff --git a/RUNBOOK.md b/RUNBOOK.md
index 33db4a4..d5e15d4 100644
--- a/RUNBOOK.md
+++ b/RUNBOOK.md
@@ -15,6 +15,14 @@ sudo ln -sf /root/.openclaw/workspace/second-brain/systemd/openclaw-memory-archi
 sudo systemctl daemon-reload
 ```
 
+Optional (verification hardening):
+
+```bash
+sudo ln -sf /root/.openclaw/workspace/second-brain/systemd/openclaw-secondbrain-verify-pending.* /etc/systemd/system/
+sudo systemctl daemon-reload
+sudo systemctl enable --now openclaw-secondbrain-verify-pending.timer
+```
+
 Enable timers:
 
 ```bash
diff --git a/cron_tasks/verify_pending_external.py b/cron_tasks/verify_pending_external.py
new file mode 100755
index 0000000..de0afd6
--- /dev/null
+++ b/cron_tasks/verify_pending_external.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+"""
+Verify pending (unconfirmed) engrams using lightweight external checks.
+
+Policy (conservative):
+- `openclaw-memory` is treated as internal ground-truth and is auto-confirmed
+  by the review job (see `cron_tasks/review_brain.py` in the workspace runtime).
+- For `source=web`, confirm if the grounded URL responds with HTTP 2xx, reject on
+  4xx/5xx, and keep pending on timeouts/unknown.
+- Reject obvious low-signal placeholders (e.g. session summary stubs).
+"""
+
+import json
+import os
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+
+WORKSPACE = Path(os.environ.get("SECOND_BRAIN_WORKSPACE", "/root/.openclaw/workspace/second-brain"))
+DB_PATH = Path(os.environ.get("BRAIN_DB", str(WORKSPACE / "data" / "brain.sqlite"))).resolve()
+
+sys.path.insert(0, str(WORKSPACE))
+from src.store import EngramStore
+from src.engram import ReviewEntry
+
+OUTPUT_FILE = os.environ.get("CRON_OUTPUT_FILE", "/tmp/verify_pending_external.json")
+
+# Reviewer name recorded in every ReviewEntry written by this job.
+REVIEWER = "verify-pending"
+# Content prefixes that mark a `source=session` engram as a low-signal stub.
+SESSION_PLACEHOLDER_PREFIXES = ("Session Summary (sess_", "Please remember ")
+
+
+def _now() -> str:
+    """Return the current UTC time as an ISO-8601 string."""
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _get_url(meta: dict[str, Any]) -> Optional[str]:
+    """Return the http(s) URL stored in *meta*, or None if there is none.
+
+    `meta["url"]` wins; `meta["grounding"]["url"]` is the fallback. Values that
+    are not strings or do not start with http:// or https:// are ignored.
+    """
+    grounding = meta.get("grounding")
+    candidates = [meta.get("url")]
+    if isinstance(grounding, dict):
+        candidates.append(grounding.get("url"))
+    for candidate in candidates:
+        if isinstance(candidate, str) and candidate.startswith(("http://", "https://")):
+            return candidate
+    return None
+
+
+def _http_status(url: str, timeout_s: float = 6.0) -> Optional[int]:
+    """GET *url* and return the HTTP status code, or None if no response arrived.
+
+    `urlopen` raises `HTTPError` for 4xx/5xx responses instead of returning them,
+    so that exception is translated back into its status code; otherwise the
+    reject-on-4xx/5xx policy could never trigger. Timeouts, connection failures
+    and malformed URLs yield None so the engram stays pending.
+    """
+    try:
+        req = urllib.request.Request(
+            url,
+            method="GET",
+            headers={"User-Agent": "openclaw-secondbrain/verify_pending_external"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
+            return int(getattr(resp, "status", 200))
+    except urllib.error.HTTPError as err:
+        return int(err.code)
+    except Exception:
+        # Deliberately broad: one unreachable or malformed URL must not abort the batch.
+        return None
+
+
+def _record_review(store: Any, eg: Any, action: str, note: str) -> None:
+    """Apply a "confirm" or "reject" verdict to *eg*, log it in its history, and save it."""
+    stamp = _now()
+    if action == "confirm":
+        eg.correctness.confirmed = True
+        eg.correctness.confirmations += 1
+    else:
+        eg.correctness.rejections += 1
+    eg.correctness.last_reviewed = stamp
+    eg.correctness.review_history.append(
+        ReviewEntry(by=REVIEWER, action=action, at=stamp, note=note)
+    )
+    store.save(eg)
+
+
+def _verdict(eg: Any) -> Optional[tuple[str, str]]:
+    """Return (action, note) for *eg*, or None when it must stay pending."""
+    src = eg.metadata.get("source")
+    if src == "session":
+        content = (eg.content or "").strip()
+        if content.startswith(SESSION_PLACEHOLDER_PREFIXES):
+            return "reject", "Auto-reject: session placeholder"
+        return None
+    if src != "web":
+        return None
+    url = _get_url(eg.metadata)
+    if not url:
+        return None
+    status = _http_status(url)
+    if status is None:
+        return None
+    if 200 <= status < 300:
+        return "confirm", f"Auto-confirm: web url ok ({status}) {url}"
+    if status >= 400:
+        return "reject", f"Auto-reject: web url status={status} {url}"
+    # 1xx/3xx are neither success nor failure under the policy: keep pending.
+    return None
+
+
+def _write_report(report: dict[str, Any]) -> None:
+    """Write the JSON run report to OUTPUT_FILE."""
+    Path(OUTPUT_FILE).write_text(json.dumps(report, indent=2))
+
+
+def main() -> int:
+    """Review every pending engram once; return 0 on success, 1 if the DB file is missing."""
+    if not DB_PATH.exists():
+        out = {"success": False, "error": f"db missing: {DB_PATH}", "time": _now()}
+        _write_report(out)
+        print(out["error"])
+        return 1
+
+    store = EngramStore(str(DB_PATH))
+    all_egs = store.get_all()
+    # Pending = never confirmed and never rejected.
+    pending = [
+        eg
+        for eg in all_egs
+        if (not eg.correctness.confirmed and eg.correctness.rejections == 0)
+    ]
+
+    confirmed = 0
+    rejected = 0
+    still_pending = 0
+
+    for eg in pending:
+        verdict = _verdict(eg)
+        if verdict is None:
+            still_pending += 1
+            continue
+        action, note = verdict
+        _record_review(store, eg, action, note)
+        if action == "confirm":
+            confirmed += 1
+        else:
+            rejected += 1
+
+    out = {
+        "success": True,
+        "time": _now(),
+        "total": len(all_egs),
+        "pending_before": len(pending),
+        "checked": len(pending),
+        "confirmed": confirmed,
+        "rejected": rejected,
+        "still_pending": still_pending,
+    }
+    _write_report(out)
+    print(
+        f"VERIFY: pending_before={out['pending_before']} confirmed={confirmed} rejected={rejected} still_pending={still_pending}"
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
+
diff --git a/docs/RELEASE_CHECKLIST.md b/docs/RELEASE_CHECKLIST.md
index 8f0f765..6340c86 100644
--- a/docs/RELEASE_CHECKLIST.md
+++ b/docs/RELEASE_CHECKLIST.md
@@ -31,6 +31,14 @@ sudo ln -sf /root/.openclaw/workspace/second-brain/systemd/openclaw-secondbrain-
 sudo systemctl daemon-reload
 ```
 
+Optional (verification hardening):
+
+```bash
+sudo ln -sf /root/.openclaw/workspace/second-brain/systemd/openclaw-secondbrain-verify-pending.* /etc/systemd/system/
+sudo systemctl daemon-reload
+sudo systemctl enable --now openclaw-secondbrain-verify-pending.timer
+```
+
 ### Enable timers
 
 ```bash
diff --git a/systemd/openclaw-secondbrain-verify-pending.service b/systemd/openclaw-secondbrain-verify-pending.service
new file mode 100644
index 0000000..a3ac875
--- /dev/null
+++ b/systemd/openclaw-secondbrain-verify-pending.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=OpenClaw Second-Brain verify_pending_external
+OnFailure=openclaw-secondbrain-notify@%n.service
+
+[Service]
+Type=oneshot
+WorkingDirectory=/root/.openclaw/workspace
+ExecStart=/bin/bash -lc 'flock -n /tmp/%n.lock /usr/bin/python3 /root/.openclaw/workspace/openclaw_cron_wrapper.py verify_pending_external'
+
diff --git a/systemd/openclaw-secondbrain-verify-pending.timer b/systemd/openclaw-secondbrain-verify-pending.timer
new file mode 100644
index 0000000..8665b20
--- /dev/null
+++ b/systemd/openclaw-secondbrain-verify-pending.timer
@@ -0,0 +1,11 @@
+[Unit]
+Description=OpenClaw Second-Brain periodic verify_pending_external
+
+[Timer]
+OnBootSec=15min
+OnUnitActiveSec=2h
+Persistent=true
+
+[Install]
+WantedBy=timers.target
+