Files
second-brain/cron_tasks/verify_pending_external.py
2026-05-27 01:11:59 +02:00

170 lines
5.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Verify pending (unconfirmed) engrams using lightweight external checks.
Policy (conservative):
- `openclaw-memory` is treated as internal ground-truth and is auto-confirmed
by the review job (see `cron_tasks/review_brain.py` in the workspace runtime).
- For `source=web`, confirm if the grounded URL responds with HTTP 2xx, reject on
4xx/5xx, and keep pending on timeouts/unknown.
- Reject obvious low-signal placeholders (e.g. session summary stubs).
"""
import json
import os
import sys
from pathlib import Path
from datetime import datetime, timezone
from typing import Any, Optional
# Root of the second-brain checkout; the SECOND_BRAIN_WORKSPACE environment
# variable overrides the default location.
WORKSPACE = Path(os.environ.get("SECOND_BRAIN_WORKSPACE", "/root/.openclaw/workspace/second-brain"))
# Path of the engram database file; BRAIN_DB overrides the default, which
# lives under WORKSPACE/data. The path is resolved to an absolute path.
DB_PATH = Path(os.environ.get("BRAIN_DB", str(WORKSPACE / "data" / "brain.sqlite"))).resolve()
# The workspace must be on sys.path BEFORE the project-local `src` imports
# below, so do not reorder these lines.
sys.path.insert(0, str(WORKSPACE))
from src.store import EngramStore
from src.engram import ReviewEntry
# File that receives the JSON run report written by main(); CRON_OUTPUT_FILE
# overrides the default.
OUTPUT_FILE = os.environ.get("CRON_OUTPUT_FILE", "/tmp/verify_pending_external.json")
def _now() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def _get_url(meta: dict[str, Any]) -> Optional[str]:
    """Pick the HTTP(S) URL that an engram's metadata points at.

    The top-level ``"url"`` entry wins when it is a string starting with
    ``http://`` or ``https://``. Otherwise the ``"url"`` entry of a
    ``"grounding"`` dict is tried under the same rule.

    Args:
        meta: Engram metadata mapping.

    Returns:
        The first acceptable URL, or ``None`` when neither location holds one.
    """

    def _is_http(value: object) -> bool:
        # Only plain strings with an explicit http/https scheme qualify.
        return isinstance(value, str) and value.startswith(("http://", "https://"))

    top_level = meta.get("url")
    if _is_http(top_level):
        return top_level

    grounding = meta.get("grounding")
    nested = grounding.get("url") if isinstance(grounding, dict) else None
    return nested if _is_http(nested) else None
def _http_status(url: str, timeout_s: float = 6.0) -> Optional[int]:
    """Fetch *url* with a GET request and report the HTTP status code.

    ``urllib.request.urlopen`` raises ``urllib.error.HTTPError`` for 4xx/5xx
    responses instead of returning them. That exception is translated back
    into its status code so callers can tell "the server answered with an
    error" apart from "no answer at all".

    Args:
        url: Address to request.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Returns:
        The status code of the final response (redirects are followed by
        urllib), including 4xx/5xx codes, or ``None`` when no HTTP status
        could be obtained (timeout, DNS/connection failure, malformed URL,
        non-HTTP response).
    """
    import urllib.error
    import urllib.request

    try:
        req = urllib.request.Request(
            url,
            method="GET",
            headers={"User-Agent": "openclaw-secondbrain/verify_pending_external"},
        )
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
            return int(getattr(resp, "status", 200))
    except urllib.error.HTTPError as err:
        # The server did answer, just with an error status: surface the code
        # rather than collapsing it into "unknown".
        code = err.code
        return code if isinstance(code, int) else None
    except Exception:
        # Best effort: anything else (timeouts, URLError, bad URL syntax, ...)
        # means the status is unknown.
        return None
def main() -> int:
    """Review every pending engram once and write a JSON run report.

    An engram is pending when it is not confirmed and has zero rejections.
    Pending engrams are handled as follows:

    - ``source == "session"`` with placeholder content is rejected.
    - ``source == "web"`` is confirmed on a 2xx status from its URL, rejected
      on any other status, and left pending when there is no usable URL or no
      status could be obtained.
    - Everything else stays pending.

    The report is written to ``OUTPUT_FILE`` and a one-line summary is
    printed.

    Returns:
        ``1`` when the database file does not exist, otherwise ``0``.
    """
    report_path = Path(OUTPUT_FILE)

    if not DB_PATH.exists():
        failure = {"success": False, "error": f"db missing: {DB_PATH}", "time": _now()}
        report_path.write_text(json.dumps(failure, indent=2))
        print(failure["error"])
        return 1

    store = EngramStore(str(DB_PATH))

    # Page through the store; the offset is the number of engrams read so far.
    all_egs = []
    while True:
        page = store.get_all(limit=2000, offset=len(all_egs))
        if not page:
            break
        all_egs.extend(page)

    pending = []
    for candidate in all_egs:
        if candidate.correctness.confirmed or candidate.correctness.rejections != 0:
            continue
        pending.append(candidate)

    def _record_review(engram, action: str, note: str) -> None:
        # Stamp the review time, append the history entry, then persist.
        engram.correctness.last_reviewed = _now()
        engram.correctness.review_history.append(
            ReviewEntry(by="verify-pending", action=action, at=_now(), note=note)
        )
        store.save(engram)

    confirmed = 0
    rejected = 0
    still_pending = 0

    for engram in pending:
        origin = engram.metadata.get("source")
        text = (engram.content or "").strip()

        is_placeholder = text.startswith(("Session Summary (sess_", "Please remember "))
        if origin == "session" and is_placeholder:
            engram.correctness.rejections += 1
            _record_review(engram, "reject", "Auto-reject: session placeholder")
            rejected += 1
            continue

        if origin != "web":
            still_pending += 1
            continue

        url = _get_url(engram.metadata)
        status = _http_status(url) if url else None
        if status is None:
            # No usable URL, or no status obtained: leave it for a later run.
            still_pending += 1
            continue

        if 200 <= status < 300:
            engram.correctness.confirmed = True
            engram.correctness.confirmations += 1
            _record_review(engram, "confirm", f"Auto-confirm: web url ok ({status}) {url}")
            confirmed += 1
        else:
            engram.correctness.rejections += 1
            _record_review(engram, "reject", f"Auto-reject: web url status={status} {url}")
            rejected += 1

    out = {
        "success": True,
        "time": _now(),
        "total": len(all_egs),
        "pending_before": len(pending),
        # Every pending engram is visited exactly once by the loop above.
        "checked": len(pending),
        "confirmed": confirmed,
        "rejected": rejected,
        "still_pending": still_pending,
    }
    report_path.write_text(json.dumps(out, indent=2))
    print(
        f"VERIFY: pending_before={out['pending_before']} confirmed={confirmed} rejected={rejected} still_pending={still_pending}"
    )
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())