#!/usr/bin/env python3
"""
Synchronise the Chroma vector store with the engrams held in the SQL database.

Every engram ID found in the ``engrams`` table that is not yet present in the
Chroma collection is loaded from the store and added to Chroma. A JSON report
of the run is printed when the file is executed as a script.
"""

from __future__ import annotations

import json
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List

BRAIN_DIR = Path("/root/.openclaw/workspace/second-brain")
# The project root must be on sys.path before the `src.*` imports below.
sys.path.insert(0, str(BRAIN_DIR))

from src.store import EngramStore
from src.chroma_store import ChromaStore

DB_PATH = BRAIN_DIR / "data" / "brain.sqlite"
CHROMA_DIR = BRAIN_DIR / "data" / "chroma"


def run() -> Dict[str, Any]:
    """Add every engram missing from Chroma and return a report of the run.

    Returns:
        A dict with the keys ``success``, ``time`` (UTC ISO-8601 timestamp),
        ``indexed`` (engrams added during this run), ``skipped`` (IDs already
        present in Chroma) and ``errors`` (one ``"<id>: <message>"`` string
        per engram that could not be processed).

    Raises:
        Whatever the store constructors, the SQL query or the Chroma lookup
        raise: only the per-engram work is guarded, so a failure during
        setup propagates to the caller.
    """
    store = EngramStore(str(DB_PATH))
    chroma = ChromaStore(str(CHROMA_DIR))

    # NOTE(review): "success" is initialised to True and never changed here,
    # even when `errors` is non-empty -- confirm this is what consumers expect.
    report: Dict[str, Any] = {
        "success": True,
        "time": datetime.now(timezone.utc).isoformat(),
        "indexed": 0,
        "skipped": 0,
        "errors": [],
    }

    # Collect the ID of every engram known to the SQL database. This reads
    # the store's `_conn` attribute directly (presumably its DB connection).
    id_rows = store._conn.execute("SELECT id FROM engrams").fetchall()
    engram_ids = [record[0] for record in id_rows]

    # IDs already present in the Chroma collection; an empty `include` list
    # is passed because only the "ids" field of the result is used.
    chroma_snapshot = chroma.collection.get(include=[])
    already_indexed = set(chroma_snapshot["ids"])

    for engram_id in engram_ids:
        # A failure on one engram is recorded and the loop moves on to the
        # next ID, so a single bad record does not abort the whole run.
        try:
            if engram_id in already_indexed:
                report["skipped"] += 1
            else:
                engram = store.get(engram_id)
                if engram is None:
                    # The ID was listed by the query but could not be loaded.
                    report["errors"].append(f"{engram_id}: not found in store")
                else:
                    chroma.add(engram)
                    report["indexed"] += 1
        except Exception as exc:
            report["errors"].append(f"{engram_id}: {exc}")

    return report


if __name__ == "__main__":
    print(json.dumps(run(), ensure_ascii=False, indent=2))