61 lines
1.5 KiB
Python
61 lines
1.5 KiB
Python
#!/usr/bin/env python3
"""
Index Engrams into the Chroma vector store for semantic search.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List
|
|
|
|
# Root directory of the second-brain workspace. It is pushed to the front of
# sys.path so that the project-local `src` package can be imported by this
# script.
BRAIN_DIR = Path("/root/.openclaw/workspace/second-brain")
sys.path.insert(0, str(BRAIN_DIR))
|
|
from src.store import EngramStore
|
|
from src.chroma_store import ChromaStore
|
|
|
|
# On-disk locations used by the indexer, both under <BRAIN_DIR>/data:
# the SQLite database file and the Chroma persistence directory.
DB_PATH = BRAIN_DIR.joinpath("data", "brain.sqlite")
CHROMA_DIR = BRAIN_DIR.joinpath("data", "chroma")
|
|
|
|
|
|
def run() -> Dict[str, Any]:
    """Add every engram that is missing from Chroma and report the outcome.

    Engram ids are read from the ``engrams`` table of the SQLite store and
    compared with the ids already present in the Chroma collection. Ids that
    are already present are counted as skipped; the rest are loaded from the
    store and added to Chroma one by one.

    Returns:
        A dict with the keys, in this order:

        * ``"success"`` -- always ``True`` as written.
        * ``"time"`` -- UTC ISO-8601 timestamp taken before indexing starts.
        * ``"indexed"`` -- number of engrams added to Chroma in this run.
        * ``"skipped"`` -- number of engrams whose id was already in Chroma.
        * ``"errors"`` -- list of ``"<id>: <message>"`` strings, one per
          engram that could not be loaded or added.
    """
    engram_store = EngramStore(str(DB_PATH))
    vector_store = ChromaStore(str(CHROMA_DIR))

    started_at = datetime.now(timezone.utc).isoformat()

    # Ids come straight from the store's underlying connection.
    id_rows = engram_store._conn.execute("SELECT id FROM engrams").fetchall()
    sql_ids = [record[0] for record in id_rows]

    # Only the "ids" entry of the Chroma result is used; a set keeps the
    # per-engram membership test cheap.
    chroma_snapshot = vector_store.collection.get(include=[])
    already_indexed = set(chroma_snapshot["ids"])

    indexed = 0
    skipped = 0
    failures: List[str] = []

    for engram_id in sql_ids:
        # A failure on one engram is recorded and must not stop the run.
        try:
            if engram_id in already_indexed:
                skipped += 1
                continue

            engram = engram_store.get(engram_id)
            if engram is None:
                failures.append(f"{engram_id}: not found in store")
                continue

            vector_store.add(engram)
            indexed += 1
        except Exception as exc:
            failures.append(f"{engram_id}: {exc}")

    # NOTE(review): "success" stays True even when `failures` is non-empty --
    # confirm whether consumers of this report rely on that.
    return {
        "success": True,
        "time": started_at,
        "indexed": indexed,
        "skipped": skipped,
        "errors": failures,
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the indexer and print its report as
    # pretty-printed JSON, leaving non-ASCII characters unescaped.
    summary = run()
    print(json.dumps(summary, indent=2, ensure_ascii=False))
|