Files
second-brain/cron_tasks/index_vectors_fix.py

42 lines
1.1 KiB
Python

#!/usr/bin/env python3
"""Force index all missing Engrams into Chroma.

Reads every ID from the ``engrams`` table of the SQLite database, compares
them with the IDs already present in the Chroma collection, adds each engram
that Chroma lacks, and prints a summary of what was indexed and what failed.
All paths are hard-coded relative to ``BRAIN_DIR``.
"""
import sys
from pathlib import Path
# Root of the second-brain project; the data directory lives underneath it.
BRAIN_DIR = Path("/root/.openclaw/workspace/second-brain")
# Must run before the `src.*` imports below so that the `src` package,
# located under BRAIN_DIR, is resolvable regardless of the working directory.
sys.path.insert(0, str(BRAIN_DIR))
from src.store import EngramStore
from src.chroma_store import ChromaStore
# On-disk locations of the two stores being reconciled.
DB_PATH = BRAIN_DIR / "data" / "brain.sqlite"
CHROMA_DIR = BRAIN_DIR / "data" / "chroma"
# Both constructors receive plain string paths, hence the str() conversion.
store = EngramStore(str(DB_PATH))
chroma = ChromaStore(str(CHROMA_DIR))
# Engram IDs recorded in SQLite: first column of every row in `engrams`.
# NOTE(review): this reaches into the store's private `_conn` attribute.
id_rows = store._conn.execute("SELECT id FROM engrams").fetchall()
db_ids = [id_row[0] for id_row in id_rows]

# IDs already held by the Chroma collection. Only the "ids" entry of the
# result is read; include=[] presumably tells Chroma to skip documents,
# embeddings and metadata -- confirm against the Chroma API.
chroma_result = chroma.collection.get(include=[])
existing = set(chroma_result["ids"])

# An engram is missing when its DB ID is absent from Chroma; DB order is kept.
missing = [engram_id for engram_id in db_ids if engram_id not in existing]
print(f"DB: {len(db_ids)} IDs, Chroma: {len(existing)} IDs, Missing: {len(missing)}")

indexed = 0
errors = []
for engram_id in missing:
    # A failure on one engram is recorded and must not stop the remaining ones.
    try:
        engram = store.get(engram_id)
        if engram is not None:
            chroma.add(engram)
            indexed += 1
        else:
            # The ID was listed by the SELECT but the store returned nothing.
            errors.append(f"{engram_id}: not found")
    except Exception as exc:
        errors.append(f"{engram_id}: {exc}")

print(f"Indexed: {indexed}, Errors: {len(errors)}")
# Only the first ten failures are listed; the total is in the line above.
for message in errors[:10]:
    print(f" {message}")