""" chroma_store.py - ChromaDB Vektor-Speicher für semantische Suche. Erweitert den SQLite-Store um Vektor-ähnlichkeit. """ import json from pathlib import Path from typing import List, Optional, Dict, Any from uuid import UUID import chromadb from chromadb.config import Settings from .engram import Engram from .embedder import encode class ChromaStore: """ ChromaDB-basierter Vektor-Speicher. Speichert Engramme als Vektoren mit Metadaten. """ def __init__(self, path: str = "data/chroma"): self.path = Path(path) self.path.mkdir(parents=True, exist_ok=True) self.client = chromadb.PersistentClient(path=str(self.path)) self.collection = self.client.get_or_create_collection( name="engrams", metadata={"hnsw:space": "cosine"}, ) def _build_metadata(self, engram: Engram) -> Dict[str, Any]: """Serialisierte Metadaten für ChromaDB (nur primitiv/scalar/Str).""" m = engram.metadata safe: Dict[str, Any] = {} # Nur explizit erlaubte Felder übernehmen safe["source"] = str(m.get("source", "agent")) safe["confidence"] = float(m.get("confidence", 0.5)) safe["grounding"] = int(m.get("grounding", 1)) tags = m.get("tags", []) safe["tags"] = ",".join(str(t) for t in tags) if isinstance(tags, list) else str(tags) safe["created"] = str(m.get("created", "")) safe["modified"] = str(m.get("modified", "")) safe["access_count"] = int(m.get("access_count", 0)) safe["correctness"] = "confirmed" if engram.correctness.confirmed else "unconfirmed" safe["content"] = str(engram.content)[:500] # Chroma akzeptiert kurze Strings besser return safe def add(self, engram: Engram, embedding: Optional[List[float]] = None) -> None: """Engramm mit Embedding zur Vektor-DB hinzufügen.""" eid = str(engram.id) emb = embedding or engram.embedding if emb is None: emb = encode(engram.content) if emb is None: return meta = self._build_metadata(engram) meta["content"] = engram.content[:1000] # Chroma likes short strings self.collection.add( ids=[eid], embeddings=[emb], metadatas=[meta], ) def update(self, engram: Engram, embedding: Optional[List[float]] = None) -> None: """Engramm aktualisieren.""" eid = str(engram.id) emb = embedding or engram.embedding if emb is None: emb = encode(engram.content) if emb is None: return meta = self._build_metadata(engram) self.collection.update( ids=[eid], embeddings=[emb], metadatas=[meta], ) def delete(self, eid: str) -> None: """Engramm aus Vektor-DB entfernen.""" self.collection.delete(ids=[eid]) def query(self, text: str, top_k: int = 5, filters: Optional[Dict] = None) -> List[Dict[str, Any]]: """Semantische Suche.""" emb = encode(text) if emb is None: return [] results = self.collection.query( query_embeddings=[emb], n_results=top_k, where=filters, include=["metadatas", "distances", "documents"], ) out = [] for i in range(len(results["ids"][0])): out.append({ "id": results["ids"][0][i], "distance": results["distances"][0][i], "metadata": results["metadatas"][0][i], }) return out def get_by_id(self, eid: str) -> Optional[Dict[str, Any]]: """Einzelnes Engramm via ID.""" try: r = self.collection.get(ids=[eid], include=["embeddings", "metadatas"]) if r and r["ids"]: return { "id": r["ids"][0], "embedding": r["embeddings"][0] if "embeddings" in r else None, "metadata": r["metadatas"][0] if "metadatas" in r else {}, } except Exception as e: print(f"[chroma_store] get_by_id failed: {e}") return None def count(self) -> int: return self.collection.count()