feat(complete): Phase 2-5 - Vektor-Embeddings, ChromaDB, Neural Scorer, Streamlit Dashboard, Graph-Visualisierung
This commit is contained in:
119
src/chroma_store.py
Normal file
119
src/chroma_store.py
Normal file
@@ -0,0 +1,119 @@
|
||||
"""
|
||||
chroma_store.py - ChromaDB Vektor-Speicher für semantische Suche.
|
||||
Erweitert den SQLite-Store um Vektor-Ähnlichkeit.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
from uuid import UUID
|
||||
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
|
||||
from .engram import Engram
|
||||
from .embedder import encode
|
||||
|
||||
|
||||
class ChromaStore:
    """ChromaDB-backed vector store for engrams.

    Every engram is stored under its stringified id as an embedding plus a
    flat metadata dict (see ``_build_metadata``).
    """

    # Upper bound for the content snippet kept in the metadata; Chroma
    # likes short strings.
    _MAX_CONTENT_CHARS = 1000

    def __init__(self, path: str = "data/chroma"):
        """Open (or create) the persistent collection ``engrams``.

        Args:
            path: Directory of the persistent Chroma client. It is created,
                including missing parents, when it does not exist yet.
        """
        self.path = Path(path)
        self.path.mkdir(parents=True, exist_ok=True)
        self.client = chromadb.PersistentClient(path=str(self.path))
        self.collection = self.client.get_or_create_collection(
            name="engrams",
            metadata={"hnsw:space": "cosine"},
        )

    def _build_metadata(self, engram: "Engram") -> Dict[str, Any]:
        """Return a flattened copy of ``engram.metadata`` for ChromaDB.

        The engram's own metadata dict is never modified.

        * ``tags`` becomes a comma-separated string (``""`` when absent).
          A plain string is kept as is; it is no longer dropped.
        * ``source``, ``confidence`` and ``correctness`` get defaults when
          they are missing.
        * ``hierarchy`` is serialised to a JSON string.

        All other keys are passed through unchanged.
        """
        meta: Dict[str, Any] = dict(engram.metadata or {})

        tags = meta.pop("tags", [])
        if tags is None:
            tags = []
        if isinstance(tags, (set, frozenset)):
            # Sets have no stable order; sort for a reproducible string.
            meta["tags"] = ",".join(sorted(str(t) for t in tags))
        elif isinstance(tags, (list, tuple)):
            meta["tags"] = ",".join(str(t) for t in tags)
        else:
            meta["tags"] = str(tags)

        meta.setdefault("source", "agent")
        meta.setdefault("confidence", 0.5)
        meta.setdefault("correctness", "unconfirmed")

        # Hierarchy is stored as a JSON string.
        if "hierarchy" in meta:
            meta["hierarchy"] = json.dumps(meta["hierarchy"])
        return meta

    def _resolve_embedding(
        self, engram: "Engram", embedding: Optional[List[float]] = None
    ) -> Optional[List[float]]:
        """Pick the vector to store for ``engram``.

        Order of preference: the explicit ``embedding`` argument, then
        ``engram.embedding``, then ``encode(engram.content)``. Missing and
        empty vectors are skipped. The checks use ``is None`` / ``len``
        instead of truthiness so that array-like embeddings do not raise
        on an ambiguous boolean test.

        Returns:
            The chosen vector, or whatever ``encode`` returned (possibly
            ``None``) when neither candidate was usable.
        """
        if embedding is not None and len(embedding) > 0:
            return embedding
        stored = engram.embedding
        if stored is not None and len(stored) > 0:
            return stored
        return encode(engram.content)

    def _metadata_with_content(self, engram: "Engram") -> Dict[str, Any]:
        """Build the metadata and attach the truncated content snippet."""
        meta = self._build_metadata(engram)
        meta["content"] = engram.content[: self._MAX_CONTENT_CHARS]
        return meta

    def add(self, engram: "Engram", embedding: Optional[List[float]] = None) -> None:
        """Add an engram with its embedding to the vector DB.

        Does nothing when no embedding is available, i.e. none was given,
        the engram carries none and ``encode`` returned ``None``.
        """
        emb = self._resolve_embedding(engram, embedding)
        if emb is None:
            return

        self.collection.add(
            ids=[str(engram.id)],
            embeddings=[emb],
            metadatas=[self._metadata_with_content(engram)],
        )

    def update(self, engram: "Engram", embedding: Optional[List[float]] = None) -> None:
        """Update the stored embedding and metadata of an engram.

        Mirrors ``add``: the truncated content is written into the metadata
        as well, so the stored snippet follows the engram's current content.
        Does nothing when no embedding is available.
        """
        emb = self._resolve_embedding(engram, embedding)
        if emb is None:
            return

        self.collection.update(
            ids=[str(engram.id)],
            embeddings=[emb],
            metadatas=[self._metadata_with_content(engram)],
        )

    def delete(self, eid: str) -> None:
        """Remove the engram with id ``eid`` from the vector DB."""
        self.collection.delete(ids=[str(eid)])

    def query(self, text: str, top_k: int = 5, filters: Optional[Dict] = None) -> List[Dict[str, Any]]:
        """Run a semantic search for ``text``.

        Args:
            text: Query text; it is embedded with ``encode``.
            top_k: Maximum number of hits. Values <= 0 yield ``[]``.
            filters: Optional metadata filter passed on as ``where``. An
                empty dict is treated like ``None`` (no filter).

        Returns:
            One dict per hit with the keys ``id``, ``distance`` and
            ``metadata``, in the order delivered by the collection. Empty
            when ``encode`` returns ``None``.
        """
        if top_k <= 0:
            return []
        emb = encode(text)
        if emb is None:
            return []

        results = self.collection.query(
            query_embeddings=[emb],
            n_results=top_k,
            where=filters or None,
            include=["metadatas", "distances"],
        )

        def first_row(key: str) -> Any:
            # Results are nested per query embedding; only one was sent.
            rows = results.get(key)
            if rows is None or len(rows) == 0:
                return []
            return rows[0]

        return [
            {"id": hit_id, "distance": distance, "metadata": metadata}
            for hit_id, distance, metadata in zip(
                first_row("ids"), first_row("distances"), first_row("metadatas")
            )
        ]

    def get_by_id(self, eid: str) -> Optional[Dict[str, Any]]:
        """Fetch a single engram by id.

        Returns:
            A dict with ``id``, ``embedding`` (``None`` when the result
            carries none) and ``metadata`` (``{}`` when the result carries
            none), or ``None`` when the id is unknown or the lookup failed.
            Lookup failures are reported on stdout (best effort).
        """
        try:
            r = self.collection.get(ids=[str(eid)], include=["embeddings", "metadatas"])
        except Exception as e:
            print(f"[chroma_store] get_by_id failed: {e}")
            return None

        if not r or not r.get("ids"):
            return None

        # The keys may be present with a None value, so test the values
        # themselves rather than key membership.
        embeddings = r.get("embeddings")
        metadatas = r.get("metadatas")
        has_embedding = embeddings is not None and len(embeddings) > 0
        has_metadata = metadatas is not None and len(metadatas) > 0
        metadata = metadatas[0] if has_metadata else None
        return {
            "id": r["ids"][0],
            "embedding": embeddings[0] if has_embedding else None,
            "metadata": metadata if metadata is not None else {},
        }

    def count(self) -> int:
        """Return the number of entries in the collection."""
        return self.collection.count()
|
||||
Reference in New Issue
Block a user