Second-brain 2.0: hybrid retrieval, obsidian bridge, vector watermark, tests

This commit is contained in:
2026-05-26 19:27:12 +02:00
parent 29bc45d623
commit e1640071e4
7 changed files with 291 additions and 16 deletions

View File

@@ -38,6 +38,12 @@ try:
except ImportError:
Retriever = None
# Chroma: optional (requires chromadb)
try:
from src.chroma_store import ChromaStore
except Exception:
ChromaStore = None
# --- Configuration ---
# Path to data/brain.sqlite, located under the parent of this module's directory.
BRAIN_DB = Path(__file__).parent.parent / "data" / "brain.sqlite"
@@ -207,10 +213,34 @@ def enrich_context(topic: str, limit: int = 3) -> str:
"""
store = get_brain()
# Try the Retriever (with embeddings); fall back to simple text search
# Try hybrid retrieval (FTS + optional vector); fall back to text search
if Retriever:
ret = Retriever(store)
results = ret.retrieve(topic, limit=limit, min_confidence=0.3)
chroma = None
if ChromaStore:
try:
chroma = ChromaStore(path=str(Path(__file__).parent.parent / "data" / "chroma"))
except Exception:
chroma = None
ret = Retriever(store, chroma=chroma)
try:
results = ret.hybrid_retrieve(topic, limit=limit * 3, min_confidence=0.3)
except Exception:
results = ret.retrieve(topic, limit=limit * 3, min_confidence=0.3)
# confirmed-first ranking
def _rank(r):
eg = r["engram"]
confirmed = 1 if getattr(eg.correctness, "confirmed", False) else 0
return (confirmed, float(r.get("score", 0.0)))
results.sort(key=_rank, reverse=True)
# If we have confirmed results, show only confirmed up to limit
confirmed_only = [r for r in results if r["engram"].correctness.confirmed]
if confirmed_only:
results = confirmed_only[:limit]
else:
results = results[:limit]
else:
results_raw = store.search_text(topic, limit=limit)
results = [{"engram": eg, "score": 0.5} for eg in results_raw]