"""
Hybrid-Retrieval Engine.

Phase 1: FTS keyword search + confidence re-ranking.
Phase 2: + embedding search + score fusion.
"""

from typing import List, Dict, Any, Optional

from .engram import Engram
from .store import EngramStore


class Retriever:
    """Keyword, semantic and hybrid lookup of engrams.

    Every ranked method returns a list of dicts with the keys ``"engram"``,
    ``"score"`` and ``"match_type"``, ordered by descending score.
    """

    # Candidate pools are fetched this many times larger than ``limit`` so
    # that filtering and re-ranking still leave enough results.
    _OVERSAMPLE = 3

    # Share of an engram's confidence added on top of the fused score.
    _CONFIDENCE_BONUS = 0.1

    def __init__(self, store: EngramStore, chroma: Optional[object] = None):
        """Bind the retriever to its backends.

        Args:
            store: Engram store used for text search, lookups and saving.
            chroma: Optional vector index exposing ``query(text, top_k=...)``.
                When it is missing, semantic retrieval yields no results.
        """
        self.store = store
        self.chroma = chroma

    def retrieve(
        self,
        query: str,
        limit: int = 5,
        min_confidence: float = 0.0,
        source_filter: Optional[str] = None,
        tag_filter: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Keyword search re-ranked by engram confidence.

        Only the engrams that are actually returned are touched and saved;
        candidates dropped by the ``limit`` cut-off are left unmodified.

        Args:
            query: Text passed to the store's text search.
            limit: Maximum number of results; values <= 0 yield ``[]``.
            min_confidence: Engrams with a lower confidence are skipped.
            source_filter: If set, ``metadata["source"]`` must equal it.
            tag_filter: If set, it must be contained in ``metadata["tags"]``.

        Returns:
            Result dicts with ``match_type == "keyword"``; the score is the
            engram's confidence.
        """
        hits = self._keyword_candidates(
            query, limit, min_confidence, source_filter, tag_filter
        )
        self._mark_accessed(hit["engram"] for hit in hits)
        return hits

    def semantic_retrieve(
        self,
        query: str,
        limit: int = 5,
        min_confidence: float = 0.0,
    ) -> List[Dict[str, Any]]:
        """Semantic search through the vector index.

        Args:
            query: Text passed to the index's ``query`` method.
            limit: Maximum number of results; values <= 0 yield ``[]``.
            min_confidence: Engrams with a lower confidence are skipped.

        Returns:
            Result dicts with ``match_type == "semantic"``; the score is
            ``1.0 - distance``. Empty when no index is configured.
        """
        if not self.chroma or limit <= 0:
            return []

        hits: List[Dict[str, Any]] = []
        for row in self.chroma.query(query, top_k=limit * self._OVERSAMPLE):
            eg = self.store.get(row["id"])
            if not eg:
                # The index may reference an id the store no longer has.
                continue
            if eg.compute_confidence() < min_confidence:
                continue
            # NOTE(review): goes negative if the index reports distances
            # above 1.0 - confirm the metric used by the index.
            score = 1.0 - row.get("distance", 0)
            hits.append({"engram": eg, "score": score, "match_type": "semantic"})

        hits.sort(key=lambda hit: hit["score"], reverse=True)
        return hits[:limit]

    def hybrid_retrieve(
        self,
        query: str,
        limit: int = 5,
        min_confidence: float = 0.0,
        keyword_weight: float = 0.4,
        semantic_weight: float = 0.6,
    ) -> List[Dict[str, Any]]:
        """Fuse keyword and semantic results into one ranking.

        The final score is ``keyword_weight * keyword_score +
        semantic_weight * semantic_score`` plus a bonus of 10% of the
        engram's confidence, capped at 1.0. A side that did not match
        contributes a score of 0.0.

        Returned engrams that matched by keyword are touched and saved;
        semantic-only matches are not, matching ``semantic_retrieve``.

        Args:
            query: Text used for both searches.
            limit: Maximum number of results; values <= 0 yield ``[]``.
            min_confidence: Engrams with a lower confidence are skipped.
            keyword_weight: Weight of the keyword score.
            semantic_weight: Weight of the semantic score.

        Returns:
            Result dicts whose ``match_type`` is ``"hybrid"`` (both sides
            matched), ``"semantic"`` or ``"keyword"``.
        """
        if limit <= 0:
            return []

        pool = limit * self._OVERSAMPLE
        kw_results = {
            str(hit["engram"].id): hit
            for hit in self._keyword_candidates(query, pool, min_confidence)
        }
        sem_results = {
            str(hit["engram"].id): hit
            for hit in self.semantic_retrieve(
                query, limit=pool, min_confidence=min_confidence
            )
        }

        # Ordered de-duplication keeps equal scores in a reproducible order;
        # iterating a set of strings would vary between interpreter runs.
        all_ids = list(dict.fromkeys([*kw_results, *sem_results]))

        fusion: List[Dict[str, Any]] = []
        for eid in all_ids:
            kw = kw_results.get(eid)
            sem = sem_results.get(eid)
            kw_score = kw["score"] if kw else 0.0
            sem_score = sem["score"] if sem else 0.0

            # Weighted fusion
            mixed = keyword_weight * kw_score + semantic_weight * sem_score

            # Confidence bonus
            eg = kw["engram"] if kw else sem["engram"]
            bonus = eg.compute_confidence() * self._CONFIDENCE_BONUS
            final = min(1.0, mixed + bonus)

            if kw and sem:
                match_type = "hybrid"
            elif sem:
                match_type = "semantic"
            else:
                match_type = "keyword"

            fusion.append({"engram": eg, "score": final, "match_type": match_type})

        fusion.sort(key=lambda hit: hit["score"], reverse=True)
        top = fusion[:limit]
        self._mark_accessed(
            hit["engram"] for hit in top if hit["match_type"] != "semantic"
        )
        return top

    def related(self, engram_id: str, limit: int = 5) -> List[Engram]:
        """Return the engrams linked from ``engram_id``, best confidence first.

        Args:
            engram_id: Id of the engram whose ``links`` are followed.
            limit: Maximum number of results; values <= 0 yield ``[]``.

        Returns:
            Linked engrams found in the store; empty if ``engram_id`` is
            unknown.
        """
        if limit <= 0:
            return []
        eg = self.store.get(engram_id)
        if not eg:
            return []

        out = []
        for lid in eg.links:
            linked = self.store.get(str(lid))
            if linked:
                out.append(linked)
        out.sort(key=lambda e: e.compute_confidence(), reverse=True)
        return out[:limit]

    def recent(self, limit: int = 10) -> List[Engram]:
        """Return up to ``limit`` engrams via the store's ``get_all``."""
        return self.store.get_all(limit=limit)

    def stats(self) -> Dict[str, Any]:
        """Return the statistics reported by the store."""
        return self.store.stats()

    def _keyword_candidates(
        self,
        query: str,
        limit: int,
        min_confidence: float,
        source_filter: Optional[str] = None,
        tag_filter: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Run the filtered, confidence-ranked keyword search without side effects."""
        if limit <= 0:
            return []

        hits: List[Dict[str, Any]] = []
        for eg in self.store.search_text(query, limit=limit * self._OVERSAMPLE):
            conf = eg.compute_confidence()
            if conf < min_confidence:
                continue
            if source_filter and eg.metadata.get("source") != source_filter:
                continue
            # ``or ()`` tolerates a stored tags value of None.
            if tag_filter and tag_filter not in (eg.metadata.get("tags") or ()):
                continue
            hits.append({"engram": eg, "score": conf, "match_type": "keyword"})

        hits.sort(key=lambda hit: hit["score"], reverse=True)
        return hits[:limit]

    def _mark_accessed(self, engrams) -> None:
        """Touch each engram and save it back to the store."""
        for eg in engrams:
            eg.touch()
            self.store.save(eg)