feat(core): Engram, Store, Retriever, CLI - Grundsystem Second Brain

- src/engram.py: Gedaechtniseinheit mit Confidence, Correctness, Links
- src/store.py: SQLite FTS5 persistenter Speicher
- src/retriever.py: Hybrid Suche + Reranking
- src/cli.py: Kommandozeilen-Interface

Issue: #1
This commit is contained in:
2026-05-25 00:53:56 +02:00
commit 5e4f21e680
7 changed files with 891 additions and 0 deletions

55
src/retriever.py Normal file
View File

@@ -0,0 +1,55 @@
"""
Hybrid-Retrieval Engine.
Phase 1: FTS-Keyword + Confidence-Reranking.
Phase 2: + Embedding + Fusion.
"""
from typing import Any, Dict, List, Optional

from .engram import Engram
from .store import EngramStore
class Retriever:
    """Keyword retrieval over an engram store, reranked by engram confidence.

    The retriever holds no state of its own: every method delegates to the
    store passed to the constructor.
    """

    def __init__(self, store: "EngramStore"):
        """Bind the retriever to the store it searches and writes back to."""
        self.store = store

    def retrieve(
        self,
        query: str,
        limit: int = 5,
        min_confidence: float = 0.0,
        source_filter: Optional[str] = None,
        tag_filter: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Search the store for *query* and return the best hits by confidence.

        Args:
            query: Text handed to ``store.search_text``.
            limit: Maximum number of hits to return. A non-positive value
                yields an empty list without querying the store.
            min_confidence: Hits whose computed confidence is below this
                value are dropped.
            source_filter: If set, keep only engrams whose
                ``metadata["source"]`` equals this value.
            tag_filter: If set, keep only engrams whose ``metadata["tags"]``
                contains this value.

        Returns:
            At most *limit* dicts with the keys ``"engram"``, ``"score"``
            (the confidence computed before the engram is touched) and
            ``"match_type"`` (always ``"keyword"``), sorted by descending
            score.

        Only the engrams that are actually returned are touched and saved
        back to the store.
        """
        if limit <= 0:
            return []

        candidates = []
        # Over-fetch so that filtering can still leave enough hits to fill `limit`.
        for eg in self.store.search_text(query, limit=limit * 3):
            conf = eg.compute_confidence()
            if conf < min_confidence:
                continue
            if source_filter and eg.metadata.get("source") != source_filter:
                continue
            # `or ()` guards against a stored "tags" value of None.
            if tag_filter and tag_filter not in (eg.metadata.get("tags") or ()):
                continue
            candidates.append({"engram": eg, "score": conf, "match_type": "keyword"})

        candidates.sort(key=lambda r: r["score"], reverse=True)
        results = candidates[:limit]

        # Record the access only for hits the caller really receives; candidates
        # cut off by `limit` must not be marked as used.
        for hit in results:
            hit["engram"].touch()
            self.store.save(hit["engram"])
        return results

    def related(self, engram_id: str, limit: int = 5) -> List["Engram"]:
        """Return the engrams linked from *engram_id*, highest confidence first.

        Link targets that the store cannot resolve are skipped. Returns an
        empty list when *engram_id* itself is unknown.
        """
        eg = self.store.get(engram_id)
        if eg is None:
            return []
        resolved = (self.store.get(str(lid)) for lid in eg.links)
        linked = [target for target in resolved if target is not None]
        linked.sort(key=lambda e: e.compute_confidence(), reverse=True)
        return linked[:limit]

    def recent(self, limit: int = 10) -> List["Engram"]:
        """Return up to *limit* engrams as delivered by ``store.get_all``."""
        return self.store.get_all(limit=limit)

    def stats(self) -> Dict[str, Any]:
        """Return the store's statistics unchanged."""
        return self.store.stats()