feat(core): Engram, Store, Retriever, CLI - Grundsystem Second Brain

- src/engram.py: Gedächtniseinheit mit Confidence, Correctness, Links
- src/store.py: persistenter Speicher auf Basis von SQLite FTS5
- src/retriever.py: Hybrid-Suche + Reranking
- src/cli.py: Kommandozeilen-Interface

Issue: #1
2026-05-25 00:53:56 +02:00
commit 5e4f21e680
7 changed files with 891 additions and 0 deletions
--- a/src/engram.py
+++ b/src/engram.py
@@ -0,0 +1,230 @@
+"""
+Engram - the unit of memory for the Second Brain.
+Pure Python, no external dependencies.
+"""
+
+import json
+import hashlib
+from dataclasses import dataclass, field, asdict
+from datetime import datetime, timezone
+from enum import IntEnum
+from typing import Optional, List, Dict, Any
+from uuid import uuid4, UUID
+
+
+class Grounding(IntEnum):
+    """Origin / reliability of a piece of information.
+
+    Members are ordered from least (0) to most (4) reliable. The integer
+    value is what ``Engram.create`` stores under ``metadata["grounding"]``,
+    and ``Engram.compute_confidence`` divides it by 4, so the numeric values
+    must stay stable.
+    """
+    UNKNOWN = 0     # origin not known
+    ASSUMPTION = 1  # assumed; default for newly created engrams
+    INFERRED = 2    # derived from other information
+    SOURCED = 3     # backed by a source
+    VERIFIED = 4    # verified; highest level
+
+
+@dataclass
+class ReviewEntry:
+    """One record in the review history of an engram."""
+    by: str      # reviewer: "user" or an agent id
+    action: str  # "confirm", "reject" or "modify"
+    at: str      # ISO-8601 timestamp of the review
+    note: str = ""  # optional free-text remark
+
+    def to_dict(self) -> dict:
+        """Return the entry as a plain dict keyed by field name."""
+        return {name: getattr(self, name) for name in ("by", "action", "at", "note")}
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "ReviewEntry":
+        """Build an entry from a dict.
+
+        ``by``, ``action`` and ``at`` are required (``KeyError`` if missing);
+        ``note`` falls back to the empty string.
+        """
+        return cls(
+            by=d["by"],
+            action=d["action"],
+            at=d["at"],
+            note=d.get("note", ""),
+        )
+
+
+@dataclass
+class Correctness:
+    """Tracks how often an engram was confirmed or rejected over time."""
+    confirmed: bool = False              # outcome of the most recent review
+    confirmations: int = 0               # number of confirm() calls
+    rejections: int = 0                  # number of reject() calls
+    last_reviewed: Optional[str] = None  # timestamp of the latest review
+    review_history: List[ReviewEntry] = field(default_factory=list)
+
+    def confirm(self, by: str, note: str = "") -> None:
+        """Record a confirmation by ``by`` and append it to the history."""
+        self.confirmations += 1
+        self.confirmed = True
+        stamp = _now()
+        self.last_reviewed = stamp
+        self.review_history.append(
+            ReviewEntry(by=by, action="confirm", at=stamp, note=note)
+        )
+
+    def reject(self, by: str, note: str = "") -> None:
+        """Record a rejection by ``by`` and append it to the history."""
+        self.rejections += 1
+        self.confirmed = False
+        stamp = _now()
+        self.last_reviewed = stamp
+        self.review_history.append(
+            ReviewEntry(by=by, action="reject", at=stamp, note=note)
+        )
+
+    def score(self) -> float:
+        """Return the share of confirmations among all reviews.
+
+        Without any review the result is the neutral value 0.5.
+        """
+        reviews = self.confirmations + self.rejections
+        return self.confirmations / reviews if reviews else 0.5
+
+    def to_dict(self) -> dict:
+        """Return a plain-dict representation, history entries included."""
+        history = [entry.to_dict() for entry in self.review_history]
+        return {
+            "confirmed": self.confirmed,
+            "confirmations": self.confirmations,
+            "rejections": self.rejections,
+            "last_reviewed": self.last_reviewed,
+            "review_history": history,
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "Correctness":
+        """Rebuild an instance from a dict; missing keys use the defaults."""
+        return cls(
+            confirmed=d.get("confirmed", False),
+            confirmations=d.get("confirmations", 0),
+            rejections=d.get("rejections", 0),
+            last_reviewed=d.get("last_reviewed"),
+            review_history=[
+                ReviewEntry.from_dict(raw) for raw in d.get("review_history", [])
+            ],
+        )
+
+
+@dataclass
+class Engram:
+    """
+    A single unit of memory (an engram).
+
+    Every fact, observation or mistake is stored as one engram, which carries
+    its own confidence value and its review history along with it.
+
+    Attributes:
+        id: Unique identifier of the engram.
+        content: The stored text.
+        metadata: Bookkeeping dict. ``create`` fills in the keys ``source``,
+            ``confidence``, ``created``, ``modified``, ``access_count``,
+            ``last_accessed``, ``tags``, ``session_id``, ``agent_id``,
+            ``grounding`` and ``hash``.
+        correctness: Confirmation/rejection tally plus review log.
+        links: Ids of the engrams this one is linked to.
+        hierarchy: Dict with the keys ``parent``, ``children`` and ``depth``.
+        embedding: Optional vector.
+    """
+    id: UUID
+    content: str
+    metadata: Dict[str, Any] = field(default_factory=dict)
+    correctness: Correctness = field(default_factory=Correctness)
+    links: List[UUID] = field(default_factory=list)
+    hierarchy: Dict[str, Any] = field(default_factory=dict)
+    embedding: Optional[List[float]] = None  # computed on demand
+
+    @classmethod
+    def create(
+        cls,
+        content: str,
+        source: str = "agent",
+        confidence: float = 0.5,
+        tags: Optional[List[str]] = None,
+        session_id: Optional[str] = None,
+        agent_id: Optional[str] = None,
+        grounding: Grounding = Grounding.ASSUMPTION,
+        parent: Optional[UUID] = None,
+    ) -> "Engram":
+        """Create a new engram with a fresh id and default bookkeeping.
+
+        Args:
+            content: Text to remember; a short hash of it is stored as well.
+            source: Stored as ``metadata["source"]``.
+            confidence: Initial confidence, stored unchanged.
+            tags: Optional tags; the engram keeps its own copy of the list.
+            session_id: Stored as ``metadata["session_id"]``.
+            agent_id: Stored as ``metadata["agent_id"]``.
+            grounding: A ``Grounding`` member or its integer value.
+            parent: Optional id of a parent engram. Only the id is recorded;
+                ``depth`` stays 0 and the parent's ``children`` list is not
+                touched. Use ``set_parent`` for a full parent/child link.
+
+        Raises:
+            ValueError: If ``grounding`` is not a valid ``Grounding`` value.
+        """
+        now = _now()
+        metadata = {
+            "source": source,
+            "confidence": confidence,
+            "created": now,
+            "modified": now,
+            "access_count": 0,
+            "last_accessed": now,
+            # Copy so later changes to the caller's list do not leak in.
+            "tags": list(tags) if tags else [],
+            "session_id": session_id,
+            "agent_id": agent_id,
+            # Grounding(...) accepts both enum members and plain ints.
+            "grounding": Grounding(grounding).value,
+            "hash": _hash(content),
+        }
+        hierarchy = {
+            "parent": str(parent) if parent is not None else None,
+            "children": [],
+            "depth": 0,
+        }
+        return cls(
+            id=uuid4(),
+            content=content,
+            metadata=metadata,
+            correctness=Correctness(),
+            links=[],
+            hierarchy=hierarchy,
+        )
+
+    def touch(self) -> None:
+        """Register an access: bump the counter and refresh the timestamp."""
+        self.metadata["access_count"] = self.metadata.get("access_count", 0) + 1
+        self.metadata["last_accessed"] = _now()
+
+    def add_link(self, other: "Engram") -> None:
+        """Link this engram and ``other`` in both directions.
+
+        Existing links are not duplicated. Linking an engram to itself (same
+        id) is ignored, so ``links`` never contains the engram's own id.
+        """
+        if other.id == self.id:
+            return
+        if other.id not in self.links:
+            self.links.append(other.id)
+        if self.id not in other.links:
+            other.links.append(self.id)
+
+    def set_parent(self, parent: "Engram") -> None:
+        """Make ``parent`` the parent of this engram.
+
+        Stores the parent's id, sets ``depth`` to the parent's depth plus one
+        and registers this engram's id in the parent's ``children`` list
+        (once). A previously set parent is not updated.
+        """
+        self.hierarchy["parent"] = str(parent.id)
+        self.hierarchy["depth"] = parent.hierarchy.get("depth", 0) + 1
+        children = parent.hierarchy.setdefault("children", [])
+        own_id = str(self.id)
+        if own_id not in children:
+            children.append(own_id)
+
+    def compute_confidence(self) -> float:
+        """Combine several heuristics into an overall confidence in [0, 1].
+
+        Weights: stored confidence 0.3, review score 0.3, access frequency
+        0.1 (saturates at 10 accesses), recency 0.1 (fades to 0 after
+        30 days), grounding 0.2. Every term is clamped to its own weight so
+        that one out-of-range value (for example a ``created`` timestamp in
+        the future) cannot dominate the result.
+        """
+        meta = self.metadata
+        base = min(max(meta.get("confidence", 0.5), 0.0), 1.0)
+        correctness_score = self.correctness.score()
+        # Frequently used engrams tend to matter more.
+        access = min(max(meta.get("access_count", 0), 0) / 10, 1.0) * 0.1
+        # Newer information is more relevant; a negative age (timestamp in
+        # the future) counts as "just created" instead of inflating the term.
+        age_days = max(_age_days(meta.get("created", _now())), 0.0)
+        recency = max(0.0, 1.0 - (age_days / 30)) * 0.1
+        # Grounding levels run from 0 to 4.
+        grounding_boost = (min(max(meta.get("grounding", 0), 0), 4) / 4) * 0.2
+
+        combined = (
+            base * 0.3 +
+            correctness_score * 0.3 +
+            access +
+            recency +
+            grounding_boost
+        )
+        return min(max(combined, 0.0), 1.0)
+
+    def to_dict(self) -> dict:
+        """Return a JSON-serialisable dict of this engram.
+
+        Ids are rendered as strings. ``metadata``, ``hierarchy`` and
+        ``embedding`` are the engram's own objects, not copies.
+        """
+        return {
+            "id": str(self.id),
+            "content": self.content,
+            "metadata": self.metadata,
+            "correctness": self.correctness.to_dict(),
+            "links": [str(link_id) for link_id in self.links],
+            "hierarchy": self.hierarchy,
+            "embedding": self.embedding,
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "Engram":
+        """Rebuild an engram from the structure produced by ``to_dict``.
+
+        ``id`` and ``content`` are required; every other key is optional.
+        """
+        return cls(
+            id=UUID(d["id"]),
+            content=d["content"],
+            metadata=d.get("metadata", {}),
+            correctness=Correctness.from_dict(d.get("correctness", {})),
+            links=[UUID(raw_id) for raw_id in d.get("links", [])],
+            hierarchy=d.get("hierarchy", {}),
+            embedding=d.get("embedding"),
+        )
+
+    def to_json(self) -> str:
+        """Serialise to indented JSON, keeping non-ASCII characters as-is."""
+        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)
+
+    @classmethod
+    def from_json(cls, s: str) -> "Engram":
+        """Parse a JSON string produced by ``to_json``."""
+        return cls.from_dict(json.loads(s))
+
+
+# --- Helpers ---
+
+def _now() -> str:
+    """Return the current UTC time as a timezone-aware ISO-8601 string."""
+    current = datetime.now(tz=timezone.utc)
+    return current.isoformat()
+
+
+def _hash(content: str) -> str:
+    """Return the first 16 hex digits of the SHA-256 of ``content`` (UTF-8)."""
+    digest = hashlib.sha256(content.encode("utf-8"))
+    return digest.hexdigest()[:16]
+
+
+def _age_days(iso_str: str) -> float:
+    """Return how many days ago the ISO-8601 timestamp ``iso_str`` lies.
+
+    Timestamps without a UTC offset are interpreted as UTC, and a trailing
+    ``Z`` is accepted as the UTC designator. The result is negative for
+    timestamps in the future.
+
+    Returns:
+        The age in (fractional) days, or 0.0 when ``iso_str`` cannot be
+        parsed or is not a string.
+    """
+    try:
+        text = iso_str
+        # Older Python versions reject the "Z" suffix in fromisoformat().
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        dt = datetime.fromisoformat(text)
+    except (AttributeError, TypeError, ValueError):
+        return 0.0
+    if dt.tzinfo is None:
+        # Subtracting a naive from an aware datetime raises TypeError;
+        # treat naive values as UTC instead of reporting them as brand new.
+        dt = dt.replace(tzinfo=timezone.utc)
+    return (datetime.now(timezone.utc) - dt).total_seconds() / 86400