feat(core): Engram, Store, Retriever, CLI - Grundsystem Second Brain

- src/engram.py: Gedaechtniseinheit mit Confidence, Correctness, Links
- src/store.py: SQLite FTS5 persistenter Speicher
- src/retriever.py: Hybrid Suche + Reranking
- src/cli.py: Kommandozeilen-Interface

Issue: #1
This commit is contained in:
2026-05-25 00:53:56 +02:00
commit 5e4f21e680
7 changed files with 891 additions and 0 deletions

230
src/engram.py Normal file
View File

@@ -0,0 +1,230 @@
"""
Engram - Gedächtniseinheit für das Second Brain.
Reines Python, keine externen Abhängigkeiten.
"""
import json
import hashlib
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from enum import IntEnum
from typing import Optional, List, Dict, Any
from uuid import uuid4, UUID
class Grounding(IntEnum):
    """Provenance / reliability level of a piece of information.

    The numeric values ascend from ``UNKNOWN`` (0) to ``VERIFIED`` (4), so
    members can be compared and stored as plain integers.
    """

    UNKNOWN = 0
    ASSUMPTION = 1
    INFERRED = 2
    SOURCED = 3
    VERIFIED = 4
@dataclass
class ReviewEntry:
    """One record in the review (correction) history of an engram."""

    by: str  # "user" or an agent_id
    action: str  # "confirm", "reject", "modify"
    at: str  # ISO-8601 timestamp
    note: str = ""

    def to_dict(self) -> dict:
        """Return the entry as a plain dict with keys by/action/at/note."""
        return dict(by=self.by, action=self.action, at=self.at, note=self.note)

    @classmethod
    def from_dict(cls, d: dict) -> "ReviewEntry":
        """Build an entry from a dict.

        ``by``, ``action`` and ``at`` are required (``KeyError`` if absent);
        ``note`` is optional and defaults to the empty string.
        """
        return cls(
            by=d["by"],
            action=d["action"],
            at=d["at"],
            note=d.get("note", ""),
        )
@dataclass
class Correctness:
    """Tracks confirmations and rejections of an engram over time."""

    confirmed: bool = False
    confirmations: int = 0
    rejections: int = 0
    last_reviewed: Optional[str] = None
    review_history: List[ReviewEntry] = field(default_factory=list)

    def _log_review(self, by: str, action: str, note: str) -> None:
        """Stamp ``last_reviewed`` with the current time and append a history entry."""
        self.last_reviewed = _now()
        self.review_history.append(
            ReviewEntry(by, action, self.last_reviewed, note)
        )

    def confirm(self, by: str, note: str = "") -> None:
        """Count one confirmation by *by* and set ``confirmed`` to True."""
        self.confirmations += 1
        self.confirmed = True
        self._log_review(by, "confirm", note)

    def reject(self, by: str, note: str = "") -> None:
        """Count one rejection by *by* and set ``confirmed`` to False."""
        self.rejections += 1
        self.confirmed = False
        self._log_review(by, "reject", note)

    def score(self) -> float:
        """Return the share of confirmations among all reviews.

        With no reviews at all the result is 0.5 (undetermined).
        """
        reviews = self.confirmations + self.rejections
        return self.confirmations / reviews if reviews else 0.5

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict, with history entries as dicts."""
        history = [entry.to_dict() for entry in self.review_history]
        return {
            "confirmed": self.confirmed,
            "confirmations": self.confirmations,
            "rejections": self.rejections,
            "last_reviewed": self.last_reviewed,
            "review_history": history,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Correctness":
        """Rebuild a tracker from a dict; every key is optional."""
        return cls(
            confirmed=d.get("confirmed", False),
            confirmations=d.get("confirmations", 0),
            rejections=d.get("rejections", 0),
            last_reviewed=d.get("last_reviewed"),
            review_history=[
                ReviewEntry.from_dict(raw) for raw in d.get("review_history", [])
            ],
        )
@dataclass
class Engram:
    """A single unit of memory (engram).

    Every fact, observation or mistake is stored as an engram. Each engram
    carries its own confidence inputs (in ``metadata``) and its own review
    history (in ``correctness``).

    Attributes:
        id: Unique identifier of the engram.
        content: The stored text.
        metadata: Bookkeeping dict. ``create`` fills it with the keys
            source, confidence, created, modified, access_count,
            last_accessed, tags, session_id, agent_id, grounding and hash.
        correctness: Confirmation / rejection tracker.
        links: IDs of engrams linked to this one.
        hierarchy: Dict with the keys "parent", "children" and "depth".
        embedding: Optional vector; this class only stores it.
    """

    id: UUID
    content: str
    metadata: Dict[str, Any] = field(default_factory=dict)
    correctness: Correctness = field(default_factory=Correctness)
    links: List[UUID] = field(default_factory=list)
    hierarchy: Dict[str, Any] = field(default_factory=dict)
    embedding: Optional[List[float]] = None  # computed on demand elsewhere

    @classmethod
    def create(
        cls,
        content: str,
        source: str = "agent",
        confidence: float = 0.5,
        tags: Optional[List[str]] = None,
        session_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        grounding: Grounding = Grounding.ASSUMPTION,
        parent: Optional[UUID] = None,
    ) -> "Engram":
        """Create a new engram with a fresh UUID and populated metadata.

        Args:
            content: Text to remember; its truncated SHA-256 is stored
                under ``metadata["hash"]``.
            source: Stored under ``metadata["source"]``.
            confidence: Base confidence used by ``compute_confidence``.
            tags: Optional tag list; ``None`` becomes an empty list.
            session_id: Stored as-is in the metadata.
            agent_id: Stored as-is in the metadata.
            grounding: Stored as its integer value.
            parent: Optional parent ID. Only the ID is recorded here: the
                depth stays 0 and the parent's child list is not touched.
                Use ``set_parent`` to wire up both sides.

        Returns:
            The newly built engram.
        """
        now = _now()
        return cls(
            id=uuid4(),
            content=content,
            metadata={
                "source": source,
                "confidence": confidence,
                "created": now,
                "modified": now,
                "access_count": 0,
                "last_accessed": now,
                "tags": tags or [],
                "session_id": session_id,
                "agent_id": agent_id,
                "grounding": grounding.value,
                "hash": _hash(content),
            },
            correctness=Correctness(),
            links=[],
            hierarchy={
                "parent": str(parent) if parent else None,
                "children": [],
                "depth": 0,
            },
        )

    def touch(self) -> None:
        """Record an access: bump ``access_count`` and refresh ``last_accessed``."""
        self.metadata["access_count"] = self.metadata.get("access_count", 0) + 1
        self.metadata["last_accessed"] = _now()

    def add_link(self, other: "Engram") -> None:
        """Link this engram and *other* to each other (bidirectional).

        Linking an engram to itself (or to another object with the same
        ID) is a no-op, so ``links`` never contains the engram's own ID.
        Already existing links are not duplicated.
        """
        if other.id == self.id:
            # A self-link carries no information and would pollute ``links``.
            return
        if other.id not in self.links:
            self.links.append(other.id)
        if self.id not in other.links:
            other.links.append(self.id)

    def set_parent(self, parent: "Engram") -> None:
        """Make *parent* the parent of this engram.

        Stores the parent's ID, sets this engram's depth to the parent's
        depth plus one, and registers this engram's ID in the parent's
        child list (once).
        """
        self.hierarchy["parent"] = str(parent.id)
        self.hierarchy["depth"] = parent.hierarchy.get("depth", 0) + 1
        if str(self.id) not in parent.hierarchy.get("children", []):
            parent.hierarchy.setdefault("children", []).append(str(self.id))

    def compute_confidence(self) -> float:
        """Combine several heuristics into one confidence value in [0, 1].

        Weights: base confidence 0.3, review score 0.3, access frequency
        up to 0.1, recency up to 0.1, grounding up to 0.2.

        Returns:
            The weighted sum, clamped to the range 0.0 .. 1.0.
        """
        base = self.metadata.get("confidence", 0.5)
        correctness_score = self.correctness.score()
        # Access frequency: saturates at 10 accesses.
        access = min(self.metadata.get("access_count", 0) / 10, 1.0) * 0.1
        # Recency: decays linearly to 0 over 30 days. The upper clamp keeps
        # a timestamp from the future (negative age, e.g. clock skew) from
        # contributing more than the intended 0.1.
        age_days = _age_days(self.metadata.get("created", _now()))
        recency = min(max(0.0, 1.0 - (age_days / 30)), 1.0) * 0.1
        # Grounding: stored as an integer 0..4, scaled to at most 0.2.
        grounding_boost = (self.metadata.get("grounding", 0) / 4) * 0.2
        combined = (
            base * 0.3
            + correctness_score * 0.3
            + access
            + recency
            + grounding_boost
        )
        return min(max(combined, 0.0), 1.0)

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict; UUIDs are rendered as strings."""
        return {
            "id": str(self.id),
            "content": self.content,
            "metadata": self.metadata,
            "correctness": self.correctness.to_dict(),
            "links": [str(link_id) for link_id in self.links],
            "hierarchy": self.hierarchy,
            "embedding": self.embedding,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Engram":
        """Rebuild an engram from the dict form produced by ``to_dict``.

        ``id`` and ``content`` are required (``KeyError`` if absent); all
        other keys fall back to empty defaults.
        """
        return cls(
            id=UUID(d["id"]),
            content=d["content"],
            metadata=d.get("metadata", {}),
            correctness=Correctness.from_dict(d.get("correctness", {})),
            links=[UUID(link_id) for link_id in d.get("links", [])],
            hierarchy=d.get("hierarchy", {}),
            embedding=d.get("embedding"),
        )

    def to_json(self) -> str:
        """Serialise the engram to indented JSON, keeping non-ASCII characters."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    @classmethod
    def from_json(cls, s: str) -> "Engram":
        """Parse a JSON string produced by ``to_json`` back into an engram."""
        return cls.from_dict(json.loads(s))
# --- Helpers ---
def _now() -> str:
return datetime.now(timezone.utc).isoformat()
def _hash(content: str) -> str:
return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
def _age_days(iso_str: str) -> float:
try:
dt = datetime.fromisoformat(iso_str)
return (datetime.now(timezone.utc) - dt).total_seconds() / 86400
except Exception:
return 0.0