feat(core): Engram, Store, Retriever, CLI - Grundsystem Second Brain
- src/engram.py: Gedaechtniseinheit mit Confidence, Correctness, Links - src/store.py: SQLite FTS5 persistenter Speicher - src/retriever.py: Hybrid Suche + Reranking - src/cli.py: Kommandozeilen-Interface Issue: #1
This commit is contained in:
230
src/engram.py
Normal file
230
src/engram.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""
|
||||
Engram - Gedächtniseinheit für das Second Brain.
|
||||
Rein Python, keine externen Abhängigkeiten.
|
||||
"""
|
||||
|
||||
import json
|
||||
import hashlib
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime, timezone
|
||||
from enum import IntEnum
|
||||
from typing import Optional, List, Dict, Any
|
||||
from uuid import uuid4, UUID
|
||||
|
||||
|
||||
class Grounding(IntEnum):
    """Origin/reliability level of a piece of information.

    The members are plain integers (0-4), so levels can be compared with
    each other and stored as numbers.
    """

    UNKNOWN = 0  # origin not known
    ASSUMPTION = 1  # assumed
    INFERRED = 2  # inferred
    SOURCED = 3  # has a source
    VERIFIED = 4  # verified
|
||||
|
||||
|
||||
@dataclass
class ReviewEntry:
    """One entry in the correction history."""

    by: str  # "user" or an agent id
    action: str  # "confirm", "reject", "modify"
    at: str  # ISO-8601 timestamp
    note: str = ""

    def to_dict(self) -> dict:
        """Return the entry as a plain dict with the keys by/action/at/note."""
        return {key: getattr(self, key) for key in ("by", "action", "at", "note")}

    @classmethod
    def from_dict(cls, d: dict) -> "ReviewEntry":
        """Build an entry from a dict.

        ``by``, ``action`` and ``at`` are required (``KeyError`` if missing);
        ``note`` is optional and defaults to the empty string.
        """
        return cls(
            by=d["by"],
            action=d["action"],
            at=d["at"],
            note=d.get("note", ""),
        )
|
||||
|
||||
|
||||
@dataclass
class Correctness:
    """Tracks the correctness of an engram over time."""

    confirmed: bool = False
    confirmations: int = 0
    rejections: int = 0
    last_reviewed: Optional[str] = None
    review_history: List[ReviewEntry] = field(default_factory=list)

    def confirm(self, by: str, note: str = "") -> None:
        """Record a confirmation by ``by`` and mark the state as confirmed."""
        stamp = _now()
        self.confirmations += 1
        self.confirmed = True
        self.last_reviewed = stamp
        self.review_history.append(ReviewEntry(by, "confirm", stamp, note))

    def reject(self, by: str, note: str = "") -> None:
        """Record a rejection by ``by`` and clear the confirmed flag."""
        stamp = _now()
        self.rejections += 1
        self.confirmed = False
        self.last_reviewed = stamp
        self.review_history.append(ReviewEntry(by, "reject", stamp, note))

    def score(self) -> float:
        """Return the share of confirmations among all reviews.

        Without any review the result is the neutral value 0.5.
        """
        reviews = self.confirmations + self.rejections
        return self.confirmations / reviews if reviews else 0.5

    def to_dict(self) -> dict:
        """Serialise the state, including the full review history, to a dict."""
        history = [entry.to_dict() for entry in self.review_history]
        return {
            "confirmed": self.confirmed,
            "confirmations": self.confirmations,
            "rejections": self.rejections,
            "last_reviewed": self.last_reviewed,
            "review_history": history,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Correctness":
        """Rebuild the state from a dict; every key is optional."""
        return cls(
            confirmed=d.get("confirmed", False),
            confirmations=d.get("confirmations", 0),
            rejections=d.get("rejections", 0),
            last_reviewed=d.get("last_reviewed"),
            review_history=[
                ReviewEntry.from_dict(item) for item in d.get("review_history", [])
            ],
        )
|
||||
|
||||
|
||||
@dataclass
class Engram:
    """A memory unit (engram).

    Every fact, observation or error is stored as an engram. It carries its
    own confidence value and its own correction history.
    """

    id: UUID
    content: str
    metadata: Dict[str, Any] = field(default_factory=dict)
    correctness: Correctness = field(default_factory=Correctness)
    links: List[UUID] = field(default_factory=list)
    hierarchy: Dict[str, Any] = field(default_factory=dict)
    embedding: Optional[List[float]] = None  # computed on demand

    @classmethod
    def create(
        cls,
        content: str,
        source: str = "agent",
        confidence: float = 0.5,
        tags: Optional[List[str]] = None,
        session_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        grounding: Grounding = Grounding.ASSUMPTION,
        parent: Optional[UUID] = None,
    ) -> "Engram":
        """Factory: create a new engram with sensible defaults.

        A fresh random id is generated, and the created/modified/last-accessed
        timestamps are all set to the current time. ``grounding`` may be a
        ``Grounding`` member or a plain int; it is stored as an int.

        When ``parent`` is given, only its id is stored. The depth stays 0 and
        the parent object is not updated here; use ``set_parent`` for that.
        """
        now = _now()
        metadata = {
            "source": source,
            "confidence": confidence,
            "created": now,
            "modified": now,
            "access_count": 0,
            "last_accessed": now,
            "tags": tags or [],
            "session_id": session_id,
            "agent_id": agent_id,
            # int() accepts both Grounding members and bare ints and always
            # yields a plain, JSON-serialisable integer.
            "grounding": int(grounding),
            "hash": _hash(content),
        }
        hierarchy = {
            "parent": str(parent) if parent else None,
            "children": [],
            "depth": 0,
        }
        return cls(
            id=uuid4(),
            content=content,
            metadata=metadata,
            correctness=Correctness(),
            links=[],
            hierarchy=hierarchy,
        )

    def touch(self) -> None:
        """Register an access: bump the counter and refresh the access time."""
        self.metadata["access_count"] = self.metadata.get("access_count", 0) + 1
        self.metadata["last_accessed"] = _now()

    def add_link(self, other: "Engram") -> None:
        """Link this engram and ``other`` bidirectionally (idempotent).

        Linking an engram to itself is ignored, so an engram never lists its
        own id in ``links``.
        """
        if other.id == self.id:
            return
        if other.id not in self.links:
            self.links.append(other.id)
        if self.id not in other.links:
            other.links.append(self.id)

    def set_parent(self, parent: "Engram") -> None:
        """Make ``parent`` the parent of this engram.

        Stores the parent's id, sets the depth to the parent's depth plus one
        and registers this engram in the parent's children list.
        """
        # NOTE: a previously assigned parent is not touched here, so its
        # children list still contains this engram's id.
        own_id = str(self.id)
        self.hierarchy["parent"] = str(parent.id)
        self.hierarchy["depth"] = parent.hierarchy.get("depth", 0) + 1
        if own_id not in parent.hierarchy.get("children", []):
            parent.hierarchy.setdefault("children", []).append(own_id)

    def compute_confidence(self) -> float:
        """Compute the overall confidence from several weighted factors.

        A plain heuristic, no neural network. The weights add up to 1.0:
        stored confidence 0.3, correctness score 0.3, access frequency 0.1,
        recency 0.1 and grounding 0.2. The result is clamped to [0.0, 1.0].
        """
        base = self.metadata.get("confidence", 0.5)
        correctness_score = self.correctness.score()

        # Access frequency: saturates at 10 accesses.
        access = min(self.metadata.get("access_count", 0) / 10, 1.0) * 0.1

        # Recency: falls linearly to 0 over 30 days. Clamped on both sides so
        # that a creation time in the future (negative age, e.g. clock skew)
        # cannot push this term above its 0.1 weight.
        age_days = _age_days(self.metadata.get("created", _now()))
        recency = min(max(1.0 - (age_days / 30), 0.0), 1.0) * 0.1

        # Grounding: level 0-4 scaled to at most 0.2.
        grounding_boost = (self.metadata.get("grounding", 0) / 4) * 0.2

        combined = (
            base * 0.3
            + correctness_score * 0.3
            + access
            + recency
            + grounding_boost
        )
        return min(max(combined, 0.0), 1.0)

    def to_dict(self) -> dict:
        """Serialise the engram to a dict; ids are rendered as strings.

        ``metadata``, ``hierarchy`` and ``embedding`` are returned by
        reference, not copied.
        """
        return {
            "id": str(self.id),
            "content": self.content,
            "metadata": self.metadata,
            "correctness": self.correctness.to_dict(),
            "links": [str(link) for link in self.links],
            "hierarchy": self.hierarchy,
            "embedding": self.embedding,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Engram":
        """Rebuild an engram from a dict as produced by ``to_dict``.

        ``id`` and ``content`` are required; all other keys are optional.
        """
        return cls(
            id=UUID(d["id"]),
            content=d["content"],
            metadata=d.get("metadata", {}),
            correctness=Correctness.from_dict(d.get("correctness", {})),
            links=[UUID(link) for link in d.get("links", [])],
            hierarchy=d.get("hierarchy", {}),
            embedding=d.get("embedding"),
        )

    def to_json(self) -> str:
        """Serialise the engram to an indented JSON string (non-ASCII kept)."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    @classmethod
    def from_json(cls, s: str) -> "Engram":
        """Rebuild an engram from a JSON string as produced by ``to_json``."""
        return cls.from_dict(json.loads(s))
|
||||
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
def _now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _hash(content: str) -> str:
|
||||
return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def _age_days(iso_str: str) -> float:
|
||||
try:
|
||||
dt = datetime.fromisoformat(iso_str)
|
||||
return (datetime.now(timezone.utc) - dt).total_seconds() / 86400
|
||||
except Exception:
|
||||
return 0.0
|
||||
Reference in New Issue
Block a user