Files
second-brain/src/engram.py

329 lines
12 KiB
Python

"""
Engram - Gedächtniseinheit für das Second Brain.
Rein Python, keine externen Abhängigkeiten.
"""
import json
import hashlib
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from enum import IntEnum
from typing import Optional, List, Dict, Any
from uuid import uuid4, UUID
class Grounding(IntEnum):
    """Origin/reliability of a piece of information.

    Members are plain integers running from ``UNKNOWN`` (0) to
    ``VERIFIED`` (4), so they can be compared and stored as ints.
    """

    # NOTE: the integer values are persisted (``Engram.create`` stores
    # ``grounding.value`` in the metadata), so do not renumber them.
    UNKNOWN = 0
    ASSUMPTION = 1
    INFERRED = 2
    SOURCED = 3
    VERIFIED = 4
@dataclass
class ReviewEntry:
    """A single entry in the correction/review history."""

    by: str  # who reviewed: "user" or an agent id
    action: str  # e.g. "confirm", "reject", "modify"
    at: str  # ISO-8601 timestamp
    note: str = ""

    def to_dict(self) -> dict:
        """Return the entry as a plain dict with the keys by/action/at/note."""
        return {key: getattr(self, key) for key in ("by", "action", "at", "note")}

    @classmethod
    def from_dict(cls, d: dict) -> "ReviewEntry":
        """Build an entry from a dict.

        ``by``, ``action`` and ``at`` are required (``KeyError`` if missing);
        ``note`` is optional and defaults to an empty string.
        """
        return cls(
            by=d["by"],
            action=d["action"],
            at=d["at"],
            note=d.get("note", ""),
        )
@dataclass
class Correctness:
    """Track the correctness of an engram over time.

    ``verdict`` is the primary, non-binary state and holds one of
    ``unknown``, ``probable_true``, ``probable_false``, ``confirmed_true``
    or ``confirmed_false``. ``confirmed`` is the older boolean flag
    ("this statement is correct"); ``set_verdict`` keeps it in sync with
    the verdict.
    """

    # Accepted verdict values; anything else is normalised to "unknown".
    # Deliberately unannotated so the dataclass does not treat it as a field.
    _VALID_VERDICTS = (
        "unknown",
        "probable_true",
        "probable_false",
        "confirmed_true",
        "confirmed_false",
    )

    verdict: str = "unknown"
    evidence: List[Dict[str, Any]] = field(default_factory=list)
    confirmed: bool = False
    confirmations: int = 0
    rejections: int = 0
    last_reviewed: Optional[str] = None
    review_history: List[ReviewEntry] = field(default_factory=list)

    def is_final(self) -> bool:
        """Return True when the verdict is ``confirmed_true`` or ``confirmed_false``."""
        return self.verdict in ("confirmed_true", "confirmed_false")

    def set_verdict(
        self,
        by: str,
        verdict: str,
        note: str = "",
        evidence: Optional[List[Dict[str, Any]]] = None,
    ) -> None:
        """Set the verdict, update the review timestamp and log the change.

        Args:
            by: Reviewer ("user" or an agent id).
            verdict: New verdict; unrecognised or empty values become "unknown".
            note: Free-text note stored in the history entry.
            evidence: Optional evidence items; only dict items are kept.
        """
        verdict = (verdict or "").strip()
        if verdict not in self._VALID_VERDICTS:
            verdict = "unknown"
        self.verdict = verdict
        # Keep the backward-compatible boolean in sync: historically,
        # confirmed=True meant "this statement is correct".
        self.confirmed = verdict == "confirmed_true"
        self.last_reviewed = _now()
        if evidence:
            # Best effort: a non-iterable ``evidence`` value is ignored rather
            # than aborting the verdict change, but other errors now surface.
            try:
                usable = [item for item in evidence if isinstance(item, dict)]
            except TypeError:
                usable = []
            self.evidence.extend(usable)
        self.review_history.append(
            ReviewEntry(by, "set_verdict", self.last_reviewed, f"{verdict}: {note}".strip())
        )

    def confirm(self, by: str, note: str = "") -> None:
        """Count a confirmation and set the verdict to ``confirmed_true``.

        Two history entries are written: the "set_verdict" one and the
        historic "confirm" action tag.
        """
        self.confirmations += 1
        self.set_verdict(by, "confirmed_true", note)
        # Preserve the historic action tag too.
        self.review_history.append(ReviewEntry(by, "confirm", self.last_reviewed, note))

    def reject(self, by: str, note: str = "") -> None:
        """Count a rejection and set the verdict to ``confirmed_false``.

        Two history entries are written: the "set_verdict" one and the
        historic "reject" action tag.
        """
        self.rejections += 1
        self.set_verdict(by, "confirmed_false", note)
        self.review_history.append(ReviewEntry(by, "reject", self.last_reviewed, note))

    def score(self) -> float:
        """Return a confidence score in [0, 1] derived from the review state.

        An explicit verdict wins (1.0 / 0.0 / 0.75 / 0.25). For "unknown"
        the ratio of confirmations to all reviews is used, or 0.5 when
        there are no reviews at all.
        """
        if self.verdict == "confirmed_true":
            return 1.0
        if self.verdict == "confirmed_false":
            return 0.0
        if self.verdict == "probable_true":
            return 0.75
        if self.verdict == "probable_false":
            return 0.25
        total = self.confirmations + self.rejections
        if total == 0:
            return 0.5  # undetermined
        return self.confirmations / total

    def to_dict(self) -> dict:
        """Serialise to a plain dict (review history as a list of dicts)."""
        # Robustness: older code paths may have appended raw dicts.
        history = [
            entry if isinstance(entry, dict) else entry.to_dict()
            for entry in self.review_history
        ]
        return {
            "verdict": self.verdict,
            "evidence": self.evidence,
            "confirmed": self.confirmed,
            "confirmations": self.confirmations,
            "rejections": self.rejections,
            "last_reviewed": self.last_reviewed,
            "review_history": history,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Correctness":
        """Rebuild a ``Correctness`` from its dict form.

        A valid stored verdict is kept as is. When the verdict is missing
        or invalid it is backfilled from the legacy fields:
        ``confirmed`` -> "confirmed_true", otherwise any rejection ->
        "confirmed_false", otherwise "unknown".
        """
        c = cls()
        c.confirmed = d.get("confirmed", False)
        c.confirmations = d.get("confirmations", 0)
        c.rejections = d.get("rejections", 0)
        c.last_reviewed = d.get("last_reviewed")
        ev = d.get("evidence", [])
        if isinstance(ev, list):
            c.evidence = [item for item in ev if isinstance(item, dict)]
        # ``or []`` also tolerates an explicit null in stored data.
        c.review_history = [
            ReviewEntry.from_dict(raw) for raw in (d.get("review_history") or [])
        ]

        stored = d.get("verdict")
        verdict = stored.strip() if isinstance(stored, str) else ""
        if verdict in cls._VALID_VERDICTS:
            c.verdict = verdict
        # Backfill when the verdict is missing or invalid. The check must be
        # made on the stored value: the instance default "unknown" is itself
        # valid and would otherwise hide a missing verdict.
        elif c.confirmed:
            c.verdict = "confirmed_true"
        elif c.rejections > 0:
            c.verdict = "confirmed_false"
        else:
            c.verdict = "unknown"

        # Ensure the boolean stays consistent for older mixed data.
        if c.verdict == "confirmed_true":
            c.confirmed = True
        elif c.verdict == "confirmed_false":
            c.confirmed = False
        return c
@dataclass
class Engram:
    """A unit of memory (engram).

    Every fact, observation or error is stored as an engram. It carries
    its own confidence value and its own correction history.
    """

    id: UUID
    content: str
    metadata: Dict[str, Any] = field(default_factory=dict)
    correctness: Correctness = field(default_factory=Correctness)
    links: List[UUID] = field(default_factory=list)
    hierarchy: Dict[str, Any] = field(default_factory=dict)
    embedding: Optional[List[float]] = None  # computed on demand

    @classmethod
    def create(
        cls,
        content: str,
        source: str = "agent",
        confidence: float = 0.5,
        tags: Optional[List[str]] = None,
        session_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        grounding: Grounding = Grounding.ASSUMPTION,
        parent: Optional[UUID] = None,
    ) -> "Engram":
        """Factory: create a new engram with a fresh id and default metadata.

        ``created``, ``modified`` and ``last_accessed`` all start at the
        same timestamp; ``hash`` is derived from ``content``. When
        ``parent`` is given only its id is recorded; depth stays 0 until
        ``set_parent`` is called.
        """
        now = _now()
        return cls(
            id=uuid4(),
            content=content,
            metadata={
                "source": source,
                "confidence": confidence,
                "created": now,
                "modified": now,
                "access_count": 0,
                "last_accessed": now,
                # Copy so later in-place tag edits (e.g. auto_fix_grounding)
                # never mutate the caller's list.
                "tags": list(tags) if tags else [],
                "session_id": session_id,
                "agent_id": agent_id,
                "grounding": grounding.value,
                "hash": _hash(content),
            },
            correctness=Correctness(),
            links=[],
            hierarchy={
                "parent": str(parent) if parent else None,
                "children": [],
                "depth": 0,
            },
        )

    def touch(self) -> None:
        """Record an access: bump the counter and refresh ``last_accessed``."""
        self.metadata["access_count"] = self.metadata.get("access_count", 0) + 1
        self.metadata["last_accessed"] = _now()

    def add_link(self, other: "Engram") -> None:
        """Link this engram and ``other`` in both directions (no duplicates)."""
        if other.id not in self.links:
            self.links.append(other.id)
        if self.id not in other.links:
            other.links.append(self.id)

    def set_parent(self, parent: "Engram") -> None:
        """Make ``parent`` the parent of this engram.

        Sets this engram's parent id and depth (parent depth + 1) and
        registers this engram's id in the parent's children list.
        """
        self.hierarchy["parent"] = str(parent.id)
        self.hierarchy["depth"] = parent.hierarchy.get("depth", 0) + 1
        if str(self.id) not in parent.hierarchy.get("children", []):
            parent.hierarchy.setdefault("children", []).append(str(self.id))

    def compute_confidence(self) -> float:
        """Compute the overall confidence from several factors.

        Heuristic (no neural network needed for phase 1), clamped to [0, 1]:
        stored confidence * 0.3 + correctness score * 0.3
        + access frequency (up to 0.1) + recency (up to 0.1)
        + grounding (up to 0.2).
        """
        # Unknown grounding (0) earns no grounding boost, so such engrams
        # stay low without an explicit extra penalty.
        grounding = self.metadata.get("grounding", 0)
        base = self.metadata.get("confidence", 0.5)
        correctness_score = self.correctness.score()
        # Access frequency: saturates at 10 accesses.
        access = min(self.metadata.get("access_count", 0) / 10, 1.0) * 0.1
        # Age: newer information is more relevant; reaches 0 after 30 days.
        # The upper clamp keeps a future-dated "created" (clock skew) from
        # exceeding the 0.1 weight of this factor.
        age_days = _age_days(self.metadata.get("created", _now()))
        recency = min(max(0.0, 1.0 - (age_days / 30)), 1.0) * 0.1
        grounding_boost = (grounding / 4) * 0.2
        combined = (
            base * 0.3
            + correctness_score * 0.3
            + access
            + recency
            + grounding_boost
        )
        return min(max(combined, 0.0), 1.0)

    def validate_grounding(self) -> Dict[str, Any]:
        """Check the grounding rule (issue #8).

        Engrams with ``Grounding.UNKNOWN`` must carry an 'assumption' tag.
        Returns ``{"valid": True}`` or a dict with ``valid=False`` plus an
        issue text, a suggestion and an auto-fix identifier.
        """
        grounding = self.metadata.get("grounding", Grounding.UNKNOWN.value)
        tags = self.metadata.get("tags", [])
        if grounding == Grounding.UNKNOWN.value and "assumption" not in tags:
            return {
                "valid": False,
                "issue": "Unknown grounding ohne assumption-Tag",
                "suggestion": "Füge --tag assumption hinzu oder setze grounding=SOURCED/VERIFIED",
                "auto_fix": "tag_as_assumption",
            }
        return {"valid": True}

    def auto_fix_grounding(self) -> bool:
        """Apply the auto-fix for grounding problems.

        Adds the 'assumption' tag and sets grounding to ``ASSUMPTION``.
        Returns True when a fix was applied, False otherwise.
        """
        validation = self.validate_grounding()
        if not validation["valid"] and validation.get("auto_fix") == "tag_as_assumption":
            tags = self.metadata.get("tags", [])
            if "assumption" not in tags:
                tags.append("assumption")
            self.metadata["tags"] = tags
            self.metadata["grounding"] = Grounding.ASSUMPTION.value
            return True
        return False

    def to_dict(self) -> dict:
        """Serialise to a JSON-compatible dict (UUIDs become strings)."""
        return {
            "id": str(self.id),
            "content": self.content,
            "metadata": self.metadata,
            "correctness": self.correctness.to_dict(),
            "links": [str(link_id) for link_id in self.links],
            "hierarchy": self.hierarchy,
            "embedding": self.embedding,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Engram":
        """Rebuild an engram from its dict form.

        ``id`` and ``content`` are required; all other keys are optional.
        """
        return cls(
            id=UUID(d["id"]),
            content=d["content"],
            metadata=d.get("metadata", {}),
            correctness=Correctness.from_dict(d.get("correctness", {})),
            links=[UUID(link_id) for link_id in d.get("links", [])],
            hierarchy=d.get("hierarchy", {}),
            embedding=d.get("embedding"),
        )

    def to_json(self) -> str:
        """Serialise to an indented JSON string (non-ASCII kept as is)."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    @classmethod
    def from_json(cls, s: str) -> "Engram":
        """Rebuild an engram from a JSON string produced by ``to_json``."""
        return cls.from_dict(json.loads(s))
# --- Helpers ---
def _now() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def _hash(content: str) -> str:
    """Return the first 16 hex characters of the SHA-256 of ``content`` (UTF-8)."""
    hasher = hashlib.sha256()
    hasher.update(content.encode("utf-8"))
    return hasher.hexdigest()[:16]
def _age_days(iso_str: str) -> float:
    """Return the age in days of an ISO-8601 timestamp, relative to now (UTC).

    Timestamps without an offset are interpreted as UTC, and a trailing
    "Z" is accepted. Input that is not a string or cannot be parsed yields
    0.0, so callers treat it as "just created".
    """
    if not isinstance(iso_str, str):
        return 0.0
    text = iso_str.strip()
    # ``datetime.fromisoformat`` only accepts a trailing "Z" from Python 3.11.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    try:
        moment = datetime.fromisoformat(text)
    except ValueError:
        return 0.0
    if moment.tzinfo is None:
        # Naive and aware datetimes cannot be subtracted; assume UTC, the
        # zone this module writes its own timestamps in.
        moment = moment.replace(tzinfo=timezone.utc)
    return (datetime.now(timezone.utc) - moment).total_seconds() / 86400