329 lines
12 KiB
Python
329 lines
12 KiB
Python
"""
|
|
Engram - Gedächtniseinheit für das Second Brain.
|
|
Rein Python, keine externen Abhängigkeiten.
|
|
"""
|
|
|
|
import json
|
|
import hashlib
|
|
from dataclasses import dataclass, field, asdict
|
|
from datetime import datetime, timezone
|
|
from enum import IntEnum
|
|
from typing import Optional, List, Dict, Any
|
|
from uuid import uuid4, UUID
|
|
|
|
|
|
class Grounding(IntEnum):
    """Origin/reliability of a piece of information.

    Members are integers running from 0 (``UNKNOWN``) up to 4 (``VERIFIED``),
    so they can be compared and used directly in arithmetic.
    """

    UNKNOWN = 0
    ASSUMPTION = 1
    INFERRED = 2
    SOURCED = 3
    VERIFIED = 4
|
|
|
|
|
|
@dataclass
class ReviewEntry:
    """One entry in the correction history of an engram."""

    by: str  # "user" or an agent_id
    action: str  # "confirm", "reject", "modify"
    at: str  # ISO-8601 timestamp
    note: str = ""

    def to_dict(self) -> dict:
        """Return the entry as a plain dict with keys by/action/at/note."""
        return {key: getattr(self, key) for key in ("by", "action", "at", "note")}

    @classmethod
    def from_dict(cls, d: dict) -> "ReviewEntry":
        """Build an entry from a dict.

        ``by``, ``action`` and ``at`` are required (``KeyError`` if absent);
        ``note`` is optional and defaults to the empty string.
        """
        return cls(
            by=d["by"],
            action=d["action"],
            at=d["at"],
            note=d.get("note", ""),
        )
|
|
|
|
|
|
@dataclass
class Correctness:
    """Track the correctness of an engram over time.

    The verdict model is not just binary confirm/reject. ``verdict`` is one of:

    - ``unknown``
    - ``probable_true`` / ``probable_false``
    - ``confirmed_true`` / ``confirmed_false``

    ``confirmed`` is a backward-compatible boolean that mirrors
    ``verdict == "confirmed_true"``.
    """

    verdict: str = "unknown"
    evidence: List[Dict[str, Any]] = field(default_factory=list)
    confirmed: bool = False
    confirmations: int = 0
    rejections: int = 0
    last_reviewed: Optional[str] = None
    review_history: List[ReviewEntry] = field(default_factory=list)

    # Deliberately un-annotated so the dataclass machinery treats it as a
    # plain class attribute rather than an instance field.
    _VERDICTS = (
        "unknown",
        "probable_true",
        "probable_false",
        "confirmed_true",
        "confirmed_false",
    )

    def is_final(self) -> bool:
        """Return True when the verdict is ``confirmed_true`` or ``confirmed_false``."""
        return self.verdict in ("confirmed_true", "confirmed_false")

    def set_verdict(self, by: str, verdict: str, note: str = "", evidence: Optional[List[Dict[str, Any]]] = None) -> None:
        """Set the verdict, merge evidence and log the change.

        Args:
            by: Who made the change ("user" or an agent id).
            verdict: New verdict; anything outside the known set (including
                empty/None) is stored as ``"unknown"``.
            note: Free-text note, stored in the history entry.
            evidence: Optional iterable of evidence dicts. Items that are not
                dicts are skipped; a non-iterable value is ignored.
        """
        verdict = (verdict or "").strip()
        if verdict not in self._VERDICTS:
            verdict = "unknown"
        self.verdict = verdict
        # Keep the backward-compatible boolean in sync: historically,
        # confirmed=True meant "this statement is correct".
        self.confirmed = verdict == "confirmed_true"
        self.last_reviewed = _now()
        if evidence:
            try:
                usable = [item for item in evidence if isinstance(item, dict)]
            except TypeError:
                # Evidence merging is best-effort: a non-iterable value is
                # dropped rather than failing the verdict update.
                usable = []
            self.evidence.extend(usable)
        self.review_history.append(
            ReviewEntry(by, "set_verdict", self.last_reviewed, f"{verdict}: {note}".strip())
        )

    def confirm(self, by: str, note: str = "") -> None:
        """Count a confirmation and set the verdict to ``confirmed_true``.

        Two history entries are written: the generic ``set_verdict`` entry
        and the historic ``confirm`` action tag.
        """
        self.confirmations += 1
        self.set_verdict(by, "confirmed_true", note)
        # Preserve the historic action tag too.
        self.review_history.append(ReviewEntry(by, "confirm", self.last_reviewed, note))

    def reject(self, by: str, note: str = "") -> None:
        """Count a rejection and set the verdict to ``confirmed_false``.

        Two history entries are written: the generic ``set_verdict`` entry
        and the historic ``reject`` action tag.
        """
        self.rejections += 1
        self.set_verdict(by, "confirmed_false", note)
        self.review_history.append(ReviewEntry(by, "reject", self.last_reviewed, note))

    def score(self) -> float:
        """Return a confidence score in [0, 1] derived from the review state.

        An explicit verdict wins: 1.0 / 0.0 for confirmed true/false and
        0.75 / 0.25 for probable true/false. For ``unknown`` the score is the
        share of confirmations among all reviews, or 0.5 when there are none.
        """
        fixed_scores = {
            "confirmed_true": 1.0,
            "confirmed_false": 0.0,
            "probable_true": 0.75,
            "probable_false": 0.25,
        }
        if self.verdict in fixed_scores:
            return fixed_scores[self.verdict]
        total = self.confirmations + self.rejections
        if total == 0:
            return 0.5  # undetermined
        return self.confirmations / total

    def to_dict(self) -> dict:
        """Serialize to a plain dict (review entries become dicts)."""
        # Backwards/robustness: older code paths may have appended raw dicts.
        review_history: List[dict] = [
            entry if isinstance(entry, dict) else entry.to_dict()
            for entry in self.review_history
        ]
        return {
            "verdict": self.verdict,
            "evidence": self.evidence,
            "confirmed": self.confirmed,
            "confirmations": self.confirmations,
            "rejections": self.rejections,
            "last_reviewed": self.last_reviewed,
            "review_history": review_history,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Correctness":
        """Rebuild an instance from ``to_dict`` output or older stored data.

        A missing or unrecognised verdict is backfilled from the legacy
        fields: ``confirmed`` -> ``confirmed_true``, otherwise any rejection
        -> ``confirmed_false``, otherwise ``unknown``. Null counters and a
        null ``review_history`` are treated as 0 and empty.
        """
        c = cls()
        c.confirmed = d.get("confirmed", False)
        # "or 0" also covers an explicit null in stored data.
        c.confirmations = d.get("confirmations") or 0
        c.rejections = d.get("rejections") or 0
        c.last_reviewed = d.get("last_reviewed")

        ev = d.get("evidence", [])
        if isinstance(ev, list):
            c.evidence = [e for e in ev if isinstance(e, dict)]

        c.review_history = [
            r if isinstance(r, ReviewEntry) else ReviewEntry.from_dict(r)
            for r in (d.get("review_history") or [])
        ]

        raw_verdict = d.get("verdict")
        verdict = raw_verdict.strip() if isinstance(raw_verdict, str) else ""
        if verdict in cls._VERDICTS:
            c.verdict = verdict
        # Backfill when the verdict is missing or invalid. The decision must
        # be made on the stored value, not on the instance default, because
        # the default "unknown" is itself a valid verdict.
        elif c.confirmed:
            c.verdict = "confirmed_true"
        elif c.rejections > 0:
            c.verdict = "confirmed_false"
        else:
            c.verdict = "unknown"

        # Ensure the boolean stays consistent for older mixed data.
        if c.verdict == "confirmed_true":
            c.confirmed = True
        elif c.verdict == "confirmed_false":
            c.confirmed = False
        return c
|
|
|
|
|
|
@dataclass
class Engram:
    """A single memory unit (engram).

    Every fact, observation or error is stored as an engram. It carries its
    own confidence value and its own correction history.
    """

    id: UUID
    content: str
    metadata: Dict[str, Any] = field(default_factory=dict)
    correctness: Correctness = field(default_factory=Correctness)
    links: List[UUID] = field(default_factory=list)
    hierarchy: Dict[str, Any] = field(default_factory=dict)
    embedding: Optional[List[float]] = None  # computed on demand

    @classmethod
    def create(
        cls,
        content: str,
        source: str = "agent",
        confidence: float = 0.5,
        tags: Optional[List[str]] = None,
        session_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        grounding: Grounding = Grounding.ASSUMPTION,
        parent: Optional[UUID] = None,
    ) -> "Engram":
        """Factory: create a new engram with sensible defaults.

        Generates a fresh UUID, stamps created/modified/last_accessed with
        the current time and stores a short content hash in the metadata.
        ``tags`` is copied, so later changes to the engram's tags do not
        affect the caller's list.
        """
        now = _now()
        return cls(
            id=uuid4(),
            content=content,
            metadata={
                "source": source,
                "confidence": confidence,
                "created": now,
                "modified": now,
                "access_count": 0,
                "last_accessed": now,
                # Copy: auto_fix_grounding() appends to this list in place.
                "tags": list(tags) if tags else [],
                "session_id": session_id,
                "agent_id": agent_id,
                "grounding": grounding.value,
                "hash": _hash(content),
            },
            correctness=Correctness(),
            links=[],
            hierarchy={"parent": str(parent) if parent else None, "children": [], "depth": 0},
        )

    def touch(self) -> None:
        """Record an access: bump the counter and refresh ``last_accessed``."""
        self.metadata["access_count"] = self.metadata.get("access_count", 0) + 1
        self.metadata["last_accessed"] = _now()

    def add_link(self, other: "Engram") -> None:
        """Link this engram and ``other`` in both directions (no duplicates)."""
        if other.id not in self.links:
            self.links.append(other.id)
        if self.id not in other.links:
            other.links.append(self.id)

    def set_parent(self, parent: "Engram") -> None:
        """Establish a parent/child relationship.

        Stores the parent's id and ``parent depth + 1`` on this engram and
        registers this engram's id in the parent's ``children`` list.
        """
        self.hierarchy["parent"] = str(parent.id)
        self.hierarchy["depth"] = parent.hierarchy.get("depth", 0) + 1
        if str(self.id) not in parent.hierarchy.get("children", []):
            parent.hierarchy.setdefault("children", []).append(str(self.id))

    def compute_confidence(self) -> float:
        """Compute the overall confidence from several factors.

        A heuristic for phase 1, no neural network needed. The result is the
        sum of the following terms, clamped to [0, 1]:

        - stored base confidence * 0.3
        - correctness score * 0.3
        - access frequency, saturating at 10 accesses, worth up to 0.1
        - recency, falling linearly to 0 after 30 days, worth up to 0.1
        - grounding level / 4, worth up to 0.2

        Grounding ``UNKNOWN`` (0) contributes no grounding boost, so such
        engrams stay low without an explicit penalty.
        """
        grounding = self.metadata.get("grounding", 0)
        base = self.metadata.get("confidence", 0.5)

        # Correctness
        correctness_score = self.correctness.score()

        # Access frequency (frequently used engrams are often more important)
        access = min(self.metadata.get("access_count", 0) / 10, 1.0) * 0.1

        # Age (newer information is more relevant). The upper clamp keeps a
        # "created" timestamp in the future from exceeding the 0.1 weight.
        age_days = _age_days(self.metadata.get("created", _now()))
        recency = min(max(0.0, 1.0 - (age_days / 30)), 1.0) * 0.1

        # Grounding
        grounding_boost = (grounding / 4) * 0.2

        combined = (
            base * 0.3
            + correctness_score * 0.3
            + access
            + recency
            + grounding_boost
        )
        return min(max(combined, 0.0), 1.0)

    def validate_grounding(self) -> Dict[str, Any]:
        """Check the grounding rule (issue #8).

        Engrams with ``Grounding.UNKNOWN`` must carry an ``assumption`` tag.
        Returns ``{"valid": True}`` when the rule holds; otherwise a dict
        with ``valid=False``, an issue text, a suggestion and the auto-fix
        identifier ``"tag_as_assumption"``.
        """
        grounding = self.metadata.get("grounding", Grounding.UNKNOWN.value)
        tags = self.metadata.get("tags", [])

        if grounding == Grounding.UNKNOWN.value and "assumption" not in tags:
            return {
                "valid": False,
                "issue": "Unknown grounding ohne assumption-Tag",
                "suggestion": "Füge --tag assumption hinzu oder setze grounding=SOURCED/VERIFIED",
                "auto_fix": "tag_as_assumption",
            }
        return {"valid": True}

    def auto_fix_grounding(self) -> bool:
        """Apply the auto-fix for grounding problems.

        When validation fails with the ``tag_as_assumption`` fix, add the
        ``assumption`` tag and set grounding to ``ASSUMPTION``.

        Returns:
            True if a fix was applied, False if nothing needed fixing.
        """
        validation = self.validate_grounding()
        if not validation["valid"] and validation.get("auto_fix") == "tag_as_assumption":
            tags = self.metadata.get("tags", [])
            if "assumption" not in tags:
                tags.append("assumption")
            self.metadata["tags"] = tags
            self.metadata["grounding"] = Grounding.ASSUMPTION.value
            return True
        return False

    def to_dict(self) -> dict:
        """Serialize to a plain dict; UUIDs are rendered as strings."""
        return {
            "id": str(self.id),
            "content": self.content,
            "metadata": self.metadata,
            "correctness": self.correctness.to_dict(),
            "links": [str(link) for link in self.links],
            "hierarchy": self.hierarchy,
            "embedding": self.embedding,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Engram":
        """Rebuild an engram from ``to_dict`` output.

        ``id`` and ``content`` are required; every other key falls back to an
        empty default.
        """
        return cls(
            id=UUID(d["id"]),
            content=d["content"],
            metadata=d.get("metadata", {}),
            correctness=Correctness.from_dict(d.get("correctness", {})),
            links=[UUID(link) for link in d.get("links", [])],
            hierarchy=d.get("hierarchy", {}),
            embedding=d.get("embedding"),
        )

    def to_json(self) -> str:
        """Serialize to pretty-printed JSON, keeping non-ASCII characters."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    @classmethod
    def from_json(cls, s: str) -> "Engram":
        """Rebuild an engram from a JSON string produced by ``to_json``."""
        return cls.from_dict(json.loads(s))
|
|
|
|
|
|
# --- Helpers ---
|
|
|
|
def _now() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
|
|
|
|
def _hash(content: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of ``content``.

    The text is encoded as UTF-8 before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(content.encode("utf-8"))
    return hasher.hexdigest()[:16]
|
|
|
|
|
|
def _age_days(iso_str: str) -> float:
    """Return the age of an ISO-8601 timestamp in days, relative to now (UTC).

    Args:
        iso_str: ISO-8601 timestamp. A trailing ``Z`` is accepted, and a
            timestamp without an offset is interpreted as UTC.

    Returns:
        Elapsed days as a float (negative for timestamps in the future), or
        0.0 when the value is not a parseable timestamp string.
    """
    try:
        text = iso_str.strip()
        # datetime.fromisoformat() only accepts the "Z" suffix from
        # Python 3.11 on; normalise it so older interpreters parse it too.
        if text.endswith(("Z", "z")):
            text = text[:-1] + "+00:00"
        dt = datetime.fromisoformat(text)
    except (AttributeError, TypeError, ValueError):
        # Not a string, or not a valid ISO timestamp: treat as "no age".
        return 0.0
    if dt.tzinfo is None:
        # Subtracting a naive datetime from an aware one raises TypeError;
        # treat offset-less timestamps as UTC instead of reporting age 0.
        dt = dt.replace(tzinfo=timezone.utc)
    return (datetime.now(timezone.utc) - dt).total_seconds() / 86400
|