""" Engram - Gedächtniseinheit für das Second Brain. Rein Python, kein externe Abhängigkeiten. """ import json import hashlib from dataclasses import dataclass, field, asdict from datetime import datetime, timezone from enum import IntEnum from typing import Optional, List, Dict, Any from uuid import uuid4, UUID class Grounding(IntEnum): """Herkunft/Verlässlichkeit einer Information.""" UNKNOWN = 0 ASSUMPTION = 1 INFERRED = 2 SOURCED = 3 VERIFIED = 4 @dataclass class ReviewEntry: """Ein Eintrag im Korrekturverlauf.""" by: str # "user" oder agent_id action: str # "confirm", "reject", "modify" at: str # ISO-8601 timestamp note: str = "" def to_dict(self) -> dict: return {"by": self.by, "action": self.action, "at": self.at, "note": self.note} @classmethod def from_dict(cls, d: dict) -> "ReviewEntry": return cls(d["by"], d["action"], d["at"], d.get("note", "")) @dataclass class Correctness: """Verfolgt die Korrektheit eines Engramms über Zeit.""" # verdict model (not only binary confirm/reject) # Values: # - unknown # - probable_true / probable_false # - confirmed_true / confirmed_false verdict: str = "unknown" evidence: List[Dict[str, Any]] = field(default_factory=list) confirmed: bool = False confirmations: int = 0 rejections: int = 0 last_reviewed: Optional[str] = None review_history: List[ReviewEntry] = field(default_factory=list) def is_final(self) -> bool: return self.verdict in ("confirmed_true", "confirmed_false") def set_verdict(self, by: str, verdict: str, note: str = "", evidence: Optional[List[Dict[str, Any]]] = None) -> None: verdict = (verdict or "").strip() if verdict not in ("unknown", "probable_true", "probable_false", "confirmed_true", "confirmed_false"): verdict = "unknown" self.verdict = verdict # Keep backward-compatible boolean in sync: # historically, confirmed=True meant "this statement is correct". self.confirmed = verdict == "confirmed_true" self.last_reviewed = _now() if evidence: try: self.evidence.extend([e for e in evidence if isinstance(e, dict)]) except Exception: pass self.review_history.append(ReviewEntry(by, "set_verdict", self.last_reviewed, f"{verdict}: {note}".strip())) def confirm(self, by: str, note: str = "") -> None: self.confirmations += 1 self.set_verdict(by, "confirmed_true", note) # Preserve historic action tag too self.review_history.append(ReviewEntry(by, "confirm", self.last_reviewed, note)) def reject(self, by: str, note: str = "") -> None: self.rejections += 1 self.set_verdict(by, "confirmed_false", note) self.review_history.append(ReviewEntry(by, "reject", self.last_reviewed, note)) def score(self) -> float: """Confidence-Score aus Korrekturhistorie.""" # verdict-first scoring (explicit, non-binary) if self.verdict == "confirmed_true": return 1.0 if self.verdict == "confirmed_false": return 0.0 if self.verdict == "probable_true": return 0.75 if self.verdict == "probable_false": return 0.25 total = self.confirmations + self.rejections if total == 0: return 0.5 # Unbestimmt return self.confirmations / total def to_dict(self) -> dict: # Backwards/robustness: older code paths may have appended raw dicts. review_history: List[dict] = [] for entry in self.review_history: if isinstance(entry, dict): review_history.append(entry) else: review_history.append(entry.to_dict()) return { "verdict": self.verdict, "evidence": self.evidence, "confirmed": self.confirmed, "confirmations": self.confirmations, "rejections": self.rejections, "last_reviewed": self.last_reviewed, "review_history": review_history, } @classmethod def from_dict(cls, d: dict) -> "Correctness": c = cls() verdict = d.get("verdict") if isinstance(verdict, str) and verdict.strip(): c.verdict = verdict.strip() c.confirmed = d.get("confirmed", False) c.confirmations = d.get("confirmations", 0) c.rejections = d.get("rejections", 0) c.last_reviewed = d.get("last_reviewed") ev = d.get("evidence", []) if isinstance(ev, list): c.evidence = [e for e in ev if isinstance(e, dict)] c.review_history = [ReviewEntry.from_dict(r) for r in d.get("review_history", [])] # Backfill verdict if missing/invalid. if c.verdict not in ("unknown", "probable_true", "probable_false", "confirmed_true", "confirmed_false"): if c.confirmed: c.verdict = "confirmed_true" elif c.rejections > 0: c.verdict = "confirmed_false" else: c.verdict = "unknown" # Ensure boolean stays consistent for older mixed data. if c.verdict == "confirmed_true": c.confirmed = True elif c.verdict == "confirmed_false": c.confirmed = False return c @dataclass class Engram: """ Eine Gedächtniseinheit (Engramm). Jedes Faktum, jede Beobachtung, jeder Fehler wird als Engramm gespeichert. Es trägt seinen eigenen Vertrauenswert und seinen Korrekturverlauf mit. """ id: UUID content: str metadata: Dict[str, Any] = field(default_factory=dict) correctness: Correctness = field(default_factory=Correctness) links: List[UUID] = field(default_factory=list) hierarchy: Dict[str, Any] = field(default_factory=dict) embedding: Optional[List[float]] = None # Wird bei Bedarf berechnet @classmethod def create( cls, content: str, source: str = "agent", confidence: float = 0.5, tags: Optional[List[str]] = None, session_id: Optional[str] = None, agent_id: Optional[str] = None, grounding: Grounding = Grounding.ASSUMPTION, parent: Optional[UUID] = None, ) -> "Engram": """Factory: Erstellt ein neues Engramm mit sinnvollen Defaults.""" now = _now() return cls( id=uuid4(), content=content, metadata={ "source": source, "confidence": confidence, "created": now, "modified": now, "access_count": 0, "last_accessed": now, "tags": tags or [], "session_id": session_id, "agent_id": agent_id, "grounding": grounding.value, "hash": _hash(content), }, correctness=Correctness(), links=[], hierarchy={"parent": str(parent) if parent else None, "children": [], "depth": 0}, ) def touch(self) -> None: """Markiert Zugriff, aktualisiert Zähler und Zeit.""" self.metadata["access_count"] = self.metadata.get("access_count", 0) + 1 self.metadata["last_accessed"] = _now() def add_link(self, other: "Engram") -> None: """Bidirektionale Verknüpfung mit anderem Engramm.""" if other.id not in self.links: self.links.append(other.id) if self.id not in other.links: other.links.append(self.id) def set_parent(self, parent: "Engram") -> None: """Setzt Eltern-Kind-Beziehung.""" self.hierarchy["parent"] = str(parent.id) self.hierarchy["depth"] = parent.hierarchy.get("depth", 0) + 1 if str(self.id) not in parent.hierarchy.get("children", []): parent.hierarchy.setdefault("children", []).append(str(self.id)) def compute_confidence(self) -> float: """ Berechnet Gesamt-Confidence aus mehreren Faktoren. Kein Neuronales Netz nötig - Heuristik für Phase 1. """ # Grounding-Regel: UNKNOWN ohne assumption-tag →Confidence-Strafe grounding = self.metadata.get("grounding", 0) if grounding == Grounding.UNKNOWN.value and "assumption" not in self.metadata.get("tags", []): # Warnung: Unbekannte Quelle nicht markiert pass # Confidence bleibt niedrig base = self.metadata.get("confidence", 0.5) # Korrektheit correctness_score = self.correctness.score() # Zugriffshäufigkeit (beliebte Engramme sind oft wichtiger) access = min(self.metadata.get("access_count", 0) / 10, 1.0) * 0.1 # Alter (neuere Informationen sind relevanter) age_days = _age_days(self.metadata.get("created", _now())) recency = max(0, 1.0 - (age_days / 30)) * 0.1 # Nach 30 Tagen = 0 # Grounding grounding_boost = (grounding / 4) * 0.2 combined = ( base * 0.3 + correctness_score * 0.3 + access + recency + grounding_boost ) return min(max(combined, 0.0), 1.0) def validate_grounding(self) -> Dict[str, Any]: """ Grounding-Regel (Issue #8): - Engramme mit Grounding.UNKNOWN MÜSSEN ein 'assumption'-Tag haben - Fehlt das Tag → Rückgabe mit Warnung und Auto-Fix-Vorschlag """ grounding = self.metadata.get("grounding", Grounding.UNKNOWN.value) tags = self.metadata.get("tags", []) if grounding == Grounding.UNKNOWN.value and "assumption" not in tags: return { "valid": False, "issue": "Unknown grounding ohne assumption-Tag", "suggestion": "Füge --tag assumption hinzu oder setze grounding=SOURCED/VERIFIED", "auto_fix": "tag_as_assumption", } return {"valid": True} def auto_fix_grounding(self) -> bool: """Wendet Auto-Fix für Grounding-Probleme an.""" validation = self.validate_grounding() if not validation["valid"] and validation.get("auto_fix") == "tag_as_assumption": tags = self.metadata.get("tags", []) if "assumption" not in tags: tags.append("assumption") self.metadata["tags"] = tags self.metadata["grounding"] = Grounding.ASSUMPTION.value return True return False def to_dict(self) -> dict: return { "id": str(self.id), "content": self.content, "metadata": self.metadata, "correctness": self.correctness.to_dict(), "links": [str(l) for l in self.links], "hierarchy": self.hierarchy, "embedding": self.embedding, } @classmethod def from_dict(cls, d: dict) -> "Engram": e = cls( id=UUID(d["id"]), content=d["content"], metadata=d.get("metadata", {}), correctness=Correctness.from_dict(d.get("correctness", {})), links=[UUID(l) for l in d.get("links", [])], hierarchy=d.get("hierarchy", {}), embedding=d.get("embedding"), ) return e def to_json(self) -> str: return json.dumps(self.to_dict(), ensure_ascii=False, indent=2) @classmethod def from_json(cls, s: str) -> "Engram": return cls.from_dict(json.loads(s)) # --- Helpers --- def _now() -> str: return datetime.now(timezone.utc).isoformat() def _hash(content: str) -> str: return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16] def _age_days(iso_str: str) -> float: try: dt = datetime.fromisoformat(iso_str) return (datetime.now(timezone.utc) - dt).total_seconds() / 86400 except Exception: return 0.0