Files
second-brain/src/loop_detector.py

116 lines
3.5 KiB
Python

"""
loop_detector.py - Session-Cache mit SHA256-Dedup.
Erkennt und bricht Loops bei wiederholten Anfragen/Antworten.
"""
import hashlib
import json
import time
from typing import Dict, Optional, Any
from dataclasses import dataclass, field, asdict
from pathlib import Path
# Default cache file: "data/loop_cache.json" under the parent of this file's directory.
_CACHE_PATH = Path(__file__).resolve().parent.parent / "data" / "loop_cache.json"
# Number of most recent history entries written to the cache file on each save.
_MAX_HISTORY = 30
_LOOP_THRESHOLD = 3 # Same response 3x = loop
# NOTE(review): not referenced in the visible code - presumably reserved for
# fuzzy similarity matching; confirm before relying on or removing it.
_SIMILARITY_THRESHOLD = 0.92
def _sha(text: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of *text*.

    The text is encoded as UTF-8 before hashing.
    """
    digest = hashlib.sha256(text.encode("utf-8"))
    return digest.hexdigest()[:16]
def _normalize(text: str) -> str:
    """Lower-case *text* and collapse every whitespace run into one space.

    Leading and trailing whitespace is dropped, so inputs that differ only in
    letter case or spacing compare equal afterwards.
    """
    words = text.lower().split()
    return " ".join(words)
@dataclass
class SessionEntry:
    """One recorded query/response pair of a session."""

    # Hash identifying the query.
    query_hash: str
    # Short human-readable excerpt of the query.
    query_preview: str
    # Hash identifying the response.
    response_hash: str
    # Short human-readable excerpt of the response.
    response_preview: str
    # Time the entry was recorded.
    timestamp: float
    # Free-form extra data; each instance gets its own fresh dict.
    metadata: Dict[str, Any] = field(default_factory=dict)
class LoopDetector:
    """Detect loops caused by repeated identical queries or responses.

    Each checked query/response pair is reduced to short hashes of its
    normalized text and appended to a history that is persisted as JSON, so
    repetitions are also recognised after a restart.  Only exact matches of
    the normalized text count; no fuzzy similarity is computed here.
    """

    def __init__(self, cache_path: Optional[str] = None):
        """Create the detector and load any previously saved history.

        Args:
            cache_path: Location of the JSON cache file.  When omitted or
                empty, the module-level ``_CACHE_PATH`` is used.  The parent
                directory is created if it does not exist.
        """
        self.path = Path(cache_path) if cache_path else _CACHE_PATH
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self._history: list = []
        self._load()

    def _load(self):
        """Read the history from ``self.path``.

        A missing file leaves the current history untouched.  An unreadable
        or corrupt file, or JSON that is not a list, resets the history to
        empty.  List items that are not dicts are dropped, because ``check``
        calls ``.get`` on every entry.
        """
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # No cache written yet - nothing to load.
            return
        except (OSError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
            # the cache is best-effort, so a damaged file just starts fresh.
            self._history = []
            return
        if not isinstance(loaded, list):
            self._history = []
            return
        self._history = [item for item in loaded if isinstance(item, dict)]

    def _save(self):
        """Write the most recent ``_MAX_HISTORY`` entries to ``self.path``."""
        with open(self.path, "w", encoding="utf-8") as f:
            json.dump(self._history[-_MAX_HISTORY:], f, ensure_ascii=False)

    def check(self, query: str, response: str) -> Dict[str, Any]:
        """Record a query/response pair and report whether it forms a loop.

        The counts cover earlier entries only; the pair passed in is not
        counted.  A loop is reported when the response matches at least
        ``_LOOP_THRESHOLD - 1`` earlier responses (i.e. the current one is the
        ``_LOOP_THRESHOLD``-th occurrence), or otherwise when the query
        matches at least ``_LOOP_THRESHOLD`` earlier queries.

        Args:
            query: The request text.
            response: The answer text produced for ``query``.

        Returns:
            A dict with the keys ``"loop_detected"`` (bool),
            ``"similar_queries"`` (int), ``"repeated_response"`` (int) and
            ``"suggestion"`` (str, empty when no loop was detected).

        Raises:
            OSError: If the cache file cannot be written.
        """
        q_hash = _sha(_normalize(query))
        r_hash = _sha(_normalize(response))

        similar_queries = sum(
            1 for past in self._history if past.get("query_hash") == q_hash
        )
        repeated_response = sum(
            1 for past in self._history if past.get("response_hash") == r_hash
        )

        self._history.append(
            {
                "query_hash": q_hash,
                "query_preview": query[:100],
                "response_hash": r_hash,
                "response_preview": response[:100],
                "timestamp": time.time(),
            }
        )
        # Keep the in-memory window identical to what is persisted; otherwise
        # a long-running process grows without bound and counts over a longer
        # window than a freshly restarted one.
        del self._history[:-_MAX_HISTORY]
        self._save()

        loop_detected = repeated_response >= _LOOP_THRESHOLD - 1
        suggestion = ""
        if loop_detected:
            suggestion = (
                f"⚠️ Loop erkannt! Diese Antwort wurde {repeated_response}x wiederholt. "
                "Versuch eine alternative Herangehensweise oder frage nach Klärung."
            )
        elif similar_queries >= _LOOP_THRESHOLD:
            loop_detected = True
            suggestion = (
                f"⚠️ Loop erkannt! Ähnliche Anfrage {similar_queries}x gestellt. "
                "Prüfe ob die Aufgabe sich geändert hat oder ob ein Problem blockiert."
            )

        return {
            "loop_detected": loop_detected,
            "similar_queries": similar_queries,
            "repeated_response": repeated_response,
            "suggestion": suggestion,
        }

    def reset(self):
        """Clear the loop history in memory and on disk."""
        self._history = []
        self._save()