From 6d99c520e6cbc63acfeffe6cf51982ddfbc292ae Mon Sep 17 00:00:00 2001
From: Otto <otto@it-s-center.de>
Date: Fri, 29 May 2026 11:30:24 +0200
Subject: [PATCH] feat: add verdict/evidence verification model

---
 cron_tasks/verify_pending_external.py |  36 +------
 fastapi_app.py                        | 145 +++++++++++++++++++++++---
 src/engram.py                         |  63 ++++++++++-
 static/style.css                      |  18 ++++
 templates/dashboard.html              |  16 +++
 5 files changed, 227 insertions(+), 51 deletions(-)

diff --git a/cron_tasks/verify_pending_external.py b/cron_tasks/verify_pending_external.py
index b1ee909..24c11a1 100755
--- a/cron_tasks/verify_pending_external.py
+++ b/cron_tasks/verify_pending_external.py
@@ -78,7 +78,7 @@ def main() -> int:
     pending = [
         eg
         for eg in all_egs
-        if (not eg.correctness.confirmed and eg.correctness.rejections == 0)
+        if (not eg.correctness.is_final())
     ]
 
     confirmed = 0
@@ -94,16 +94,7 @@ def main() -> int:
         if src == "session" and (
             content.startswith("Session Summary (sess_") or content.startswith("Please remember ")
         ):
-            eg.correctness.rejections += 1
-            eg.correctness.last_reviewed = _now()
-            eg.correctness.review_history.append(
-                ReviewEntry(
-                    by="verify-pending",
-                    action="reject",
-                    at=_now(),
-                    note="Auto-reject: session placeholder",
-                )
-            )
+            eg.correctness.reject("verify-pending", "Auto-reject: session placeholder")
             store.save(eg)
             rejected += 1
             continue
@@ -118,30 +109,11 @@ def main() -> int:
                 still_pending += 1
                 continue
             if 200 <= status < 300:
-                eg.correctness.confirmed = True
-                eg.correctness.confirmations += 1
-                eg.correctness.last_reviewed = _now()
-                eg.correctness.review_history.append(
-                    ReviewEntry(
-                        by="verify-pending",
-                        action="confirm",
-                        at=_now(),
-                        note=f"Auto-confirm: web url ok ({status}) {url}",
-                    )
-                )
+                eg.correctness.confirm("verify-pending", f"Auto-confirm: web url ok ({status}) {url}")
                 store.save(eg)
                 confirmed += 1
             else:
-                eg.correctness.rejections += 1
-                eg.correctness.last_reviewed = _now()
-                eg.correctness.review_history.append(
-                    ReviewEntry(
-                        by="verify-pending",
-                        action="reject",
-                        at=_now(),
-                        note=f"Auto-reject: web url status={status} {url}",
-                    )
-                )
+                eg.correctness.reject("verify-pending", f"Auto-reject: web url status={status} {url}")
                 store.save(eg)
                 rejected += 1
             continue
diff --git a/fastapi_app.py b/fastapi_app.py
index 098714e..b5feed6 100644
--- a/fastapi_app.py
+++ b/fastapi_app.py
@@ -54,11 +54,22 @@ def get_db():
 def parse_engram(row: sqlite3.Row) -> dict:
     meta = json.loads(row["metadata_json"] or "{}")
     correctness = json.loads(row["correctness_json"] or "{}")
+    verdict = correctness.get("verdict")
+    if not isinstance(verdict, str) or not verdict:
+        # Back-compat inference for older rows
+        if correctness.get("confirmed", False):
+            verdict = "confirmed_true"
+        elif int(correctness.get("rejections", 0) or 0) > 0:
+            verdict = "confirmed_false"
+        else:
+            verdict = "unknown"
     return {
         "id": row["id"],
         "content": row["content"],
         "confidence": meta.get("confidence", 0.0),
         "confirmed": correctness.get("confirmed", False),
+        "verdict": verdict,
+        "evidence": correctness.get("evidence", []),
         "confirmations": correctness.get("confirmations", 0),
         "rejections": correctness.get("rejections", 0),
         "tags": meta.get("tags", []),
@@ -88,6 +99,8 @@ def _update_correctness(engram_id: str, *, action: str, reason: str | None = Non
         raise FileNotFoundError(f"Engram not found: {engram_id}")
 
     corr = json.loads(row["correctness_json"] or "{}")
+    corr.setdefault("verdict", None)
+    corr.setdefault("evidence", [])
     corr.setdefault("confirmed", False)
     corr.setdefault("confirmations", 0)
     corr.setdefault("rejections", 0)
@@ -106,10 +119,30 @@ def _update_correctness(engram_id: str, *, action: str, reason: str | None = Non
         corr["review_history"] = [entry]
 
     if action == "confirm":
+        corr["verdict"] = "confirmed_true"
         corr["confirmed"] = True
         corr["confirmations"] = int(corr.get("confirmations", 0) or 0) + 1
     elif action == "reject":
+        corr["verdict"] = "confirmed_false"
         corr["rejections"] = int(corr.get("rejections", 0) or 0) + 1
+        corr["confirmed"] = False
+
+    # Store minimal evidence for dashboard-driven actions.
+    try:
+        ev = corr.get("evidence")
+        if not isinstance(ev, list):
+            ev = []
+        ev.append(
+            {
+                "kind": "human",
+                "by": "dashboard",
+                "at": corr["last_reviewed"],
+                "action": action,
+            }
+        )
+        corr["evidence"] = ev[-50:]  # cap growth
+    except Exception:
+        pass
 
     c.execute(
         "UPDATE engrams SET correctness_json = ?, modified_at = ? WHERE id = ?",
@@ -232,8 +265,25 @@ def api_storage_stats():
     conn = get_db()
     c = conn.cursor()
     total = c.execute("SELECT COUNT(*) FROM engrams").fetchone()[0]
-    confirmed = c.execute(
-        "SELECT COUNT(*) FROM engrams WHERE json_extract(correctness_json, '$.confirmed') = 1"
+    confirmed_true = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_true'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)
+        )
+        """
+    ).fetchone()[0]
+    confirmed_false = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_false'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) > 0)
+        )
+        """
     ).fetchone()[0]
     sources = {
         r[0]: r[1]
@@ -268,8 +318,9 @@ def api_storage_stats():
     return {
         "sql": {
             "total_engrams": total,
-            "confirmed": confirmed,
-            "pending": total - confirmed,
+            "confirmed": confirmed_true,
+            "rejected": confirmed_false,
+            "pending": total - confirmed_true - confirmed_false,
             "by_source": sources,
         },
         "vector": {
@@ -310,10 +361,27 @@ def api_insights(limit: int = Query(8, ge=1, le=50)):
         "SELECT id, metadata_json, correctness_json, created_at, modified_at FROM engrams ORDER BY created_at DESC LIMIT 2000"
     ).fetchall()
     total = c.execute("SELECT COUNT(*) FROM engrams").fetchone()[0]
-    confirmed = c.execute(
-        "SELECT COUNT(*) FROM engrams WHERE json_extract(correctness_json, '$.confirmed') = 1"
+    confirmed_true = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_true'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)
+        )
+        """
     ).fetchone()[0]
-    pending = total - confirmed
+    confirmed_false = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_false'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) > 0)
+        )
+        """
+    ).fetchone()[0]
+    pending = total - confirmed_true - confirmed_false
 
     tag_counts: dict[str, int] = {}
     source_counts: dict[str, int] = {}
@@ -488,10 +556,27 @@ def api_stats():
     conn = get_db()
     c = conn.cursor()
     total = c.execute("SELECT COUNT(*) FROM engrams").fetchone()[0]
-    confirmed = c.execute(
-        "SELECT COUNT(*) FROM engrams WHERE json_extract(correctness_json, '$.confirmed') = 1"
+    confirmed_true = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_true'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)
+        )
+        """
     ).fetchone()[0]
-    pending = total - confirmed
+    confirmed_false = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_false'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) > 0)
+        )
+        """
+    ).fetchone()[0]
+    pending = total - confirmed_true - confirmed_false
     errors = c.execute(
         "SELECT COUNT(*) FROM engrams WHERE json_extract(metadata_json, '$.tags') LIKE '%error%'"
     ).fetchone()[0]
@@ -501,7 +586,8 @@ def api_stats():
     conn.close()
     return {
         "total": total,
-        "confirmed": confirmed,
+        "confirmed": confirmed_true,
+        "rejected": confirmed_false,
         "pending": pending,
         "errors": errors,
         "avg_confidence": round(avg_conf, 2),
@@ -514,6 +600,7 @@ def api_engrams(
     offset: int = Query(0, ge=0),
     tag: str = Query(None),
     confirmed: bool = Query(None),
+    verdict: str = Query(None),
     search: str = Query(None),
     min_confidence: float = Query(0.0),
 ):
@@ -527,9 +614,30 @@ def api_engrams(
         params.append(f'%"{tag}"%')
 
     if confirmed is not None:
-        where_clauses.append(
-            f"json_extract(correctness_json, '$.confirmed') = {int(confirmed)}"
-        )
+        if confirmed:
+            # confirmed == statement is true (verdict confirmed_true)
+            where_clauses.append(
+                "("
+                "json_extract(correctness_json, '$.verdict') = 'confirmed_true' "
+                "OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)"
+                ")"
+            )
+        else:
+            # pending/unresolved (unknown/probable) but exclude confirmed_false.
+            where_clauses.append(
+                "("
+                "json_extract(correctness_json, '$.verdict') IN ('unknown','probable_true','probable_false') "
+                "OR (json_extract(correctness_json, '$.verdict') IS NULL "
+                "    AND json_extract(correctness_json, '$.confirmed') = 0 "
+                "    AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) = 0)"
+                ")"
+            )
+
+    if verdict and verdict.strip() in ("unknown", "probable_true", "probable_false", "confirmed_true", "confirmed_false"):
+        # Rows written before the verdict field existed store no verdict; infer it the way parse_engram does
+        # (confirmed -> confirmed_true, rejections > 0 -> confirmed_false, else unknown) so legacy rows are filterable too.
+        where_clauses.append("COALESCE(NULLIF(json_extract(correctness_json, '$.verdict'), ''), CASE WHEN json_extract(correctness_json, '$.confirmed') = 1 THEN 'confirmed_true' WHEN COALESCE(json_extract(correctness_json, '$.rejections'), 0) > 0 THEN 'confirmed_false' ELSE 'unknown' END) = ?")
+        params.append(verdict.strip())
 
     if search:
         # Use FTS
@@ -740,6 +848,8 @@ def api_create_engram(content: str = Form(...), tags: str = Form(""), source: st
         "hash": "",
     }
     correctness = {
+        "verdict": "unknown",
+        "evidence": [],
         "confirmed": False,
         "confirmations": 0,
         "rejections": 0,
@@ -767,7 +877,12 @@ def api_pending(limit: int = Query(20, ge=1, le=100), offset: int = Query(0, ge=
     rows = c.execute(
         """
         SELECT * FROM engrams
-        WHERE json_extract(correctness_json, '$.confirmed') = 0
+        WHERE (
+          json_extract(correctness_json, '$.verdict') IN ('unknown','probable_true','probable_false')
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) = 0)
+        )
         ORDER BY created_at DESC
         LIMIT ? OFFSET ?
         """,
diff --git a/src/engram.py b/src/engram.py
index 8eabbb8..8bd59e6 100644
--- a/src/engram.py
+++ b/src/engram.py
@@ -40,26 +40,60 @@ class ReviewEntry:
 @dataclass
 class Correctness:
     """Verfolgt die Korrektheit eines Engramms über Zeit."""
+    # verdict model (not only binary confirm/reject)
+    # Values:
+    # - unknown
+    # - probable_true / probable_false
+    # - confirmed_true / confirmed_false
+    verdict: str = "unknown"
+    evidence: List[Dict[str, Any]] = field(default_factory=list)
     confirmed: bool = False
     confirmations: int = 0
     rejections: int = 0
     last_reviewed: Optional[str] = None
     review_history: List[ReviewEntry] = field(default_factory=list)
 
+    def is_final(self) -> bool:  # True only for the two confirmed_* verdicts; unknown/probable_* are not final
+        return self.verdict in ("confirmed_true", "confirmed_false")
+
+    def set_verdict(self, by: str, verdict: str, note: str = "", evidence: Optional[List[Dict[str, Any]]] = None) -> None:
+        """Store ``verdict`` (unrecognised values become "unknown"), sync ``confirmed`` and log a "set_verdict" review entry."""
+        verdict = (verdict or "").strip()
+        if verdict not in ("unknown", "probable_true", "probable_false", "confirmed_true", "confirmed_false"):
+            verdict = "unknown"
+        self.verdict = verdict
+        # Keep backward-compatible boolean in sync:
+        # historically, confirmed=True meant "this statement is correct".
+        self.confirmed = verdict == "confirmed_true"
+        self.last_reviewed = _now()
+        # Evidence is optional: anything that is not a list/tuple is ignored and
+        # only dict entries are kept, so this step cannot raise.
+        if isinstance(evidence, (list, tuple)):
+            self.evidence.extend(e for e in evidence if isinstance(e, dict))
+        self.review_history.append(ReviewEntry(by, "set_verdict", self.last_reviewed, f"{verdict}: {note}".strip()))
+
     def confirm(self, by: str, note: str = "") -> None:
         self.confirmations += 1
-        self.confirmed = True
-        self.last_reviewed = _now()
+        self.set_verdict(by, "confirmed_true", note)
+        # set_verdict() logged its own "set_verdict" entry; also keep the historic "confirm" action tag.
         self.review_history.append(ReviewEntry(by, "confirm", self.last_reviewed, note))
 
     def reject(self, by: str, note: str = "") -> None:
         self.rejections += 1
-        self.confirmed = False
-        self.last_reviewed = _now()
+        self.set_verdict(by, "confirmed_false", note)  # syncs confirmed/last_reviewed and logs a "set_verdict" entry
         self.review_history.append(ReviewEntry(by, "reject", self.last_reviewed, note))
 
     def score(self) -> float:
         """Confidence-Score aus Korrekturhistorie."""
+        # verdict-first scoring (explicit, non-binary)
+        if self.verdict == "confirmed_true":
+            return 1.0
+        if self.verdict == "confirmed_false":
+            return 0.0
+        if self.verdict == "probable_true":
+            return 0.75
+        if self.verdict == "probable_false":
+            return 0.25
         total = self.confirmations + self.rejections
         if total == 0:
             return 0.5  # Unbestimmt
@@ -74,6 +108,8 @@ class Correctness:
             else:
                 review_history.append(entry.to_dict())
         return {
+            "verdict": self.verdict,
+            "evidence": self.evidence,
             "confirmed": self.confirmed,
             "confirmations": self.confirmations,
             "rejections": self.rejections,
@@ -84,11 +120,30 @@ class Correctness:
     @classmethod
     def from_dict(cls, d: dict) -> "Correctness":
+        """Build a Correctness from its dict form; a missing/invalid verdict is inferred from confirmed/rejections."""
         c = cls()
+        verdict = d.get("verdict")
+        # "" marks an absent or non-string verdict: the dataclass default "unknown" is itself valid and would skip the backfill.
+        c.verdict = verdict.strip() if isinstance(verdict, str) else ""
         c.confirmed = d.get("confirmed", False)
         c.confirmations = d.get("confirmations", 0)
         c.rejections = d.get("rejections", 0)
         c.last_reviewed = d.get("last_reviewed")
+        ev = d.get("evidence", [])
+        c.evidence = [e for e in ev if isinstance(e, dict)] if isinstance(ev, list) else []
         c.review_history = [ReviewEntry.from_dict(r) for r in d.get("review_history", [])]
+        # Backfill verdict if missing/invalid, using the legacy confirmed flag and rejection counter.
+        if c.verdict not in ("unknown", "probable_true", "probable_false", "confirmed_true", "confirmed_false"):
+            if c.confirmed:
+                c.verdict = "confirmed_true"
+            elif c.rejections > 0:
+                c.verdict = "confirmed_false"
+            else:
+                c.verdict = "unknown"
+        # Ensure boolean stays consistent for older mixed data.
+        if c.verdict == "confirmed_true":
+            c.confirmed = True
+        elif c.verdict == "confirmed_false":
+            c.confirmed = False
         return c
 
 
diff --git a/static/style.css b/static/style.css
index 6a3e8f3..f2efe55 100644
--- a/static/style.css
+++ b/static/style.css
@@ -124,6 +124,24 @@ body {
     color: #8a9aff;
     font-size: 0.72rem;
 }
+
+.verdict-pill{ /* base badge; border/text colours are overridden by the v-* modifiers */
+    display:inline-block;
+    margin: 2px 6px 2px 0;
+    padding: 2px 8px;
+    border-radius: 999px;
+    font-size: 0.72rem;
+    font-weight: 800;
+    letter-spacing: 0.4px;
+    border: 1px solid #2a2a3a;
+    background: #1e1e28;
+    color: #cfd3ff;
+}
+.verdict-pill.v-true{ border-color:#2f6b3f; color:#aaf0b6; } /* confirmed_true */
+.verdict-pill.v-false{ border-color:#7a2c2c; color:#ffb3b3; } /* confirmed_false */
+.verdict-pill.v-prob-true{ border-color:#6c8af5; color:#cfd9ff; } /* probable_true */
+.verdict-pill.v-prob-false{ border-color:#b08a2a; color:#ffe2a3; } /* probable_false */
+.verdict-pill.v-unknown{ border-color:#3a3a55; color:#b9b9c9; } /* unknown */
 .muted {
     color: #888899;
     font-size: 0.8rem;
diff --git a/templates/dashboard.html b/templates/dashboard.html
index 8b1f378..4c3bd0e 100644
--- a/templates/dashboard.html
+++ b/templates/dashboard.html
@@ -29,6 +29,7 @@
                 <option value="all">Alle</option>
                 <option value="pending">Pending</option>
                 <option value="confirmed">Confirmed</option>
+                <option value="rejected">Rejected</option>
                 <option value="errors">Errors</option>
             </select>
         </div>
@@ -130,6 +131,7 @@ async function loadCards() {
     if (state.search) url += `&search=${encodeURIComponent(state.search)}`;
     if (state.filter === 'confirmed') url += '&confirmed=1';
     if (state.filter === 'pending') url += '&confirmed=0';
+    if (state.filter === 'rejected') url += '&verdict=confirmed_false';
     if (state.filter === 'errors') url += '&tag=error';
 
     const data = await api(url);
@@ -347,6 +349,7 @@ function renderCards() {
         <div class="card ${item.confirmed ? 'confirmed' : ''} ${item.rejections > 0 ? 'rejected' : ''}" data-id="${item.id}">
             <div class="card-header">
                 <span class="conf-badge" style="background:hsl(${item.confidence*120},70%,40%)">${Math.round(item.confidence*100)}%</span>
+                ${renderVerdictPill(item)}
                 <span class="tags">${item.tags.map(t => '<span class="tag">'+t+'</span>').join('')}</span>
                 <span class="date">${fmtDate(item.created)}</span>
             </div>
@@ -365,6 +368,19 @@ function renderCards() {
     `).join('');
 }
 
+function renderVerdictPill(item) {
+    // Verdict badge for one card ('' when the item has no verdict). Only fixed labels are emitted:
+    // an unrecognised verdict renders as UNKNOWN rather than being interpolated into the HTML unescaped.
+    const pills = new Map([
+        ['confirmed_true', ['v-true', 'TRUE']], ['confirmed_false', ['v-false', 'FALSE']],
+        ['probable_true', ['v-prob-true', 'LIKELY']], ['probable_false', ['v-prob-false', 'UNLIKELY']],
+    ]);
+    const v = (item.verdict || '').toString();
+    if (!v) return '';
+    const [cls, label] = pills.get(v) || ['v-unknown', 'UNKNOWN'];
+    return `<span class="verdict-pill ${cls}">${label}</span>`;
+}
+
 function fmtDate(iso) {
     const d = new Date(iso);
     return `${d.getDate().toString().padStart(2,'0')}.${(d.getMonth()+1).toString().padStart(2,'0')} ${d.getHours().toString().padStart(2,'0')}:${d.getMinutes().toString().padStart(2,'0')}`;