feat: add verdict/evidence verification model

2026-05-29 11:30:24 +02:00
parent f10a5b9f19
commit 6d99c520e6
5 changed files with 227 additions and 51 deletions
--- a/fastapi_app.py
+++ b/fastapi_app.py
@@ -54,11 +54,22 @@ def get_db():
 def parse_engram(row: sqlite3.Row) -> dict:
    meta = json.loads(row["metadata_json"] or "{}")
    correctness = json.loads(row["correctness_json"] or "{}")
+    verdict = correctness.get("verdict")
+    if not isinstance(verdict, str) or not verdict:
+        # Back-compat inference for older rows
+        if correctness.get("confirmed", False):
+            verdict = "confirmed_true"
+        elif int(correctness.get("rejections", 0) or 0) > 0:
+            verdict = "confirmed_false"
+        else:
+            verdict = "unknown"
    return {
        "id": row["id"],
        "content": row["content"],
        "confidence": meta.get("confidence", 0.0),
        "confirmed": correctness.get("confirmed", False),
+        "verdict": verdict,
+        "evidence": correctness.get("evidence", []),
        "confirmations": correctness.get("confirmations", 0),
        "rejections": correctness.get("rejections", 0),
        "tags": meta.get("tags", []),
@@ -88,6 +99,8 @@ def _update_correctness(engram_id: str, *, action: str, reason: str | None = Non
        raise FileNotFoundError(f"Engram not found: {engram_id}")

    corr = json.loads(row["correctness_json"] or "{}")
+    corr.setdefault("verdict", None)
+    corr.setdefault("evidence", [])
    corr.setdefault("confirmed", False)
    corr.setdefault("confirmations", 0)
    corr.setdefault("rejections", 0)
@@ -106,10 +119,30 @@ def _update_correctness(engram_id: str, *, action: str, reason: str | None = Non
        corr["review_history"] = [entry]

    if action == "confirm":
+        corr["verdict"] = "confirmed_true"
        corr["confirmed"] = True
        corr["confirmations"] = int(corr.get("confirmations", 0) or 0) + 1
    elif action == "reject":
+        corr["verdict"] = "confirmed_false"
        corr["rejections"] = int(corr.get("rejections", 0) or 0) + 1
+        corr["confirmed"] = False
+
+    # Store minimal evidence for dashboard-driven actions.
+    try:
+        ev = corr.get("evidence")
+        if not isinstance(ev, list):
+            ev = []
+        ev.append(
+            {
+                "kind": "human",
+                "by": "dashboard",
+                "at": corr["last_reviewed"],
+                "action": action,
+            }
+        )
+        corr["evidence"] = ev[-50:]  # cap growth
+    except Exception:
+        pass

    c.execute(
        "UPDATE engrams SET correctness_json = ?, modified_at = ? WHERE id = ?",
@@ -232,8 +265,25 @@ def api_storage_stats():
    conn = get_db()
    c = conn.cursor()
    total = c.execute("SELECT COUNT(*) FROM engrams").fetchone()[0]
-    confirmed = c.execute(
-        "SELECT COUNT(*) FROM engrams WHERE json_extract(correctness_json, '$.confirmed') = 1"
+    confirmed_true = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_true'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)
+        )
+        """
+    ).fetchone()[0]
+    confirmed_false = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_false'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) > 0)
+        )
+        """
    ).fetchone()[0]
    sources = {
        r[0]: r[1]
@@ -268,8 +318,9 @@ def api_storage_stats():
    return {
        "sql": {
            "total_engrams": total,
-            "confirmed": confirmed,
-            "pending": total - confirmed,
+            "confirmed": confirmed_true,
+            "rejected": confirmed_false,
+            "pending": total - confirmed_true - confirmed_false,
            "by_source": sources,
        },
        "vector": {
@@ -310,10 +361,27 @@ def api_insights(limit: int = Query(8, ge=1, le=50)):
        "SELECT id, metadata_json, correctness_json, created_at, modified_at FROM engrams ORDER BY created_at DESC LIMIT 2000"
    ).fetchall()
    total = c.execute("SELECT COUNT(*) FROM engrams").fetchone()[0]
-    confirmed = c.execute(
-        "SELECT COUNT(*) FROM engrams WHERE json_extract(correctness_json, '$.confirmed') = 1"
+    confirmed_true = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_true'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)
+        )
+        """
    ).fetchone()[0]
-    pending = total - confirmed
+    confirmed_false = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_false'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) > 0)
+        )
+        """
+    ).fetchone()[0]
+    pending = total - confirmed_true - confirmed_false

    tag_counts: dict[str, int] = {}
    source_counts: dict[str, int] = {}
@@ -488,10 +556,27 @@ def api_stats():
    conn = get_db()
    c = conn.cursor()
    total = c.execute("SELECT COUNT(*) FROM engrams").fetchone()[0]
-    confirmed = c.execute(
-        "SELECT COUNT(*) FROM engrams WHERE json_extract(correctness_json, '$.confirmed') = 1"
+    confirmed_true = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_true'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)
+        )
+        """
    ).fetchone()[0]
-    pending = total - confirmed
+    confirmed_false = c.execute(
+        """
+        SELECT COUNT(*) FROM engrams
+        WHERE (
+          json_extract(correctness_json, '$.verdict') = 'confirmed_false'
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) > 0)
+        )
+        """
+    ).fetchone()[0]
+    pending = total - confirmed_true - confirmed_false
    errors = c.execute(
        "SELECT COUNT(*) FROM engrams WHERE json_extract(metadata_json, '$.tags') LIKE '%error%'"
    ).fetchone()[0]
@@ -501,7 +586,8 @@ def api_stats():
    conn.close()
    return {
        "total": total,
-        "confirmed": confirmed,
+        "confirmed": confirmed_true,
+        "rejected": confirmed_false,
        "pending": pending,
        "errors": errors,
        "avg_confidence": round(avg_conf, 2),
@@ -514,6 +600,7 @@ def api_engrams(
    offset: int = Query(0, ge=0),
    tag: str = Query(None),
    confirmed: bool = Query(None),
+    verdict: str = Query(None),
    search: str = Query(None),
    min_confidence: float = Query(0.0),
 ):
@@ -527,9 +614,30 @@ def api_engrams(
        params.append(f'%"{tag}"%')

    if confirmed is not None:
-        where_clauses.append(
-            f"json_extract(correctness_json, '$.confirmed') = {int(confirmed)}"
-        )
+        if confirmed:
+            # confirmed == statement is true (verdict confirmed_true)
+            where_clauses.append(
+                "("
+                "json_extract(correctness_json, '$.verdict') = 'confirmed_true' "
+                "OR (json_extract(correctness_json, '$.verdict') IS NULL AND json_extract(correctness_json, '$.confirmed') = 1)"
+                ")"
+            )
+        else:
+            # pending/unresolved (unknown/probable) but exclude confirmed_false.
+            where_clauses.append(
+                "("
+                "json_extract(correctness_json, '$.verdict') IN ('unknown','probable_true','probable_false') "
+                "OR (json_extract(correctness_json, '$.verdict') IS NULL "
+                "    AND json_extract(correctness_json, '$.confirmed') = 0 "
+                "    AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) = 0)"
+                ")"
+            )
+
+    if verdict:
+        v = verdict.strip()
+        if v in ("unknown", "probable_true", "probable_false", "confirmed_true", "confirmed_false"):
+            where_clauses.append("json_extract(correctness_json, '$.verdict') = ?")
+            params.append(v)

    if search:
        # Use FTS
@@ -740,6 +848,8 @@ def api_create_engram(content: str = Form(...), tags: str = Form(""), source: st
        "hash": "",
    }
    correctness = {
+        "verdict": "unknown",
+        "evidence": [],
        "confirmed": False,
        "confirmations": 0,
        "rejections": 0,
@@ -767,7 +877,12 @@ def api_pending(limit: int = Query(20, ge=1, le=100), offset: int = Query(0, ge=
    rows = c.execute(
        """
        SELECT * FROM engrams
-        WHERE json_extract(correctness_json, '$.confirmed') = 0
+        WHERE (
+          json_extract(correctness_json, '$.verdict') IN ('unknown','probable_true','probable_false')
+          OR (json_extract(correctness_json, '$.verdict') IS NULL
+              AND json_extract(correctness_json, '$.confirmed') = 0
+              AND COALESCE(json_extract(correctness_json, '$.rejections'), 0) = 0)
+        )
        ORDER BY created_at DESC
        LIMIT ? OFFSET ?
        """,