# second-brain/scripts/process_pending_engrams.py

#!/usr/bin/env python3
"""
Process pending second brain engrams.
- For unconfirmed, unrejected engrams: evaluate confidence
- If confidence > 0.8: confirm
- If confidence < 0.3: reject
- Otherwise: mark for review (leave as is)
- Check for stale topics and archive if needed
- Produce summary report
"""

import sys
import json
from datetime import datetime, timezone
from pathlib import Path

# Make the project importable when this script is executed directly.
# ``from src.store import ...`` resolves the ``src`` package through its parent
# directory, so the project root has to be on sys.path. The ``src`` directory
# itself stays on the path (and ahead of the root) so that any flat imports
# that relied on the previous setup keep resolving the same way.
# ``resolve()`` guards against a relative ``__file__``, for which
# ``.parent.parent`` would collapse to "." instead of the project root.
base_dir = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(base_dir))
sys.path.insert(0, str(base_dir / "src"))

# Project imports must come after the sys.path setup above.
from src.store import EngramStore
from src.engram import Engram, Grounding

# SQLite database holding the engrams, located under the project root.
DB_PATH = base_dir / "data" / "brain.sqlite"


def is_stale(
    engram: "Engram",
    days_threshold: int = 90,
    access_days_threshold: int = 60,
    min_access_count: int = 3,
) -> bool:
    """Return True if *engram* is old, rarely accessed and not accessed recently.

    The decision is based on three entries of ``engram.metadata``:
    ``created`` and ``last_accessed`` (ISO-8601 strings, a trailing ``Z`` is
    accepted) and ``access_count``. A missing or empty ``last_accessed``
    falls back to ``created``; a missing ``access_count`` counts as 0.

    Args:
        engram: Object exposing a ``metadata`` mapping.
        days_threshold: Minimum age in days (exclusive) to be considered old.
        access_days_threshold: Minimum number of days (exclusive) since the
            last access.
        min_access_count: Engrams accessed fewer times than this are
            considered rarely accessed.

    Returns:
        True only when all three conditions hold. Metadata that cannot be
        interpreted (missing or malformed timestamps, non-numeric access
        count) yields False, i.e. the engram is never archived on bad data.
    """

    def _parse_utc(stamp: str) -> datetime:
        # "Z" is rewritten because datetime.fromisoformat only accepts it
        # natively from Python 3.11 on.
        parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
        # Naive timestamps cannot be subtracted from an aware "now"; they are
        # treated as UTC here.  NOTE(review): confirm that writers of naive
        # timestamps really store UTC.
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    metadata = engram.metadata
    # ``or`` also covers keys that are present but hold None / "".
    created = metadata.get("created") or ""
    last_accessed = metadata.get("last_accessed") or created
    access_count = metadata.get("access_count") or 0

    try:
        created_dt = _parse_utc(created)
        last_accessed_dt = _parse_utc(last_accessed)
        rarely_accessed = access_count < min_access_count
    except (AttributeError, TypeError, ValueError):
        # Unusable metadata: stay on the safe side and do not flag as stale.
        return False

    now = datetime.now(timezone.utc)
    age_days = (now - created_dt).total_seconds() / 86400
    days_since_access = (now - last_accessed_dt).total_seconds() / 86400

    return (
        age_days > days_threshold
        and rarely_accessed
        and days_since_access > access_days_threshold
    )


def _evaluate_engram(store, eg, actions, details, confirm_threshold, reject_threshold):
    """Archive *eg* if it is stale, then confirm/reject it based on confidence.

    Updates the ``actions`` counters and appends human-readable lines to
    ``details`` in place. Any exception propagates to the caller.
    """
    confidence = eg.compute_confidence()
    content = eg.content
    preview = content[:80] + ("..." if len(content) > 80 else "")
    summary = f"[{str(eg.id)[:8]}] {preview} (conf: {confidence:.2f})"

    if is_stale(eg):
        # Stale engrams are tagged rather than deleted. ``or []`` tolerates a
        # "tags" key that is present but None.
        tags = list(eg.metadata.get("tags") or [])
        if "archived" not in tags:
            tags.append("archived")
            eg.metadata["tags"] = tags
            eg.metadata["archived_at"] = datetime.now(timezone.utc).isoformat()
            store.save(eg)
            actions["archived_stale"] += 1
            details.append(f"📦 Archived stale: {summary}")
        # Archiving does not exempt an engram from the confidence thresholds
        # below, so a stale engram can be counted twice in the report.

    if confidence > confirm_threshold:
        eg.correctness.confirm(by="auto_processor", note=f"Auto-confirmed: confidence {confidence:.2f}")
        store.save(eg)
        actions["confirmed"] += 1
        details.append(f"✅ Confirmed: {summary}")
    elif confidence < reject_threshold:
        eg.correctness.reject(by="auto_processor", note=f"Auto-rejected: confidence {confidence:.2f}")
        store.save(eg)
        actions["rejected"] += 1
        details.append(f"❌ Rejected: {summary}")
    else:
        actions["left_for_review"] += 1
        details.append(f"⏳ Review later: {summary}")


def _build_report(total, pending_count, actions, details, confirm_threshold, reject_threshold):
    """Render the plain-text summary report from the collected counters and detail lines."""
    rule = "=" * 60
    lines = [
        rule,
        "PENDING ENGRAMS PROCESSING REPORT",
        rule,
        f"Timestamp: {datetime.now(timezone.utc).isoformat()}",
        f"Total engrams: {total}",
        f"Pending engrams processed: {pending_count}",
        "",
        "ACTIONS TAKEN:",
        f"  ✅ Auto-confirmed (confidence > {confirm_threshold}): {actions['confirmed']}",
        f"  ❌ Auto-rejected (confidence < {reject_threshold}): {actions['rejected']}",
        f"  ⏳ Left for review ({reject_threshold} ≤ confidence ≤ {confirm_threshold}): {actions['left_for_review']}",
        f"  📦 Archived stale topics: {actions['archived_stale']}",
        f"  ⚠️ Errors: {actions['errors']}",
        "",
        "DETAILS:",
        *details,
        "",
        rule,
    ]
    return "\n".join(lines)


def process_pending_engrams(confirm_threshold: float = 0.8, reject_threshold: float = 0.3) -> dict:
    """Evaluate all pending engrams, apply auto-confirm/reject, and write a report.

    An engram is pending when its ``correctness.verdict`` is neither
    ``"confirmed_true"`` nor ``"confirmed_false"``. Each pending engram is
    archived if stale, confirmed when its confidence is above
    ``confirm_threshold``, rejected when below ``reject_threshold``, and left
    untouched otherwise. The report is printed and saved under
    ``<project root>/reports``.

    Args:
        confirm_threshold: Confidence above which an engram is auto-confirmed.
        reject_threshold: Confidence below which an engram is auto-rejected.

    Returns:
        Dict of counters with the keys ``confirmed``, ``rejected``,
        ``left_for_review``, ``archived_stale`` and ``errors``.

    Raises:
        ValueError: If ``reject_threshold`` is greater than ``confirm_threshold``.
    """
    if reject_threshold > confirm_threshold:
        raise ValueError("reject_threshold must not exceed confirm_threshold")

    fetch_limit = 10000
    actions = {
        "confirmed": 0,
        "rejected": 0,
        "left_for_review": 0,
        "archived_stale": 0,
        "errors": 0
    }
    details = []

    store = EngramStore(str(DB_PATH))
    try:
        all_engrams = store.get_all(limit=fetch_limit)
        print(f"Total engrams in database: {len(all_engrams)}")
        if len(all_engrams) >= fetch_limit:
            # Hitting the limit exactly suggests more rows exist than were fetched.
            print(f"⚠️ Fetched {fetch_limit} engrams (the fetch limit); results may be truncated.")

        # Pending = no final verdict yet (neither confirmed true nor confirmed false).
        pending = [
            eg for eg in all_engrams
            if eg.correctness.verdict not in ("confirmed_true", "confirmed_false")
        ]
        print(f"Pending engrams (unconfirmed/unrejected): {len(pending)}")

        for eg in pending:
            # One bad engram must not abort the batch: record it and continue.
            try:
                _evaluate_engram(store, eg, actions, details, confirm_threshold, reject_threshold)
            except Exception as e:
                actions["errors"] += 1
                engram_ref = str(getattr(eg, "id", "unknown"))[:8]
                details.append(f"⚠️ Error processing engram [{engram_ref}]: {e}")
    finally:
        # Release the store even if fetching or processing fails.
        store.close()

    report = _build_report(
        len(all_engrams), len(pending), actions, details, confirm_threshold, reject_threshold
    )

    # Print to stdout
    print("\n" + report)

    # Save report to file
    report_dir = Path(__file__).resolve().parent.parent / "reports"
    report_dir.mkdir(parents=True, exist_ok=True)
    report_file = report_dir / f"pending_engrams_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
    report_file.write_text(report, encoding="utf-8")
    print(f"\n📄 Report saved to: {report_file}")

    return actions


# Script entry point: run one processing pass and announce completion.
if __name__ == "__main__":
    action_counts = process_pending_engrams()
    print("\nProcessing complete.")