Files
second-brain/scripts/process_pending_engrams.py

163 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
Process pending second brain engrams.
- For unconfirmed, unrejected engrams: evaluate confidence
- If confidence > 0.8: confirm
- If confidence < 0.3: reject
- Otherwise: mark for review (leave as is)
- Check for stale engrams and tag them as archived (they are not deleted)
- Produce summary report
"""
import sys
import json
from datetime import datetime, timezone
from pathlib import Path
# Project root: this script lives in <root>/scripts/.
base_dir = Path(__file__).parent.parent

# Keep <root>/src first on sys.path so modules inside it can be imported by
# their bare names.
sys.path.insert(0, str(base_dir / "src"))

# The imports below use the ``src.`` package prefix, which is resolved from
# the directory that CONTAINS ``src`` (the project root), not from ``src``
# itself. Append the root as a fallback so the script also works when it is
# launched directly without PYTHONPATH set. It is appended, not inserted, so
# any import that already resolves keeps resolving to the same module.
if str(base_dir) not in sys.path:
    sys.path.append(str(base_dir))

# Import using absolute module paths
from src.store import EngramStore
from src.engram import Engram, Grounding

# SQLite database read and updated by this script.
DB_PATH = base_dir / "data" / "brain.sqlite"
def _parse_timestamp(value):
    """Return *value* as a timezone-aware datetime, or None if it is unusable.

    Accepts ISO-8601 strings (a trailing "Z" is treated as UTC) and datetime
    objects. Naive values are assumed to be UTC so they can be compared with
    ``datetime.now(timezone.utc)`` without raising.
    """
    if isinstance(value, datetime):
        parsed = value
    elif isinstance(value, str) and value:
        try:
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None
    else:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def is_stale(
    engram: "Engram",
    days_threshold: int = 90,
    *,
    min_access_count: int = 3,
    idle_days: int = 60,
) -> bool:
    """Check if an engram is stale (old and rarely accessed).

    Reads ``created``, ``last_accessed`` and ``access_count`` from
    ``engram.metadata``. An engram is stale only when ALL of these hold:

    - it was created more than ``days_threshold`` days ago,
    - it has been accessed fewer than ``min_access_count`` times,
    - its last access was more than ``idle_days`` days ago.

    Args:
        engram: Object exposing a ``metadata`` mapping.
        days_threshold: Minimum age in days.
        min_access_count: Access count at or above which an engram is never
            considered stale.
        idle_days: Minimum number of days since the last access.

    Returns:
        True if the engram is stale. False otherwise, including when the
        timestamps or access count are missing or cannot be interpreted
        (staleness is best-effort and must never abort processing).
    """
    metadata = engram.metadata
    created = metadata.get("created") or ""
    # A missing, None or empty last_accessed means "never accessed since
    # creation", so fall back to the creation time.
    last_accessed = metadata.get("last_accessed") or created

    try:
        access_count = int(metadata.get("access_count") or 0)
    except (TypeError, ValueError):
        return False

    created_dt = _parse_timestamp(created)
    last_accessed_dt = _parse_timestamp(last_accessed)
    if created_dt is None or last_accessed_dt is None:
        return False

    # Use a single "now" so both durations are measured from the same instant.
    now = datetime.now(timezone.utc)
    age_days = (now - created_dt).total_seconds() / 86400
    days_since_access = (now - last_accessed_dt).total_seconds() / 86400

    return (
        age_days > days_threshold
        and access_count < min_access_count
        and days_since_access > idle_days
    )
def _build_report(total_count, pending_count, actions, details):
    """Render the plain-text summary for one processing run."""
    rule = "=" * 60
    lines = [
        rule,
        "PENDING ENGRAMS PROCESSING REPORT",
        rule,
        f"Timestamp: {datetime.now(timezone.utc).isoformat()}",
        f"Total engrams: {total_count}",
        f"Pending engrams processed: {pending_count}",
        "",
        "ACTIONS TAKEN:",
        f" ✅ Auto-confirmed (confidence > 0.8): {actions['confirmed']}",
        f" ❌ Auto-rejected (confidence < 0.3): {actions['rejected']}",
        f" ⏳ Left for review (0.3 ≤ confidence ≤ 0.8): {actions['left_for_review']}",
        f" 📦 Archived stale topics: {actions['archived_stale']}",
        f" ⚠️ Errors: {actions['errors']}",
        "",
        "DETAILS:",
        *details,
        "",
        rule,
    ]
    return "\n".join(lines)


def process_pending_engrams():
    """Main processing function.

    Loads up to 10000 engrams from the store at ``DB_PATH`` and handles every
    engram whose verdict is neither ``confirmed_true`` nor ``confirmed_false``:

    1. If ``is_stale`` reports it stale and it is not already tagged, it gets
       an ``archived`` tag plus an ``archived_at`` timestamp and is saved.
    2. Its confidence is then evaluated regardless of staleness:
       above 0.8 it is confirmed, below 0.3 it is rejected, otherwise it is
       left untouched for manual review.

    A summary report is printed and written to ``<root>/reports/``.

    Returns:
        dict mapping ``confirmed``, ``rejected``, ``left_for_review``,
        ``archived_stale`` and ``errors`` to counts. A stale engram is counted
        under ``archived_stale`` AND under one of the three confidence
        outcomes, so the counts can add up to more than the number of pending
        engrams.
    """
    store = EngramStore(str(DB_PATH))
    try:
        # Get all engrams
        all_engrams = store.get_all(limit=10000)
        print(f"Total engrams in database: {len(all_engrams)}")

        # Pending = no final verdict yet (neither confirmed nor rejected).
        decided = ("confirmed_true", "confirmed_false")
        pending = [eg for eg in all_engrams if eg.correctness.verdict not in decided]
        print(f"Pending engrams (unconfirmed/unrejected): {len(pending)}")

        actions = {
            "confirmed": 0,
            "rejected": 0,
            "left_for_review": 0,
            "archived_stale": 0,
            "errors": 0,
        }
        details = []

        for eg in pending:
            # Captured before anything else so a failure can still be traced
            # back to the engram that caused it.
            short_id = "unknown"
            try:
                short_id = str(eg.id)[:8]
                confidence = eg.compute_confidence()
                preview = eg.content[:80] + ("..." if len(eg.content) > 80 else "")
                label = f"[{short_id}] {preview} (conf: {confidence:.2f})"

                # Archiving is non-destructive: the engram is only tagged.
                if is_stale(eg):
                    tags = eg.metadata.get("tags", [])
                    if "archived" not in tags:
                        tags.append("archived")
                        eg.metadata["tags"] = tags
                        eg.metadata["archived_at"] = datetime.now(timezone.utc).isoformat()
                        store.save(eg)
                        actions["archived_stale"] += 1
                        details.append(f"📦 Archived stale: {label}")

                # Confidence thresholds apply to every pending engram,
                # including ones that were just archived as stale.
                if confidence > 0.8:
                    eg.correctness.confirm(
                        by="auto_processor",
                        note=f"Auto-confirmed: confidence {confidence:.2f}",
                    )
                    store.save(eg)
                    actions["confirmed"] += 1
                    details.append(f"✅ Confirmed: {label}")
                elif confidence < 0.3:
                    eg.correctness.reject(
                        by="auto_processor",
                        note=f"Auto-rejected: confidence {confidence:.2f}",
                    )
                    store.save(eg)
                    actions["rejected"] += 1
                    details.append(f"❌ Rejected: {label}")
                else:
                    actions["left_for_review"] += 1
                    details.append(f"⏳ Review later: {label}")
            except Exception as exc:
                # Per-engram boundary: one bad record must not abort the run.
                actions["errors"] += 1
                details.append(f"⚠️ Error processing engram [{short_id}]: {exc}")

        # Generate summary report
        report = _build_report(len(all_engrams), len(pending), actions, details)

        # Print to stdout
        print("\n" + report)

        # Save report to file (the file name uses local wall-clock time).
        report_dir = Path(__file__).parent.parent / "reports"
        report_dir.mkdir(parents=True, exist_ok=True)
        report_file = report_dir / f"pending_engrams_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        report_file.write_text(report, encoding="utf-8")
        print(f"\n📄 Report saved to: {report_file}")

        return actions
    finally:
        # Always release the store, even if loading or report writing failed.
        store.close()
if __name__ == "__main__":
    # Script entry point: run a single processing pass over the pending
    # engrams, keep the returned action counts, then announce completion.
    result = process_pending_engrams()
    print("\nProcessing complete.")