chore: sync local workspace state

This commit is contained in:
2026-05-30 00:38:57 +02:00
parent 20098a3253
commit e6e8eba8f6
8 changed files with 5626 additions and 68 deletions

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python3
import argparse
import json
import hashlib
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from src.engram import Engram, Grounding
from src.store import EngramStore
def _now_utc_iso() -> str:
    """Return the current time in UTC as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def _hash16(text: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of ``text``.

    The text is encoded as UTF-8 before hashing.
    """
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()[:16]
def _iter_jsonl(path: Path) -> Iterable[Dict[str, Any]]:
    """Yield every JSON object found in a JSON Lines file.

    Blank lines are ignored, and lines whose JSON value is not an object
    (lists, strings, numbers, ...) are skipped silently.

    Raises:
        SystemExit: if a non-blank line is not valid JSON; the message names
            the file and the 1-based line number.
    """
    with path.open("r", encoding="utf-8") as handle:
        for number, raw in enumerate(handle, start=1):
            text = raw.strip()
            if text:
                try:
                    record = json.loads(text)
                except Exception:
                    raise SystemExit(f"Invalid JSON at {path}:{number}")
                if isinstance(record, dict):
                    yield record
def _marker_to_content(marker_obj: Dict[str, Any]) -> Tuple[str, List[Dict[str, Any]]]:
    """Render a marker record as engram text and collect its source evidence.

    The text starts with a ``WEBDEV_MARKER:`` line, followed by optional
    ``Details``, ``Checks`` (first 8 entries) and ``Sources`` (first 12
    entries) sections separated by blank lines. For well-formed records the
    output is unchanged, so content hashes of earlier imports still match.

    Args:
        marker_obj: Parsed JSON object; reads the keys ``marker``,
            ``details``, ``checks`` and ``sources``.

    Returns:
        ``(content, evidence)`` where ``evidence`` is the full list of
        ``{"url", "title"}`` dicts, including entries beyond the 12 rendered.

    Raises:
        ValueError: if ``marker`` is missing, null, or blank.
    """

    def _text(value: Any) -> str:
        # JSON null means "field absent"; str(None) would leak the literal
        # text "None" into the content (and past the missing-marker guard).
        return "" if value is None else str(value).strip()

    marker = _text(marker_obj.get("marker"))
    if not marker:
        raise ValueError("missing marker")
    details = _text(marker_obj.get("details"))

    checks = marker_obj.get("checks")
    if not isinstance(checks, list):
        checks = []
    sources = marker_obj.get("sources")
    if not isinstance(sources, list):
        sources = []

    evidence: List[Dict[str, Any]] = []
    for src in sources:
        if not isinstance(src, dict):
            continue
        url = src.get("url")
        # A source without a usable string URL is dropped on its own instead
        # of raising and invalidating the whole marker.
        if not isinstance(url, str) or not url.strip():
            continue
        title = src.get("title")
        evidence.append(
            {
                "url": url.strip(),
                "title": title.strip() if isinstance(title, str) else "",
            }
        )

    lines: List[str] = [f"WEBDEV_MARKER: {marker}"]
    if details:
        lines += ["", f"Details: {details}"]
    if checks:
        lines += ["", "Checks:"]
        for check in checks[:8]:
            check_text = _text(check)
            if check_text:
                lines.append(f"- {check_text}")
    if evidence:
        lines += ["", "Sources:"]
        for ev in evidence[:12]:
            if ev["title"]:
                lines.append(f"- {ev['title']}: {ev['url']}")
            else:
                lines.append(f"- {ev['url']}")
    return "\n".join(lines).strip(), evidence
def _tags_for(marker_obj: Dict[str, Any]) -> List[str]:
    """Return the tag list for a marker: three fixed tags plus its ``area``.

    The ``area`` value is appended only when it is present, non-blank and not
    already one of the fixed tags.
    """
    tags = ["web_design", "web_development", "mobile"]
    area = marker_obj.get("area")
    # A JSON null must not become the literal tag "None".
    area_tag = "" if area is None else str(area).strip()
    if area_tag and area_tag not in tags:
        tags.append(area_tag)
    return tags
def _load_existing_hashes(store: "EngramStore") -> set:
    """Collect the ``hash`` values already present in stored engram metadata.

    Best effort: rows whose metadata cannot be read are skipped, and if the
    query itself fails (e.g. schema mismatch) an empty set is returned so the
    import proceeds without deduplicating against earlier runs.
    """
    hashes: set = set()
    try:
        cur = store._conn.execute("SELECT metadata_json FROM engrams")  # noqa: SLF001
        for row in cur:
            try:
                found = json.loads(row["metadata_json"]).get("hash")
            except Exception:
                continue
            if isinstance(found, str) and found:
                hashes.add(found)
    except Exception:
        return set()
    return hashes


def import_markers(
    db_path: Path,
    jsonl_paths: List[Path],
    source: str,
    verdict: str,
    agent_id: str,
    dry_run: bool,
) -> Dict[str, int]:
    """Import ``web_design_marker`` records from JSONL files as engrams.

    Records are deduplicated by a 16-character content hash, both within this
    run and against hashes already stored in the database.

    Args:
        db_path: SQLite database opened through ``EngramStore``.
        jsonl_paths: JSON Lines files to read, processed in the given order.
        source: Value passed as the engram ``source``.
        verdict: Correctness verdict recorded on every imported engram.
        agent_id: Agent recorded on the engram and verdict; when empty, the
            record's own ``agent_id`` is used for the engram and
            ``"importer"`` for the verdict.
        dry_run: When true, nothing is saved; ``imported`` then counts the
            records that would have been written.

    Returns:
        Counters with the keys ``seen``, ``imported``, ``skipped_dup`` and
        ``skipped_invalid``.

    Raises:
        SystemExit: propagated from ``_iter_jsonl`` on an invalid JSON line.
    """
    stats = {"seen": 0, "imported": 0, "skipped_dup": 0, "skipped_invalid": 0}
    store = EngramStore(str(db_path))
    try:
        # One set covers both pre-existing hashes and those added this run.
        known_hashes = _load_existing_hashes(store)

        for path in jsonl_paths:
            for marker_obj in _iter_jsonl(path):
                if (marker_obj.get("kind") or "") != "web_design_marker":
                    continue
                stats["seen"] += 1

                try:
                    content, evidence = _marker_to_content(marker_obj)
                except Exception:
                    stats["skipped_invalid"] += 1
                    continue

                h = _hash16(content)
                if h in known_hashes:
                    stats["skipped_dup"] += 1
                    continue
                known_hashes.add(h)

                eg = Engram.create(
                    content=content,
                    source=source,
                    confidence=0.75,
                    tags=_tags_for(marker_obj),
                    session_id=None,
                    agent_id=agent_id or str(marker_obj.get("agent_id") or ""),
                    grounding=Grounding.SOURCED,
                )
                # Overwrite hash to exactly match our content representation.
                eg.metadata["hash"] = h
                eg.metadata["modified"] = _now_utc_iso()
                eg.metadata["created"] = marker_obj.get("created_at") or eg.metadata["created"]
                eg.correctness.set_verdict(
                    by=agent_id or "importer",
                    verdict=verdict,
                    note=f"Imported from {path.name}",
                    evidence=evidence,
                )

                if not dry_run:
                    store.save(eg)
                stats["imported"] += 1
    finally:
        # Release the database handle even when a file fails to parse.
        store.close()
    return stats
def main() -> None:
    """Command-line entry point for the marker importer.

    Parses the arguments, expands the file glob, runs ``import_markers`` and
    prints a JSON summary (database path, matched files, counters) to stdout.

    Raises:
        SystemExit: if the glob matches no files.
    """
    parser = argparse.ArgumentParser(description="Import web_design_marker JSONL files into brain.sqlite")
    parser.add_argument("--db", default="second-brain/data/brain.sqlite", help="Path to brain.sqlite")
    parser.add_argument("--glob", default="/tmp/web_design_markers_*.jsonl", help="Glob for marker JSONL files")
    parser.add_argument("--source", default="web_research", help="Engram source")
    parser.add_argument("--verdict", default="probable_true", help="Correctness verdict")
    parser.add_argument("--agent-id", default="web_research_import", help="Agent id to record")
    parser.add_argument("--dry-run", action="store_true", help="Parse/dedupe but do not write to DB")
    options = parser.parse_args()

    database = Path(options.db)

    pattern = options.glob
    # Path.glob only accepts relative patterns, so a pattern starting with
    # "/" is re-rooted at the filesystem root; anything else is matched
    # relative to the current directory.
    if pattern.startswith("/"):
        matches = Path("/").glob(pattern.lstrip("/"))
    else:
        matches = Path(".").glob(pattern)
    jsonl_paths = sorted(matches)
    if not jsonl_paths:
        raise SystemExit(f"No files match glob: {pattern}")

    stats = import_markers(
        db_path=database,
        jsonl_paths=jsonl_paths,
        source=options.source,
        verdict=options.verdict,
        agent_id=options.agent_id,
        dry_run=bool(options.dry_run),
    )

    summary = {
        "db": str(database),
        "files": [str(found) for found in jsonl_paths],
        "stats": stats,
    }
    print(json.dumps(summary, ensure_ascii=False, indent=2))
# Run the importer CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Process pending second brain engrams.
- For unconfirmed, unrejected engrams: evaluate confidence
- If confidence > 0.8: confirm
- If confidence < 0.3: reject
- Otherwise: mark for review (leave as is)
- Check for stale topics and archive if needed
- Produce summary report
"""
import sys
import json
from datetime import datetime, timezone
from pathlib import Path
# Make the project importable when this script is executed directly.
# The ``from src.…`` imports below resolve against the project root, so the
# root itself must be on sys.path. The ``src`` directory is kept at the front
# as before, in case modules inside it import each other by bare name
# (NOTE(review): confirm whether that entry is still needed).
# ``resolve()`` keeps ``parent.parent`` correct when ``__file__`` is relative.
base_dir = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(base_dir))
sys.path.insert(0, str(base_dir / "src"))

# Import using absolute module paths
from src.store import EngramStore
from src.engram import Engram, Grounding

# SQLite database processed by this script.
DB_PATH = base_dir / "data" / "brain.sqlite"
def _parse_utc_timestamp(value: str) -> datetime:
    """Parse an ISO 8601 string into a timezone-aware datetime.

    A trailing ``Z`` is accepted. Timestamps without an offset are assumed to
    be UTC (NOTE(review): confirm against how engrams are written).
    """
    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def is_stale(
    engram: "Engram",
    days_threshold: int = 90,
    *,
    min_access_count: int = 3,
    idle_days_threshold: int = 60,
) -> bool:
    """Check if an engram is stale (old and rarely accessed).

    An engram is stale when all three conditions hold: it was created more
    than ``days_threshold`` days ago, it has been accessed fewer than
    ``min_access_count`` times, and its last access is more than
    ``idle_days_threshold`` days ago.

    Args:
        engram: Object whose ``metadata`` mapping provides ``created``,
            ``access_count`` and ``last_accessed``.
        days_threshold: Minimum age in days.
        min_access_count: Access count below which the engram is rarely used.
        idle_days_threshold: Minimum number of days since the last access.

    Returns:
        True when the engram is stale; False otherwise, including when the
        timestamps are missing or cannot be parsed.
    """
    metadata = engram.metadata
    created = metadata.get("created") or ""
    # A key that is present but null means "never accessed": fall back to the
    # creation time and a zero count instead of failing the whole check.
    last_accessed = metadata.get("last_accessed") or created
    access_count = metadata.get("access_count") or 0

    try:
        created_dt = _parse_utc_timestamp(created)
        last_accessed_dt = _parse_utc_timestamp(last_accessed)
        rarely_used = access_count < min_access_count
    except (AttributeError, TypeError, ValueError):
        # Missing or malformed metadata: staleness cannot be established.
        return False

    now = datetime.now(timezone.utc)
    age_days = (now - created_dt).total_seconds() / 86400
    days_since_access = (now - last_accessed_dt).total_seconds() / 86400
    return (
        age_days > days_threshold
        and rarely_used
        and days_since_access > idle_days_threshold
    )
def _format_report(total: int, pending_count: int, actions: dict, details: list) -> str:
    """Render the plain-text summary of one processing run."""
    rule = "=" * 60
    lines = [
        rule,
        "PENDING ENGRAMS PROCESSING REPORT",
        rule,
        f"Timestamp: {datetime.now(timezone.utc).isoformat()}",
        f"Total engrams: {total}",
        f"Pending engrams processed: {pending_count}",
        "",
        "ACTIONS TAKEN:",
        f" ✅ Auto-confirmed (confidence > 0.8): {actions['confirmed']}",
        f" ❌ Auto-rejected (confidence < 0.3): {actions['rejected']}",
        f" ⏳ Left for review (0.3 ≤ confidence ≤ 0.8): {actions['left_for_review']}",
        f" 📦 Archived stale topics: {actions['archived_stale']}",
        f" ⚠️ Errors: {actions['errors']}",
        "",
        "DETAILS:",
        *details,
        "",
        rule,
    ]
    return "\n".join(lines)


def process_pending_engrams() -> dict:
    """Main processing function.

    Loads up to 10000 engrams from ``DB_PATH`` and processes every engram
    whose verdict is neither ``confirmed_true`` nor ``confirmed_false``:

    - stale engrams (see ``is_stale``) get an ``archived`` tag and an
      ``archived_at`` timestamp in their metadata;
    - confidence above 0.8 confirms the engram, below 0.3 rejects it, and
      anything in between is left for review.

    Staleness and the confidence rule are independent, so a stale engram is
    counted under ``archived_stale`` and under one confidence outcome.
    A failure on one engram is recorded and does not stop the run.

    The report is printed and written to ``reports/pending_engrams_<time>.txt``
    next to the project's ``data`` directory.

    Returns:
        Counters with the keys ``confirmed``, ``rejected``,
        ``left_for_review``, ``archived_stale`` and ``errors``.
    """
    store = EngramStore(str(DB_PATH))
    try:
        all_engrams = store.get_all(limit=10000)
        print(f"Total engrams in database: {len(all_engrams)}")

        # Pending means no final verdict has been recorded yet.
        pending = [
            eg
            for eg in all_engrams
            if eg.correctness.verdict not in ("confirmed_true", "confirmed_false")
        ]
        print(f"Pending engrams (unconfirmed/unrejected): {len(pending)}")

        actions = {
            "confirmed": 0,
            "rejected": 0,
            "left_for_review": 0,
            "archived_stale": 0,
            "errors": 0,
        }
        details = []

        for eg in pending:
            try:
                confidence = eg.compute_confidence()
                engram_id = str(eg.id)
                content_preview = eg.content[:80] + ("..." if len(eg.content) > 80 else "")

                if is_stale(eg):
                    # Archive by tagging rather than deleting; a missing or
                    # null tag list is treated as empty.
                    tags = list(eg.metadata.get("tags") or [])
                    if "archived" not in tags:
                        tags.append("archived")
                    eg.metadata["tags"] = tags
                    eg.metadata["archived_at"] = datetime.now(timezone.utc).isoformat()
                    store.save(eg)
                    actions["archived_stale"] += 1
                    details.append(f"📦 Archived stale: [{engram_id[:8]}] {content_preview} (conf: {confidence:.2f})")

                # The confidence thresholds apply to every pending engram,
                # including ones that were just archived.
                if confidence > 0.8:
                    eg.correctness.confirm(by="auto_processor", note=f"Auto-confirmed: confidence {confidence:.2f}")
                    store.save(eg)
                    actions["confirmed"] += 1
                    details.append(f"✅ Confirmed: [{engram_id[:8]}] {content_preview} (conf: {confidence:.2f})")
                elif confidence < 0.3:
                    eg.correctness.reject(by="auto_processor", note=f"Auto-rejected: confidence {confidence:.2f}")
                    store.save(eg)
                    actions["rejected"] += 1
                    details.append(f"❌ Rejected: [{engram_id[:8]}] {content_preview} (conf: {confidence:.2f})")
                else:
                    actions["left_for_review"] += 1
                    details.append(f"⏳ Review later: [{engram_id[:8]}] {content_preview} (conf: {confidence:.2f})")
            except Exception as e:
                # Best effort: record the failure and continue with the rest.
                actions["errors"] += 1
                details.append(f"⚠️ Error processing engram: {str(e)}")

        report = _format_report(len(all_engrams), len(pending), actions, details)
        print("\n" + report)

        report_dir = Path(__file__).parent.parent / "reports"
        report_dir.mkdir(parents=True, exist_ok=True)
        report_file = report_dir / f"pending_engrams_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        report_file.write_text(report, encoding="utf-8")
        print(f"\n📄 Report saved to: {report_file}")
    finally:
        # Close the database even when loading or report writing fails.
        store.close()

    return actions
# Script entry point: process the pending engrams, then report completion.
# The returned action counters are bound to ``result`` but not used further.
if __name__ == "__main__":
    result = process_pending_engrams()
    print("\nProcessing complete.")