2 Commits

3 changed files with 116 additions and 77 deletions

1
.streamlit/secrets.toml Normal file
View File

@@ -0,0 +1 @@
[default]

View File

@@ -1,174 +1,210 @@
""" """
app_dashboard.py - Streamlit-Dashboard für Second Brain. app_dashboard.py - Streamlit-Dashboard für Second Brain.
Seiten: Übersicht, Engramme, Suche, Graph, Stats. Seiten: Übersicht, Engramme, Suche, Graph, Heal-Log, Neural Scorer.
""" """
import json
import os
import sys
from pathlib import Path

import streamlit as st

# Directory two levels above this file; the `src` package is imported from it.
_root = Path(__file__).resolve().parent.parent
# Streamlit re-executes this script on every interaction, so only add the
# entry once instead of growing sys.path on each rerun.
if str(_root) not in sys.path:
    sys.path.insert(0, str(_root))

from src.engram import Engram
from src.store import EngramStore
from src.chroma_store import ChromaStore
from src.retriever import Retriever
from src.neural_scorer import NeuralScorer
from src.graph_view import generate_graph_html
from src.loop_detector import LoopDetector
from src.error_healer import ErrorHealer

# SQLite file used by every page of the dashboard.
_DEFAULT_DB = _root / "data" / "brain.sqlite"
@st.cache_resource
def _store():
    """Return the EngramStore opened on the default database file.

    Wrapped in ``st.cache_resource`` so script reruns reuse one instance.
    """
    return EngramStore(str(_DEFAULT_DB))
@st.cache_resource
def _chroma():
    """Return the ChromaStore kept in the ``chroma`` directory next to the database file.

    Wrapped in ``st.cache_resource`` so script reruns reuse one instance.
    """
    # _DEFAULT_DB is already a Path; no str()/Path() round-trip is needed.
    chroma_dir = _DEFAULT_DB.parent / "chroma"
    return ChromaStore(str(chroma_dir))
@st.cache_resource
def _retriever():
    """Return a Retriever built from the cached engram store and Chroma store.

    A module-level ``None`` sentinel does not work as a cache here: Streamlit
    re-executes the script on every interaction, which resets the sentinel.
    ``st.cache_resource`` keeps the instance across reruns instead.
    """
    return Retriever(_store(), _chroma())
@st.cache_resource
def _scorer():
    """Return the NeuralScorer instance, cached across script reruns."""
    return NeuralScorer()
@st.cache_resource
def _healer():
    """Return an ErrorHealer bound to the cached engram store, cached across script reruns."""
    return ErrorHealer(_store())
# Page chrome and sidebar navigation; `page` selects which branch below renders.
st.set_page_config(page_title="Second Brain Dashboard", layout="wide")
st.title("🧠 2.Brain v0.3.1")

page = st.sidebar.radio(
    "Seite",
    ["Übersicht", "Engramme", "Suche", "Graph", "Heal-Log", "Neural Scorer"],
)
if page == "Übersicht":
    # Overview page: headline metrics plus the five most recently modified engrams.
    store = _store()
    engrams = store.get_all(limit=10000)
    total = len(engrams)
    confirmed = sum(1 for e in engrams if e.correctness.confirmed)
    unconfirmed = total - confirmed
    # max(1, ...) avoids a division by zero when the store is empty.
    avg_conf = sum(e.compute_confidence() for e in engrams) / max(1, total)
    errors = [e for e in engrams if "error" in e.metadata.get("tags", [])]

    c1, c2, c3, c4, c5 = st.columns(5)
    c1.metric("Total", total)
    c2.metric("Confirmed", confirmed)
    c3.metric("Pending", unconfirmed)
    c4.metric("Avg Confidence", f"{avg_conf:.2f}")
    c5.metric("Errors", len(errors))

    st.subheader("Recent Engramme")
    # `or ""` keeps the sort key a string even when "modified" is stored as None.
    recent = sorted(
        engrams,
        key=lambda e: e.metadata.get("modified") or "",
        reverse=True,
    )[:5]
    for eg in recent:
        valid = eg.validate_grounding()
        marker = "" if valid["valid"] else "⚠️"
        with st.expander(f"{marker} {eg.content[:80]}..."):
            st.write(f"ID: `{eg.id}`")
            st.write(f"Source: {eg.metadata.get('source')}")
            st.write(f"Confidence: {eg.compute_confidence():.2f}")
            # Both branches used to be empty strings, so the state was never shown.
            st.write(f"Confirmed: {'✅' if eg.correctness.confirmed else '❌'}")
            st.write("Tags:", ", ".join(str(t) for t in eg.metadata.get("tags", [])))
            if not valid["valid"]:
                st.warning(f"Grounding: {valid['issue']}")
                if st.button("Auto-Fix", key=f"af_{eg.id}"):
                    eg.auto_fix_grounding()
                    store.save(eg)
                    # st.experimental_rerun is deprecated in favour of st.rerun;
                    # fall back to it only on older Streamlit versions.
                    rerun = getattr(st, "rerun", None) or st.experimental_rerun
                    rerun()
elif page == "Engramme": elif page == "Engramme":
store = _store() store = _store()
st.subheader("Alle Engramme") st.subheader("Alle Engramme (max 1000)")
tag_filter = st.text_input("Filter tags") tag_filter = st.text_input("Filter tags")
source_filter = st.selectbox("Source", ["alle", "user", "agent", "web", "file", "system"]) source_filter = st.selectbox("Source", ["alle", "user", "agent", "web", "file", "system"])
for eg in store.get_all(): for eg in store.get_all(limit=1000):
tags = eg.metadata.get("tags", []) tags = eg.metadata.get("tags", [])
src = eg.metadata.get("source", "") src = eg.metadata.get("source", "")
if tag_filter and tag_filter not in tags: if tag_filter and tag_filter not in tags:
continue continue
if source_filter != "alle" and source_filter != src: if source_filter != "alle" and source_filter != src:
continue continue
with st.expander(f"{eg.content[:100]}"): col1, col2 = st.columns([4, 1])
st.write("Confidence:", f"{eg.compute_confidence():.2f}") with col1:
st.write("Tags:", ", ".join(tags)) conf = eg.compute_confidence()
st.write("Source:", src) marker = "" if conf > 0.7 else "⚠️"
c1, c2 = st.columns(2) st.markdown(f"{marker} **{eg.content[:100]}**")
if c1.button("Confirm", key=f"conf_{eg.id}"): st.caption(f"Conf: {conf:.2f} | Tags: {', '.join(tags)} | Source: {src}")
with col2:
if st.button("✅ Confirm", key=f"conf_{eg.id}"):
eg.correctness.confirm("user") eg.correctness.confirm("user")
store.save(eg) store.save(eg)
st.success("Confirmed!") st.success("Confirmed")
if c2.button("❌ Reject", key=f"rej_{eg.id}"): if st.button("❌ Reject", key=f"rej_{eg.id}"):
eg.correctness.reject("user") eg.correctness.reject("user")
store.save(eg) store.save(eg)
st.warning("Rejected.") st.warning("Rejected")
st.divider()
elif page == "Suche": elif page == "Suche":
st.subheader("Semantic + Keyword Suche") st.subheader("Hybrid Search (Semantic + Keyword)")
query = st.text_input("Query") query = st.text_input("Query", placeholder="Suchbegriff eingeben...")
mode = st.radio("Modus", ["Hybrid", "Keyword", "Semantic"]) mode = st.radio("Modus", ["Hybrid", "Keyword", "Semantic"], horizontal=True)
if st.button("Suchen") and query: if st.button("Suchen") and query:
ret = _retriever() ret = _retriever()
if mode == "Hybrid": results = ret.hybrid_retrieve(query, limit=10) if mode == "Hybrid" else \
results = ret.hybrid_retrieve(query, limit=10) ret.semantic_retrieve(query, limit=10) if mode == "Semantic" else \
elif mode == "Semantic": ret.retrieve(query, limit=10)
results = ret.semantic_retrieve(query, limit=10) if not results:
else: st.info("Keine Ergebnisse gefunden.")
results = ret.retrieve(query, limit=10)
for r in results: for r in results:
eg = r["engram"] eg = r["engram"]
with st.container(): with st.container():
st.markdown(f"**{eg.content[:200]}...**") st.markdown(f"**{eg.content[:200]}...**")
st.write(f"Score: {r['score']:.3f} | Match: {r['match_type']} | Conf: {eg.compute_confidence():.2f}") st.write(f"Score: `{r['score']:.3f}` | Match: `{r['match_type']}` | Conf: `{eg.compute_confidence():.2f}`")
c1, c2 = st.columns(2) c1, c2 = st.columns(2)
if c1.button("✅ Confirm", key=f"sc_{eg.id}"): if c1.button("✅ Confirm", key=f"sc_{eg.id}"):
eg.correctness.confirm("user") eg.correctness.confirm("user")
store = _store() _store().save(eg)
store.save(eg)
st.success("Confirmed") st.success("Confirmed")
if c2.button("❌ Reject", key=f"sr_{eg.id}"): if c2.button("❌ Reject", key=f"sr_{eg.id}"):
eg.correctness.reject("user") eg.correctness.reject("user")
store = _store() _store().save(eg)
store.save(eg)
st.warning("Rejected") st.warning("Rejected")
elif page == "Graph": elif page == "Graph":
st.subheader("Graph-Visualisierung") st.subheader("Graph-Visualisierung")
graph_html_path = Path(_DB_PATH).parent / "graph_view.html" graph_html_path = Path(str(_DEFAULT_DB)).parent / "graph_view.html"
if st.button("Graph neu generieren"):
with st.spinner("Generiere Graph..."):
path = generate_graph_html(_store(), str(graph_html_path))
st.success(f"Graph generiert: {path}")
if graph_html_path.exists(): if graph_html_path.exists():
with open(graph_html_path, "r", encoding="utf-8") as f: with open(graph_html_path, "r", encoding="utf-8") as f:
html = f.read() html = f.read()
# iframe st.components.v1.html(html, height=800)
st.components.v1.html(html, height=800, scrolling=True)
else: else:
st.info("Graph nicht generiert. Führe `python -m src.cli graph` aus.") st.info("Graph noch nicht generiert. Klicke oben.")
if st.button("Graph generieren"):
from src.graph_view import generate_graph_html
store = _store()
path = generate_graph_html(store, str(Path(_DB_PATH).parent / "graph_view.html"))
st.success(f"Graph generiert: {path}")
elif page == "Stats": elif page == "Heal-Log":
store = _store() st.subheader("Error Healing & Loop Detection")
engrams = store.get_all() healer = _healer()
st.json({ stats = healer.get_error_stats()
"total": len(engrams), c1, c2, c3 = st.columns(3)
"confirmed": sum(1 for e in engrams if e.correctness.confirmed), c1.metric("Total Errors", stats["total_errors"])
"pending": sum(1 for e in engrams if not e.correctness.confirmed), c2.metric("Repeated", stats["repeated_errors"])
"sources": {s: sum(1 for e in engrams if e.metadata.get("source") == s) for s in {e.metadata.get("source") for e in engrams}}, c3.metric("Error Types", len(stats.get("error_types", {})))
"tags": {t: sum(1 for e in engrams for t2 in e.metadata.get("tags", []) if t2 == t) for t in {t for e in engrams for t in e.metadata.get("tags", [])}},
"avg_confidence": sum(e.compute_confidence() for e in engrams) / max(1, len(engrams)), st.subheader("Error Types")
}) for etype, count in stats.get("error_types", {}).items():
st.write(f"- **{etype}**: {count}")
st.subheader("Loop-Checker")
q = st.text_input("Query")
r = st.text_input("Response")
if st.button("Check Loop") and q and r:
detector = LoopDetector()
result = detector.check(q, r)
st.json(result)
if result["loop_detected"]:
st.error(result["suggestion"])
elif page == "Neural Scorer": elif page == "Neural Scorer":
st.subheader("Neural Scorer Training") st.subheader("Neural Scorer Training")
scorer = _scorer() scorer = _scorer()
store = _store() store = _store()
engrams = store.get_all() engrams = store.get_all(limit=10000)
labeled = [e for e in engrams if e.correctness.confirmed or e.correctness.rejections > 0] labeled = [e for e in engrams if e.correctness.confirmed or e.correctness.rejections > 0]
st.write(f"Labelled Engramme: {len(labeled)}") st.write(f"Labelled Engramme: **{len(labeled)}**")
if st.button("Train Neural Scorer"): if st.button("Train Neural Scorer"):
if len(labeled) < 2: if len(labeled) < 2:
st.error("Mindestens 2 labelierte Engramme nötig (confirm + reject).") st.error("Mindestens 2 labelierte Engramme nötig (confirm + reject).")
else: else:
with st.spinner("Training läuft..."):
result = scorer.train(labeled, epochs=30) result = scorer.train(labeled, epochs=30)
st.json(result) st.json(result)
st.success("Training abgeschlossen!") st.success("Training abgeschlossen!")
if st.button("Predict All"): if st.button("Predict All"):
for eg in engrams[:10]: for eg in engrams[:20]:
pred = scorer.predict(eg) pred = scorer.predict(eg)
st.write(f"{eg.content[:60]}... → {pred:.3f}") st.write(f"{eg.content[:50]}... → **{pred:.3f}**")

View File

@@ -31,19 +31,21 @@ class ChromaStore:
) )
def _build_metadata(self, engram: Engram) -> Dict[str, Any]:
    """Build the flat metadata dict stored alongside an engram in ChromaDB.

    Only an explicit set of fields is copied, each coerced to a plain
    str/float/int. Missing, ``None`` or unparsable values fall back to the
    field's default instead of raising or being stored as ``"None"``.

    Args:
        engram: Object exposing ``metadata`` (mapping), ``correctness.confirmed``
            and ``content``.

    Returns:
        Dict with the keys source, confidence, grounding, tags, created,
        modified, access_count, correctness and content.
    """
    m = engram.metadata or {}

    def _present(key, default):
        # Treat an explicit None the same as a missing key.
        value = m.get(key)
        return default if value is None else value

    def _number(key, cast, default):
        # Coerce to int/float, falling back to the default on bad input.
        try:
            return cast(_present(key, default))
        except (TypeError, ValueError):
            return default

    tags = _present("tags", [])
    if isinstance(tags, (list, tuple)):
        tags_str = ",".join(str(t) for t in tags)
    else:
        tags_str = str(tags)

    return {
        "source": str(_present("source", "agent")),
        "confidence": _number("confidence", float, 0.5),
        "grounding": _number("grounding", int, 1),
        "tags": tags_str,
        "created": str(_present("created", "")),
        "modified": str(_present("modified", "")),
        "access_count": _number("access_count", int, 0),
        "correctness": "confirmed" if engram.correctness.confirmed else "unconfirmed",
        # Content is capped at 500 characters (original note: Chroma copes
        # better with short strings).
        "content": str(engram.content)[:500],
    }
def add(self, engram: Engram, embedding: Optional[List[float]] = None) -> None: def add(self, engram: Engram, embedding: Optional[List[float]] = None) -> None:
"""Engramm mit Embedding zur Vektor-DB hinzufügen.""" """Engramm mit Embedding zur Vektor-DB hinzufügen."""