feat(complete): Phase 2-5 - Vektor-Embeddings, ChromaDB, Neural Scorer, Streamlit Dashboard, Graph-Visualisierung

This commit is contained in:
2026-05-25 09:43:04 +02:00
parent 08d21f8087
commit 59f4059cd8
6 changed files with 842 additions and 2 deletions

174
src/app_dashboard.py Normal file
View File

@@ -0,0 +1,174 @@
"""
app_dashboard.py - Streamlit dashboard for Second Brain.
Pages: Übersicht, Engramme, Suche, Graph, Stats, Neural Scorer.
"""
import json
import sys
from pathlib import Path
import streamlit as st
# Put the project root on sys.path so the ``from src.… import …`` lines below
# resolve. This file lives in ``<root>/src/``, so the root is two levels up;
# adding only the ``src`` directory itself does not make ``src`` importable as
# a package.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src.engram import Engram
from src.store import EngramStore
from src.chroma_store import ChromaStore
from src.retriever import Retriever
from src.neural_scorer import NeuralScorer
# Default database location: <project root>/data/brain.sqlite.
_DEFAULT_DB = Path(__file__).resolve().parent.parent / "data" / "brain.sqlite"

# An optional "db_path" entry in Streamlit's secrets overrides the default.
# ``st.secrets`` always exists as an attribute, so a ``hasattr`` check cannot
# detect a missing secrets file; the first lookup raises instead. Fall back to
# the default location in that case so the dashboard starts without any
# secrets.toml.
try:
    _DB_PATH = str(st.secrets.get("db_path", _DEFAULT_DB))
except FileNotFoundError:
    # NOTE(review): recent Streamlit releases raise StreamlitSecretNotFoundError
    # here, which is expected to subclass FileNotFoundError - confirm against
    # the pinned Streamlit version.
    _DB_PATH = str(_DEFAULT_DB)
def _store():
    """Create a new EngramStore for the configured database path."""
    db_location = _DB_PATH
    return EngramStore(db_location)
def _chroma():
    """Create a new ChromaStore for the ``chroma`` directory beside the database file."""
    chroma_dir = Path(_DB_PATH).parent.joinpath("chroma")
    return ChromaStore(str(chroma_dir))
def _retriever():
    """Build a Retriever from a new engram store and a new Chroma store."""
    engram_store = _store()
    vector_store = _chroma()
    return Retriever(engram_store, vector_store)
def _scorer():
    """Instantiate a NeuralScorer with its default arguments."""
    scorer = NeuralScorer()
    return scorer
# Page chrome. The sidebar radio decides which page the branches below render.
st.set_page_config(layout="wide", page_title="Second Brain Dashboard")
st.title("🧠 Second Brain Dashboard")
_page_names = ["Übersicht", "Engramme", "Suche", "Graph", "Stats", "Neural Scorer"]
page = st.sidebar.radio("Seite", _page_names)
if page == "Übersicht":
    # Overview page: headline metrics plus the five most recently modified
    # engrams.
    store = _store()
    engrams = store.get_all()
    total = len(engrams)
    confirmed = sum(1 for e in engrams if e.correctness.confirmed)
    unconfirmed = total - confirmed
    # max(1, ...) keeps the division defined when the store is empty.
    avg_conf = sum(e.compute_confidence() for e in engrams) / max(1, total)

    c1, c2, c3, c4 = st.columns(4)
    c1.metric("Total", total)
    c2.metric("Confirmed", confirmed)
    c3.metric("Pending", unconfirmed)
    c4.metric("Avg Confidence", f"{avg_conf:.2f}")

    st.subheader("Recent Engramme")
    # Newest first by the "modified" metadata entry; engrams without that
    # entry are compared as the empty string.
    recent = sorted(
        engrams,
        key=lambda e: e.metadata.get("modified", ""),
        reverse=True,
    )[:5]
    for eg in recent:
        with st.expander(f"{eg.content[:80]}..."):
            st.write(f"Source: {eg.metadata.get('source')}")
            st.write(f"Confidence: {eg.compute_confidence():.2f}")
            # Show an explicit marker for each state so the line is never
            # blank after the colon.
            st.write(f"Confirmed: {'✅' if eg.correctness.confirmed else '❌'}")
            st.write("Tags:", ", ".join(eg.metadata.get("tags", [])))
elif page == "Engramme":
store = _store()
st.subheader("Alle Engramme")
tag_filter = st.text_input("Filter tags")
source_filter = st.selectbox("Source", ["alle", "user", "agent", "web", "file", "system"])
for eg in store.get_all():
tags = eg.metadata.get("tags", [])
src = eg.metadata.get("source", "")
if tag_filter and tag_filter not in tags:
continue
if source_filter != "alle" and source_filter != src:
continue
with st.expander(f"{eg.content[:100]}"):
st.write("Confidence:", f"{eg.compute_confidence():.2f}")
st.write("Tags:", ", ".join(tags))
st.write("Source:", src)
c1, c2 = st.columns(2)
if c1.button("✅ Confirm", key=f"conf_{eg.id}"):
eg.correctness.confirm("user")
store.save(eg)
st.success("Confirmed!")
if c2.button("❌ Reject", key=f"rej_{eg.id}"):
eg.correctness.reject("user")
store.save(eg)
st.warning("Rejected.")
elif page == "Suche":
st.subheader("Semantic + Keyword Suche")
query = st.text_input("Query")
mode = st.radio("Modus", ["Hybrid", "Keyword", "Semantic"])
if st.button("Suchen") and query:
ret = _retriever()
if mode == "Hybrid":
results = ret.hybrid_retrieve(query, limit=10)
elif mode == "Semantic":
results = ret.semantic_retrieve(query, limit=10)
else:
results = ret.retrieve(query, limit=10)
for r in results:
eg = r["engram"]
with st.container():
st.markdown(f"**{eg.content[:200]}...**")
st.write(f"Score: {r['score']:.3f} | Match: {r['match_type']} | Conf: {eg.compute_confidence():.2f}")
c1, c2 = st.columns(2)
if c1.button("✅ Confirm", key=f"sc_{eg.id}"):
eg.correctness.confirm("user")
store = _store()
store.save(eg)
st.success("Confirmed")
if c2.button("❌ Reject", key=f"sr_{eg.id}"):
eg.correctness.reject("user")
store = _store()
store.save(eg)
st.warning("Rejected")
elif page == "Graph":
st.subheader("Graph-Visualisierung")
graph_html_path = Path(_DB_PATH).parent / "graph_view.html"
if graph_html_path.exists():
with open(graph_html_path, "r", encoding="utf-8") as f:
html = f.read()
# iframe
st.components.v1.html(html, height=800, scrolling=True)
else:
st.info("Graph nicht generiert. Führe `python -m src.cli graph` aus.")
if st.button("Graph generieren"):
from src.graph_view import generate_graph_html
store = _store()
path = generate_graph_html(store, str(Path(_DB_PATH).parent / "graph_view.html"))
st.success(f"Graph generiert: {path}")
elif page == "Stats":
store = _store()
engrams = store.get_all()
st.json({
"total": len(engrams),
"confirmed": sum(1 for e in engrams if e.correctness.confirmed),
"pending": sum(1 for e in engrams if not e.correctness.confirmed),
"sources": {s: sum(1 for e in engrams if e.metadata.get("source") == s) for s in {e.metadata.get("source") for e in engrams}},
"tags": {t: sum(1 for e in engrams for t2 in e.metadata.get("tags", []) if t2 == t) for t in {t for e in engrams for t in e.metadata.get("tags", [])}},
"avg_confidence": sum(e.compute_confidence() for e in engrams) / max(1, len(engrams)),
})
elif page == "Neural Scorer":
st.subheader("Neural Scorer Training")
scorer = _scorer()
store = _store()
engrams = store.get_all()
labeled = [e for e in engrams if e.correctness.confirmed or e.correctness.rejections > 0]
st.write(f"Labelled Engramme: {len(labeled)}")
if st.button("Train Neural Scorer"):
if len(labeled) < 2:
st.error("Mindestens 2 labelierte Engramme nötig (confirm + reject).")
else:
result = scorer.train(labeled, epochs=30)
st.json(result)
st.success("Training abgeschlossen!")
if st.button("Predict All"):
for eg in engrams[:10]:
pred = scorer.predict(eg)
st.write(f"{eg.content[:60]}... → {pred:.3f}")