diff --git a/README.md b/README.md index 12d1d1f..4507639 100644 --- a/README.md +++ b/README.md @@ -12,3 +12,7 @@ An embeddable, offline-first memory system for AI agents with correctness tracki - **Graph Visualization** (`src/graph_view.py`) — Interactive Cytoscape.js graph with confidence colors ## Architecture + +## Obsidian + +Setup and timers: `second-brain/docs/OBSIDIAN.md` diff --git a/docs/OBSIDIAN.md b/docs/OBSIDIAN.md new file mode 100644 index 0000000..78a5adb --- /dev/null +++ b/docs/OBSIDIAN.md @@ -0,0 +1,75 @@ +# Obsidian Coupling (Second-Brain 2.0) + +This integrates an Obsidian vault with Second-Brain via two cron tasks: + +- `cron_tasks/ingest_obsidian.py` (vault → Second-Brain) +- `cron_tasks/export_obsidian.py` (Second-Brain → vault) + +All settings live in `second-brain/data/obsidian_config.json`. + +## 1) Install / Sync the vault to the server + +You need a local folder on the server that contains an Obsidian vault (it must contain a `.obsidian/` directory), e.g.: + +- `/srv/obsidian/MyVault` +- `/data/obsidian/MyVault` +- `/root/Obsidian/MyVault` + +How you sync it is up to you (Syncthing, rsync, SMB mount, etc.). + +## 2) Set `vault_path` in config (auto or manual) + +### Auto-discover (only writes if unambiguous) + +```bash +python3 second-brain/scripts/discover_obsidian_vault.py +python3 second-brain/scripts/discover_obsidian_vault.py --write +``` + +If multiple vaults are detected, it prints them and refuses to write. 
+ +### Manual + +Edit `second-brain/data/obsidian_config.json` and set: + +- `vault_path` to the vault directory (the parent of `.obsidian/`) + +## 3) Enable ingest/export + +In `second-brain/data/obsidian_config.json`: + +- Set `enabled.ingest` to `true` to ingest vault markdown into Second-Brain +- Set `enabled.export` to `true` to export Second-Brain engrams into the vault + +## 4) Enable timers (systemd) + +This repo ships unit files in `systemd/`: + +- `systemd/openclaw-secondbrain-ingest-obsidian.service` +- `systemd/openclaw-secondbrain-ingest-obsidian.timer` +- `systemd/openclaw-secondbrain-export-obsidian.service` +- `systemd/openclaw-secondbrain-export-obsidian.timer` + +Install them (copy or symlink) to `/etc/systemd/system/`, then: + +```bash +sudo systemctl daemon-reload +sudo systemctl enable --now openclaw-secondbrain-ingest-obsidian.timer +sudo systemctl enable --now openclaw-secondbrain-export-obsidian.timer +``` + +## 5) Verify + +Run once manually: + +```bash +python3 openclaw_cron_wrapper.py ingest_obsidian +python3 openclaw_cron_wrapper.py export_obsidian +``` + +What to expect: + +- If `vault_path` is missing/invalid, both tasks **skip** safely (no writes to random paths). +- Ingest creates/updates `second-brain/data/obsidian_ingest_state.json`. +- Export writes markdown files to `<vault_path>/<export_folder>/` (default: `SecondBrain/`) and tracks state in `second-brain/data/obsidian_export_state.json`. + diff --git a/scripts/discover_obsidian_vault.py b/scripts/discover_obsidian_vault.py new file mode 100644 index 0000000..50d5764 --- /dev/null +++ b/scripts/discover_obsidian_vault.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +""" +Auto-discover an Obsidian vault on this server and (optionally) write it into: + second-brain/data/obsidian_config.json + +Safety: +- Only writes when exactly one vault is detected (unambiguous). +- A "vault" is a directory that contains a `.obsidian/` folder. 
# Default locations; the config path can be overridden on the command line
# with --config.
WORKSPACE = Path("/root/.openclaw/workspace")
BRAIN_DIR = WORKSPACE / "second-brain"
CONFIG_PATH = BRAIN_DIR / "data" / "obsidian_config.json"

# Directory names that are never descended into while scanning.
_SKIP_DIR_NAMES = frozenset(
    {
        ".git",
        "node_modules",
        "__pycache__",
        ".cache",
        ".venv",
        "venv",
        "tmp",
        "proc",
        "sys",
        "dev",
    }
)


def _iter_common_candidates() -> Iterable[Path]:
    """Yield well-known locations that might themselves be a vault.

    The ``OBSIDIAN_VAULT_PATH`` environment variable (if set and non-empty)
    is yielded first, followed by a fixed list of directories under the
    user's home, ``/srv``, ``/data`` and the workspace. Nothing is checked
    for existence here; callers filter with ``_is_vault_dir``.
    """
    env = os.environ.get("OBSIDIAN_VAULT_PATH")
    if env:
        yield Path(env).expanduser()

    home = Path.home()
    yield from (
        home / "Obsidian",
        home / "ObsidianVault",
        home / "Vault",
        home / "Vaults",
        home / "Documents" / "Obsidian",
        home / "Documents" / "Vaults",
        home / "Syncthing" / "Obsidian",
        Path("/srv/obsidian"),
        Path("/srv/Obsidian"),
        Path("/data/obsidian"),
        Path("/data/Obsidian"),
        WORKSPACE / "obsidian",
        WORKSPACE / "vault",
        WORKSPACE / "vaults",
    )


def _is_vault_dir(p: Path) -> bool:
    """Return True if ``p`` contains a ``.obsidian`` directory.

    File-system errors (e.g. permission denied) are treated as "not a vault".
    """
    try:
        # If `p/.obsidian` is a directory, `p` necessarily exists and is a
        # directory too, so one check covers all four original conditions.
        return (p / ".obsidian").is_dir()
    except (OSError, ValueError):
        return False


def _bounded_find_obsidian_dirs(root: Path, *, max_depth: int) -> list[Path]:
    """Find ``.obsidian`` directories below ``root`` with a depth limit.

    ``root`` has depth 0. Directories whose depth exceeds ``max_depth`` are
    not listed, so a ``.obsidian`` folder is found only if its parent is at
    most ``max_depth`` levels below ``root``. Hidden directories, symlinked
    directories and the names in ``_SKIP_DIR_NAMES`` are not descended into.

    Returns the ``.obsidian`` paths in breadth-first order; unreadable
    directories are skipped silently.
    """
    from collections import deque  # local import: O(1) pops from the left

    if max_depth < 0:
        return []
    try:
        root = root.resolve()
        if not root.is_dir():
            return []
    except (OSError, RuntimeError):
        return []

    results: list[Path] = []
    # Carry the depth with each queued directory instead of recomputing it
    # from the path for every entry.
    queue = deque([(root, 0)])
    while queue:
        current, depth = queue.popleft()
        try:
            entries = list(current.iterdir())
        except OSError:
            continue

        for entry in entries:
            name = entry.name
            try:
                if name == ".obsidian":
                    if entry.is_dir():
                        results.append(entry)
                    continue
                if name in _SKIP_DIR_NAMES or name.startswith("."):
                    continue
                if depth < max_depth and entry.is_dir() and not entry.is_symlink():
                    queue.append((entry, depth + 1))
            except OSError:
                # A single unreadable entry must not abort the whole scan.
                continue

    return results


def discover(*, roots: list[Path], max_depth: int) -> list[Path]:
    """Return the sorted, de-duplicated list of detected vault directories.

    A vault is reported when one of the common candidate paths is itself a
    vault, or when a ``.obsidian`` directory is found under one of ``roots``
    within ``max_depth`` levels. Paths are resolved before de-duplication.
    """
    vaults: set[Path] = set()

    for candidate in _iter_common_candidates():
        if _is_vault_dir(candidate):
            vaults.add(candidate.resolve())

    for root in roots:
        for obsidian_dir in _bounded_find_obsidian_dirs(root, max_depth=max_depth):
            vaults.add(obsidian_dir.parent.resolve())

    return sorted(vaults)


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) lets argparse
            read ``sys.argv``.

    Returns:
        0 when exactly one vault was detected (and written, if ``--write``),
        1 when no vault was found, 2 when several vaults were found.

    Raises:
        SystemExit: with a message when ``--write`` is given and the config
            file is missing, unreadable, or not a JSON object.
    """
    ap = argparse.ArgumentParser(description="Discover Obsidian vault and optionally write config")
    ap.add_argument("--write", action="store_true", help="Write detected vault_path into obsidian_config.json")
    ap.add_argument(
        "--roots",
        nargs="*",
        default=[str(Path.home()), "/srv", "/data", "/mnt", str(WORKSPACE)],
        help="Roots to scan (bounded). Default: home,/srv,/data,/mnt,workspace",
    )
    ap.add_argument("--max-depth", type=int, default=4, help="Max directory depth to scan under each root")
    ap.add_argument(
        "--config",
        type=Path,
        default=CONFIG_PATH,
        help="Config file updated by --write (default: %(default)s)",
    )
    args = ap.parse_args(argv)

    roots = [Path(r).expanduser() for r in args.roots]
    vaults = discover(roots=roots, max_depth=args.max_depth)

    if not vaults:
        print("No Obsidian vault found (no `.obsidian/` directories detected).")
        return 1

    if len(vaults) > 1:
        print("Multiple Obsidian vaults found; refusing to write config:")
        for v in vaults:
            print(f"- {v}")
        return 2

    vault = vaults[0]
    print(f"Detected Obsidian vault: {vault}")

    if not args.write:
        return 0

    config_path: Path = args.config
    if not config_path.exists():
        raise SystemExit(f"Missing config file: {config_path}")

    # Fail with a readable message instead of a traceback, and never touch
    # the file unless it parsed into a JSON object.
    try:
        cfg = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        raise SystemExit(f"Cannot read config file {config_path}: {exc}") from exc
    if not isinstance(cfg, dict):
        raise SystemExit(f"Config file {config_path} must contain a JSON object")

    cfg["vault_path"] = str(vault)
    config_path.write_text(json.dumps(cfg, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    print(f"Wrote vault_path to: {config_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
def get_modified_since(self, iso_ts: str, limit: int = 5000) -> List[Engram]:
    """Return engrams whose `modified_at` is greater than `iso_ts`.

    Rows are ordered by `modified_at` ascending and capped at `limit`;
    each row is converted with `self._row_to_engram`.

    NOTE(review): the comparison is done by SQLite on the stored column
    values; presumably these are ISO-8601 strings in one consistent
    format so that ordering matches chronology. Confirm against the
    schema.
    """
    query = "SELECT * FROM engrams WHERE modified_at > ? ORDER BY modified_at ASC LIMIT ?"
    cursor = self._conn.execute(query, (iso_ts, limit))
    return list(map(self._row_to_engram, cursor.fetchall()))