fix: performance optimizations for large dataset
- Add generated columns and indexes for correctness and metadata fields
- Optimize get_all() with keyset pagination
- Add get_pending_for_review() for targeted queries
- Update cron tasks to use optimized queries instead of full table scans
- This fixes timeouts in review_brain and verify_pending_external (300s timeout)

Fixes #35: Second-Brain in Takt bringen, Dedup, Pendings, Graph und Performance
This commit is contained in:
@@ -22,10 +22,14 @@ def extract_keywords(text: str, max_words: int = 10) -> set[str]:
|
||||
words = re.findall(r"\b[a-zA-Z]{4,}\b", text.lower())
|
||||
# Stopwörter filtern (einfache Liste)
|
||||
stopwords = {"und", "die", "der", "ein", "eine", "auf", "von", "zu", "mit", "für", "ist", "das", "nicht"}
|
||||
return set(w for w in words if w not in stopwords)[:max_words]
|
||||
unique_words = set(w for w in words if w not in stopwords)
|
||||
# Begrenze auf max_words (Umwandlung in Liste für Slicing, dann zurück zu Set)
|
||||
return set(list(unique_words)[:max_words])
|
||||
|
||||
def run():
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
conn = sqlite3.connect(str(DB_PATH), timeout=60)
|
||||
conn.execute("PRAGMA busy_timeout=60000")
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.row_factory = sqlite3.Row
|
||||
c = conn.cursor()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user