dp-zp-agent/scripts/rebuild_index.py

108 lines
2.6 KiB
Python

from pathlib import Path
import argparse
import os
import sqlite3
import subprocess
import sys
import time
from rich import print
# Location of the zpwiki checkout. The ZPWIKI_ROOT environment variable
# overrides the default relative path "../zpwiki".
ZPWIKI_ROOT = Path(os.environ.get("ZPWIKI_ROOT", "../zpwiki"))

# SQLite index file whose row counts are reported after a rebuild.
DB_FILE = Path("data") / "zp_index.sqlite"
def run_command(command: list[str], cwd: Path | None = None) -> None:
print(f"[cyan]Spúšťam:[/cyan] {' '.join(command)}")
result = subprocess.run(
command,
cwd=cwd,
text=True,
capture_output=True,
)
if result.stdout:
print(result.stdout.strip())
if result.stderr:
print(result.stderr.strip())
if result.returncode != 0:
raise RuntimeError(
f"Príkaz zlyhal: {' '.join(command)}"
)
def git_pull() -> None:
    """Update the zpwiki checkout by running ``git pull`` inside it.

    Raises:
        RuntimeError: If ``ZPWIKI_ROOT`` does not exist or contains no
            ``.git`` entry. ``run_command`` raises the same type when
            ``git pull`` itself fails.
    """
    repo_root = ZPWIKI_ROOT

    # Work out which precondition (if any) is violated, then raise once.
    problem = None
    if not repo_root.exists():
        problem = f"ZPWIKI_ROOT neexistuje: {repo_root}"
    elif not (repo_root / ".git").exists():
        problem = f"Nie je to git repozitár: {repo_root}"

    if problem is not None:
        raise RuntimeError(problem)

    run_command(["git", "pull"], cwd=repo_root)
def rebuild_index() -> None:
    """Run the three index-building scripts one after another.

    Each script is started with the current interpreter through
    ``run_command``. The script paths are relative, so they resolve against
    the working directory. A failing stage raises and stops the remaining
    stages from running.
    """
    pipeline = ("scan_zpwiki", "build_chunks", "build_sqlite_index")
    for stage in pipeline:
        run_command([sys.executable, f"scripts/{stage}.py"])
def get_counts() -> dict:
if not DB_FILE.exists():
return {
"documents": 0,
"chunks": 0,
"tags": 0,
"categories": 0,
}
conn = sqlite3.connect(DB_FILE)
cursor = conn.cursor()
counts = {
"documents": cursor.execute("SELECT COUNT(*) FROM documents").fetchone()[0],
"chunks": cursor.execute("SELECT COUNT(*) FROM chunks").fetchone()[0],
"tags": cursor.execute("SELECT COUNT(*) FROM chunk_tags").fetchone()[0],
"categories": cursor.execute("SELECT COUNT(*) FROM chunk_categories").fetchone()[0],
}
conn.close()
return counts
def main() -> None:
    """Parse the command line, rebuild the index and print a summary.

    With ``--pull`` the zpwiki checkout is updated via ``git_pull`` first.
    After ``rebuild_index`` finishes, the elapsed time and the row counts
    returned by ``get_counts`` are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pull",
        action="store_true",
        help="Pred reindexovaním spustí git pull v zpwiki repozitári.",
    )
    args = parser.parse_args()

    # perf_counter is monotonic, so the measured duration is not distorted by
    # system clock adjustments while the rebuild is running.
    start = time.perf_counter()

    print(f"[green]ZPWIKI_ROOT:[/green] {ZPWIKI_ROOT}")

    if args.pull:
        git_pull()

    rebuild_index()
    counts = get_counts()
    duration = round(time.perf_counter() - start, 2)

    print("[green]Reindex hotový.[/green]")
    print(f"Trvanie: {duration} s")
    print(f"Dokumentov: {counts['documents']}")
    print(f"Chunkov: {counts['chunks']}")
    print(f"Tag záznamov: {counts['tags']}")
    print(f"Kategória záznamov: {counts['categories']}")


# Run the CLI only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()