dp-zp-agent/scripts/rebuild_index.py

96 lines
2.2 KiB
Python

from __future__ import annotations
import argparse
import subprocess
import sys
import time
from pathlib import Path
from rich import print
# PROJECT_ROOT is the parent of the directory that contains this file.
# It is put at the front of sys.path before the `scripts.*` imports below so
# those absolute imports can resolve; the membership check avoids inserting
# the same entry twice.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from scripts.build_chunks import build_chunks
from scripts.build_sqlite_index import build_database
from scripts.common import DB_FILE, ZPWIKI_ROOT
from scripts.scan_zpwiki import scan_pages
def git_pull(repo_path: Path = ZPWIKI_ROOT) -> None:
    """Run ``git pull`` inside *repo_path* and echo git's output.

    Args:
        repo_path: Directory of the git working tree to update. Defaults to
            ``ZPWIKI_ROOT``.

    Raises:
        RuntimeError: If *repo_path* does not exist, if it contains no
            ``.git`` entry, or if ``git pull`` exits with a non-zero status.
    """
    # Function-local import: only this function needs it, and `rich` is
    # already a dependency of this module.
    from rich.markup import escape

    if not repo_path.exists():
        raise RuntimeError(f"ZPWIKI_ROOT neexistuje: {repo_path}")
    if not (repo_path / ".git").exists():
        raise RuntimeError(f"Nie je to git repozitár: {repo_path}")

    result = subprocess.run(
        ["git", "pull"],
        cwd=repo_path,
        text=True,
        capture_output=True,
    )

    # `print` here is rich's print, which interprets "[...]" as console
    # markup. Git output routinely contains bracketed text (for example
    # "[new branch]"), so escape it to have it shown verbatim instead of
    # being consumed as a style tag or rejected as malformed markup.
    if result.stdout:
        print(escape(result.stdout.strip()))
    if result.stderr:
        print(escape(result.stderr.strip()))

    # Output is echoed first so the user sees git's own diagnostics before
    # the failure is raised.
    if result.returncode != 0:
        raise RuntimeError("Git pull zlyhal")
def rebuild_index(pull_git: bool = False) -> dict:
    """Run the full reindexing pipeline and return a summary of the run.

    The steps run in this order: optional ``git_pull()``, then
    ``scan_pages()``, ``build_chunks()`` and ``build_database()``.

    Args:
        pull_git: When true, call ``git_pull()`` before scanning.

    Returns:
        A dict with the keys:

        * ``duration_seconds`` - elapsed time of the whole run, rounded to
          two decimal places.
        * ``documents_scanned`` - ``len()`` of the ``scan_pages()`` result.
        * ``chunks_created`` - ``len()`` of the ``build_chunks()`` result.
        * ``counts`` - the value returned by ``build_database()``.
        * ``database_path`` - ``DB_FILE`` converted to ``str``.

    Raises:
        RuntimeError: Propagated from ``git_pull()`` when ``pull_git`` is
            true and the pull fails.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    print(f"[green]ZPWIKI_ROOT:[/green] {ZPWIKI_ROOT}")

    if pull_git:
        git_pull()

    # NOTE(review): the three steps share no arguments here; presumably each
    # one consumes artifacts written by the previous step, so keep the order.
    documents = scan_pages()
    chunks = build_chunks()
    counts = build_database()

    duration = round(time.perf_counter() - started, 2)
    return {
        "duration_seconds": duration,
        "documents_scanned": len(documents),
        "chunks_created": len(chunks),
        "counts": counts,
        "database_path": str(DB_FILE),
    }
def main() -> None:
    """Command-line entry point: parse options, reindex, print a summary.

    Recognises a single ``--pull`` flag, which is forwarded to
    ``rebuild_index`` as ``pull_git``. After the rebuild finishes, the
    duration and the per-table counts from the result are printed.
    """
    cli = argparse.ArgumentParser(description="Obnoví JSON súbory a SQLite index.")
    cli.add_argument(
        "--pull",
        help="Pred reindexovaním spustí git pull v zpwiki repozitári.",
        action="store_true",
    )
    options = cli.parse_args()

    summary = rebuild_index(pull_git=options.pull)
    index_counts = summary["counts"]

    # Report lines are printed one by one, in the same order as the summary
    # is meant to be read.
    print("[green]Reindex hotový.[/green]")
    print(f"Trvanie: {summary['duration_seconds']} s")
    print(f"Dokumentov: {index_counts['documents']}")
    print(f"Chunkov: {index_counts['chunks']}")
    print(f"Tag záznamov: {index_counts['tags']}")
    print(f"Kategória záznamov: {index_counts['categories']}")
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()