"""Scan page README files under PAGES_ROOT and write a JSON document index.

For every ``README.md`` found below ``PAGES_ROOT`` the page is loaded with
``load_zpwiki_page``; its full content is replaced by a short preview and a
length, and the resulting records are written to ``DOCUMENTS_FILE``.  A
frequency summary of metadata keys, categories, tags and authors is printed
to the console via ``rich``.
"""
from __future__ import annotations

import sys
from collections import Counter
from pathlib import Path

from rich import print
from rich.markup import escape

# Make the project root importable when this file is run directly as a script.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from scripts.common import DOCUMENTS_FILE, PAGES_ROOT, ZPWIKI_ROOT, load_zpwiki_page, write_json


def scan_pages() -> list[dict]:
    """Load every page README, write the document index and print a summary.

    Returns:
        The list of document records that was written to ``DOCUMENTS_FILE``.
        Each record is the loaded page without its ``content`` key, plus
        ``content_preview`` (first 500 characters) and ``content_length``.

    Raises:
        SystemExit: If ``PAGES_ROOT`` does not exist.
    """
    if not PAGES_ROOT.exists():
        raise SystemExit(f"Neexistuje priečinok: {PAGES_ROOT}")

    documents: list[dict] = []
    metadata_keys: Counter = Counter()
    categories: Counter = Counter()
    tags: Counter = Counter()
    authors: Counter = Counter()

    # Sorted so the output file has a stable, reproducible order.
    for file_path in sorted(PAGES_ROOT.glob("**/README.md")):
        page = load_zpwiki_page(file_path)
        # The default of pop() only covers a missing key; also tolerate an
        # explicit None so slicing and len() below cannot fail.
        content = page.pop("content", "") or ""

        # Explicit loops rather than Counter.update(): update() given a
        # mapping would add the mapping's *values* as counts instead of
        # counting each key once.
        for key in page["metadata"]:
            metadata_keys[key] += 1
        for category in page["categories"]:
            categories[category] += 1
        for tag in page["tags"]:
            tags[tag] += 1

        author = page.get("author")
        if author:
            authors[str(author)] += 1

        documents.append(
            {
                **page,
                "content_preview": content[:500],
                "content_length": len(content),
            }
        )

    write_json(DOCUMENTS_FILE, documents)
    print_summary(documents, metadata_keys, categories, tags, authors)
    return documents


def _print_top(heading: str, counter: Counter, limit: int) -> None:
    """Print *heading* followed by the *limit* most common entries of *counter*."""
    print(heading)
    for key, count in counter.most_common(limit):
        # Keys come from page data; escape them so text such as "[draft]" is
        # shown literally instead of being parsed as rich console markup.
        print(f"{escape(str(key))}: {count}")


def print_summary(
    documents: list[dict],
    metadata_keys: Counter,
    categories: Counter,
    tags: Counter,
    authors: Counter,
) -> None:
    """Print scan statistics to the console.

    Args:
        documents: Document records produced by the scan.
        metadata_keys: Occurrence count per metadata key.
        categories: Occurrence count per category.
        tags: Occurrence count per tag.
        authors: Occurrence count per author.
    """
    # Paths are escaped as well: a directory name containing "[...]" would
    # otherwise be interpreted as markup.
    print(f"[green]ZPWIKI_ROOT:[/green] {escape(str(ZPWIKI_ROOT))}")
    print(f"[green]Našiel som dokumentov:[/green] {len(documents)}")
    print(f"[green]Výstup uložený do:[/green] {escape(str(DOCUMENTS_FILE))}")

    _print_top("\n[bold]Najčastejšie metadata kľúče:[/bold]", metadata_keys, 30)
    _print_top("\n[bold]Najčastejšie kategórie:[/bold]", categories, 30)
    _print_top("\n[bold]Najčastejšie tagy:[/bold]", tags, 40)
    _print_top("\n[bold]Najčastejší autori:[/bold]", authors, 20)

    if documents:
        print("\n[bold]Ukážka prvého dokumentu:[/bold]")
        # A non-string object is pretty-printed by rich, not markup-parsed.
        print(documents[0])


def main() -> None:
    """Script entry point: run the page scan."""
    scan_pages()


if __name__ == "__main__":
    main()