106 lines
2.7 KiB
Python
106 lines
2.7 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import frontmatter
|
|
|
|
|
|
# Project root: the directory one level above the folder that contains this module.
PROJECT_ROOT = Path(__file__).resolve().parents[1]

# zpwiki root directory: taken from the ZPWIKI_ROOT environment variable when it
# is set, otherwise a "zpwiki" directory next to the project root.
ZPWIKI_ROOT = Path(
    os.environ.get("ZPWIKI_ROOT", str(PROJECT_ROOT.parent.joinpath("zpwiki")))
).resolve()
PAGES_ROOT = ZPWIKI_ROOT.joinpath("pages")

# Files kept under the project's own data directory.
DATA_DIR = PROJECT_ROOT.joinpath("data")
DOCUMENTS_FILE = DATA_DIR.joinpath("documents.json")
CHUNKS_FILE = DATA_DIR.joinpath("chunks.json")
DB_FILE = DATA_DIR.joinpath("zp_index.sqlite")
|
|
|
|
|
|
def json_safe(value: Any) -> Any:
    """Convert metadata into a form that can be serialized as JSON.

    Dicts and lists are converted recursively (dict keys become strings),
    ``None`` and ``str``/``int``/``float``/``bool`` values pass through
    unchanged, and every other value is replaced by ``str(value)``.
    """
    # Containers first: rebuild them with converted members.
    if isinstance(value, dict):
        return {str(name): json_safe(member) for name, member in value.items()}
    if isinstance(value, list):
        return [json_safe(member) for member in value]

    # Values that are returned unchanged.
    is_plain = value is None or isinstance(value, (str, int, float, bool))
    if is_plain:
        return value

    # Everything else falls back to its string form.
    return str(value)
|
|
|
|
|
|
def normalize_list(value: Any) -> list[str]:
    """Normalize a value into a clean list of unique, non-empty strings.

    Args:
        value: ``None``, a list or tuple of items, a comma-separated string,
            or any other single value.

    Returns:
        The stripped string items in first-seen order, without empty strings
        and without duplicates. ``None`` yields an empty list.
    """
    if value is None:
        return []

    if isinstance(value, (list, tuple)):
        # Skip None elements (e.g. an empty YAML list item); str(None) would
        # otherwise add the literal text "None" to the result.
        candidates = [str(item).strip() for item in value if item is not None]
    elif isinstance(value, str):
        candidates = [part.strip() for part in value.split(",")]
    else:
        candidates = [str(value).strip()]

    # dict.fromkeys drops duplicates while keeping first-seen order.
    return [item for item in dict.fromkeys(candidates) if item]
|
|
|
|
|
|
def read_json(path: Path) -> Any:
    """Read the UTF-8 encoded JSON file at *path* and return the decoded data.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    if path.exists():
        text = path.read_text(encoding="utf-8")
        return json.loads(text)

    raise FileNotFoundError(f"Súbor neexistuje: {path}")
|
|
|
|
|
|
def write_json(path: Path, data: Any) -> None:
    """Write *data* to *path* as indented, UTF-8 encoded JSON.

    Missing parent directories of *path* are created. Non-ASCII characters
    are written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        path: Destination file; an existing file is overwritten.
        data: Value to serialize with the ``json`` module.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot serialize.
        ValueError: If *data* contains a circular reference.
    """
    # Serialize before opening the file: mode "w" truncates on open, so an
    # error raised halfway through encoding would otherwise wipe the existing
    # file and leave partial JSON behind.
    payload = json.dumps(data, ensure_ascii=False, indent=2)

    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as file:
        file.write(payload)
|
|
|
|
|
|
def load_zpwiki_page(file_path: Path) -> dict[str, Any]:
    """Load one zpwiki page and return its front matter and body as a dict.

    Args:
        file_path: Path of the page file. It must lie under ``ZPWIKI_ROOT``,
            because the stored ``path`` is made relative to that root.

    Returns:
        A dict with the keys ``path``, ``title``, ``categories``, ``tags``,
        ``published``, ``author``, ``taxonomy``, ``metadata`` and ``content``.
        ``categories`` and ``tags`` are normalized lists of strings; they are
        read from the top-level metadata first and from ``taxonomy`` as a
        fallback.

    Raises:
        ValueError: If ``file_path`` is not located under ``ZPWIKI_ROOT``.
    """
    post = frontmatter.load(file_path)

    metadata = {key: json_safe(value) for key, value in post.metadata.items()}

    # Front matter is free-form: "taxonomy" may be missing, a scalar or a
    # list. Only a mapping supports the .get() lookups below, so anything
    # else is treated as "no taxonomy" (the raw value stays in "metadata").
    taxonomy = metadata.get("taxonomy")
    if not isinstance(taxonomy, dict):
        taxonomy = {}

    categories = normalize_list(
        metadata.get("category")
        or taxonomy.get("category")
    )

    tags = normalize_list(
        metadata.get("tag")
        or metadata.get("tags")
        or taxonomy.get("tag")
        or taxonomy.get("tags")
    )

    return {
        "path": str(file_path.relative_to(ZPWIKI_ROOT)),
        "title": metadata.get("title"),
        "categories": categories,
        "tags": tags,
        "published": metadata.get("published"),
        "author": metadata.get("author") or taxonomy.get("author"),
        "taxonomy": taxonomy,
        "metadata": metadata,
        "content": post.content.strip(),
    }
|