dp-zp-agent/scripts/common.py

106 lines
2.7 KiB
Python

from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any
import frontmatter
# Project root: the parent of the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parents[1]

# Root of zpwiki. The ZPWIKI_ROOT environment variable takes precedence;
# otherwise a "zpwiki" directory next to the project root is used.
ZPWIKI_ROOT = Path(
    os.environ.get("ZPWIKI_ROOT", str(PROJECT_ROOT.parent / "zpwiki"))
).resolve()
PAGES_ROOT = ZPWIKI_ROOT.joinpath("pages")

# Data files kept in the project's "data" directory.
DATA_DIR = PROJECT_ROOT.joinpath("data")
DOCUMENTS_FILE = DATA_DIR.joinpath("documents.json")
CHUNKS_FILE = DATA_DIR.joinpath("chunks.json")
DB_FILE = DATA_DIR.joinpath("zp_index.sqlite")
def json_safe(value: Any) -> Any:
    """Convert a metadata value into a structure that ``json`` can serialize.

    ``None``, strings, numbers and booleans are returned unchanged. Lists and
    tuples become lists whose elements are converted recursively. Dicts become
    dicts with stringified keys and recursively converted values. Any other
    object is replaced by ``str(value)``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    # Tuples are treated like lists so they end up as JSON arrays instead of
    # falling through to str() and becoming the Python repr "(a, b)".
    if isinstance(value, (list, tuple)):
        return [json_safe(item) for item in value]
    if isinstance(value, dict):
        return {str(key): json_safe(item) for key, item in value.items()}
    return str(value)
def normalize_list(value: Any) -> list[str]:
    """Normalize a value into a clean list of unique, non-empty strings.

    * ``None`` yields an empty list.
    * A list or tuple is converted item by item; ``None`` items are skipped
      so they do not turn into the literal string ``"None"``.
    * A string is split on commas.
    * Any other value is converted with ``str()`` and treated as one item.

    Every item is stripped of surrounding whitespace; empty items are dropped
    and duplicates are removed while keeping first-occurrence order.
    """
    if value is None:
        return []

    if isinstance(value, (list, tuple)):
        candidates = [str(item).strip() for item in value if item is not None]
    elif isinstance(value, str):
        candidates = [part.strip() for part in value.split(",")]
    else:
        candidates = [str(value).strip()]

    # dict.fromkeys keeps insertion order, giving an order-preserving dedup.
    return list(dict.fromkeys(item for item in candidates if item))
def read_json(path: Path) -> Any:
    """Load and return the JSON document stored at *path*.

    The file is read as UTF-8.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if path.exists():
        with path.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"Súbor neexistuje: {path}")
def write_json(path: Path, data: Any) -> None:
    """Write *data* to *path* as indented UTF-8 JSON.

    Missing parent directories are created. Non-ASCII characters are written
    as-is (``ensure_ascii=False``) with a two-space indent.

    Raises:
        TypeError: if *data* contains a value ``json`` cannot serialize. In
            that case the target file is not opened, so any existing content
            is left intact.
    """
    # Serialize first: opening the file with "w" truncates it, so a failure
    # during serialization must happen before the file is touched.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as file:
        file.write(payload)
def load_zpwiki_page(file_path: Path) -> dict[str, Any]:
    """Parse one page file with front matter into a plain dictionary.

    The front matter is read with ``frontmatter.load`` and every metadata
    value is passed through ``json_safe``. Categories and tags are looked up
    first at the top level of the metadata and then under ``taxonomy``, and
    are normalized with ``normalize_list``.

    Returns:
        A dict with the keys ``path`` (relative to ``ZPWIKI_ROOT``),
        ``title``, ``categories``, ``tags``, ``published``, ``author``,
        ``taxonomy``, ``metadata`` and ``content`` (body text, stripped).

    Raises:
        ValueError: if *file_path* is not located under ``ZPWIKI_ROOT``
            (raised by ``Path.relative_to``).
    """
    post = frontmatter.load(file_path)
    metadata = {key: json_safe(value) for key, value in post.metadata.items()}

    taxonomy = metadata.get("taxonomy") or {}
    if not isinstance(taxonomy, dict):
        # A scalar or list under "taxonomy" has no keys to look up; treat it
        # as empty instead of failing on .get(). The raw value is still
        # available under "metadata".
        taxonomy = {}

    categories = normalize_list(
        metadata.get("category") or taxonomy.get("category")
    )
    tags = normalize_list(
        metadata.get("tag")
        or metadata.get("tags")
        or taxonomy.get("tag")
        or taxonomy.get("tags")
    )

    return {
        "path": str(file_path.relative_to(ZPWIKI_ROOT)),
        "title": metadata.get("title"),
        "categories": categories,
        "tags": tags,
        "published": metadata.get("published"),
        "author": metadata.get("author") or taxonomy.get("author"),
        "taxonomy": taxonomy,
        "metadata": metadata,
        "content": post.content.strip(),
    }