Bakalarska_praca/private_gpt/server/ingest/model.py
oleh 959a391334
Some checks failed
publish docs / publish-docs (push) Has been cancelled
release-please / release-please (push) Has been cancelled
tests / setup (push) Has been cancelled
tests / ${{ matrix.quality-command }} (black) (push) Has been cancelled
tests / ${{ matrix.quality-command }} (mypy) (push) Has been cancelled
tests / ${{ matrix.quality-command }} (ruff) (push) Has been cancelled
tests / test (push) Has been cancelled
tests / all_checks_passed (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled
add self code
2024-09-27 18:52:16 +02:00

33 lines
981 B
Python

from typing import Any, Literal
from llama_index.core.schema import Document
from pydantic import BaseModel, Field
class IngestedDoc(BaseModel):
    """Serializable description of an ingested document.

    Holds the document identifier together with its metadata after the
    keys listed in ``curate_metadata`` have been removed.
    """

    # Constant tag; the only accepted value is "ingest.document".
    object: Literal["ingest.document"]
    # Identifier of the document this entry describes.
    doc_id: str = Field(examples=["c202d5e6-7b69-4869-81cc-dd574ee8ee11"])
    # Metadata associated with the document; may be None.
    doc_metadata: dict[str, Any] | None = Field(
        examples=[
            {
                "page_label": "2",
                "file_name": "Sales Report Q3 2023.pdf",
            }
        ]
    )

    @staticmethod
    def curate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
        """Remove unwanted metadata keys.

        The ``doc_id``, ``window`` and ``original_text`` entries are removed
        from ``metadata`` in place; keys that are not present are ignored.

        Args:
            metadata: Mapping to clean. It is modified by this call, so pass
                a copy when the original mapping must stay untouched.

        Returns:
            The same ``metadata`` object, after the keys were removed.
        """
        for key in ("doc_id", "window", "original_text"):
            metadata.pop(key, None)
        return metadata

    @staticmethod
    def from_document(document: Document) -> "IngestedDoc":
        """Build an ``IngestedDoc`` from ``document``.

        Args:
            document: Source document; its ``doc_id`` and ``metadata``
                attributes are read.

        Returns:
            A new ``IngestedDoc`` carrying the document id and a curated
            copy of the document metadata.
        """
        # curate_metadata edits its argument in place. Hand it a shallow copy
        # so that creating the API model does not strip keys from the
        # document's own metadata as a hidden side effect.
        curated = IngestedDoc.curate_metadata(dict(document.metadata))
        return IngestedDoc(
            object="ingest.document",
            doc_id=document.doc_id,
            doc_metadata=curated,
        )