Files
invoice-master-poc-v2/packages/inference/inference/web/schemas/admin/datasets.py
2026-01-27 23:58:17 +01:00

86 lines
2.1 KiB
Python

"""Admin Dataset Schemas."""
from datetime import datetime

from pydantic import BaseModel, Field, model_validator

from .training import TrainingConfig
class DatasetCreateRequest(BaseModel):
    """Request to create a training dataset.

    Each field carries its own bounds via ``Field``. In addition, the two
    split ratios are validated together: their sum may not exceed 1.0,
    because the individual bounds alone (train up to 0.95, val up to 0.5)
    would otherwise admit combinations that claim more than the whole
    dataset.

    Raises:
        pydantic.ValidationError: if any field violates its bounds or if
            ``train_ratio + val_ratio`` is greater than 1.0.
    """

    name: str = Field(..., min_length=1, max_length=255, description="Dataset name")
    description: str | None = Field(None, description="Optional description")
    document_ids: list[str] = Field(..., min_length=1, description="Document UUIDs to include")
    train_ratio: float = Field(0.8, ge=0.1, le=0.95, description="Training split ratio")
    val_ratio: float = Field(0.1, ge=0.05, le=0.5, description="Validation split ratio")
    seed: int = Field(42, description="Random seed for split")

    @model_validator(mode="after")
    def validate_split_ratios(self) -> "DatasetCreateRequest":
        """Reject ratio pairs whose sum exceeds the whole dataset."""
        # Tiny tolerance so binary-float rounding on sums that are exactly 1.0
        # in decimal is not reported as an overflow.
        # NOTE(review): a sum of exactly 1.0 is allowed here; whether a
        # non-empty remainder is required is not visible in this module.
        if self.train_ratio + self.val_ratio > 1.0 + 1e-9:
            raise ValueError(
                "train_ratio + val_ratio must not exceed 1.0 "
                f"(got {self.train_ratio} + {self.val_ratio})"
            )
        return self
class DatasetDocumentItem(BaseModel):
    """A single document entry as it appears inside a dataset.

    All four fields are required; none of them declares a default.
    """

    # Identifier of the document, carried as a plain string.
    document_id: str

    # Split the document is assigned to.
    # NOTE(review): presumably one of train/val/test -- confirm with the producer.
    split: str

    # Number of pages counted for this document.
    page_count: int

    # Number of annotations counted for this document.
    annotation_count: int
class DatasetResponse(BaseModel):
    """Payload returned once a dataset has been created.

    Every field is a required string.
    """

    # Identifier of the dataset.
    dataset_id: str

    # Name of the dataset.
    name: str

    # Status of the dataset; the set of allowed values is not constrained here.
    status: str

    # Free-text message accompanying the response.
    message: str
class DatasetDetailResponse(BaseModel):
    """Full view of one dataset, including its per-document entries.

    Every field is required. Fields typed ``... | None`` accept ``None`` but
    must still be supplied, since no default is declared for them.
    """

    # --- identity -----------------------------------------------------------
    dataset_id: str
    name: str
    description: str | None
    status: str

    # --- split parameters ---------------------------------------------------
    train_ratio: float
    val_ratio: float
    seed: int

    # --- aggregate counts ---------------------------------------------------
    total_documents: int
    total_images: int
    total_annotations: int

    # --- storage / failure info (nullable) ----------------------------------
    dataset_path: str | None
    error_message: str | None

    # --- contents -----------------------------------------------------------
    documents: list[DatasetDocumentItem]

    # --- timestamps ---------------------------------------------------------
    created_at: datetime
    updated_at: datetime
class DatasetListItem(BaseModel):
    """Condensed dataset record used for list views.

    Every field is required; ``description`` accepts ``None`` but has no
    default, so it must still be supplied.
    """

    # --- identity -----------------------------------------------------------
    dataset_id: str
    name: str
    description: str | None
    status: str

    # --- aggregate counts ---------------------------------------------------
    total_documents: int
    total_images: int
    total_annotations: int

    # --- timestamps ---------------------------------------------------------
    created_at: datetime
class DatasetListResponse(BaseModel):
    """One page of a dataset listing, with its paging values.

    All fields are required.
    """

    # Paging values.
    # NOTE(review): ``total`` is presumably the count across all pages, not
    # just this one -- confirm against the endpoint that builds it.
    total: int
    limit: int
    offset: int

    # Dataset summaries contained in this page.
    datasets: list[DatasetListItem]
class DatasetTrainRequest(BaseModel):
    """Payload for launching a training task from a dataset.

    Both fields are required.
    """

    # Task name: required, between 1 and 255 characters.
    name: str = Field(
        ...,
        min_length=1,
        max_length=255,
        description="Training task name",
    )

    # Training settings; the schema is defined by ``TrainingConfig``.
    config: TrainingConfig = Field(
        ...,
        description="Training configuration",
    )