"""Admin Dataset Schemas.""" from datetime import datetime from pydantic import BaseModel, Field from .training import TrainingConfig class DatasetCreateRequest(BaseModel): """Request to create a training dataset.""" name: str = Field(..., min_length=1, max_length=255, description="Dataset name") description: str | None = Field(None, description="Optional description") document_ids: list[str] = Field(..., min_length=1, description="Document UUIDs to include") train_ratio: float = Field(0.8, ge=0.1, le=0.95, description="Training split ratio") val_ratio: float = Field(0.1, ge=0.05, le=0.5, description="Validation split ratio") seed: int = Field(42, description="Random seed for split") class DatasetDocumentItem(BaseModel): """Document within a dataset.""" document_id: str split: str page_count: int annotation_count: int class DatasetResponse(BaseModel): """Response after creating a dataset.""" dataset_id: str name: str status: str message: str class DatasetDetailResponse(BaseModel): """Detailed dataset info with documents.""" dataset_id: str name: str description: str | None status: str train_ratio: float val_ratio: float seed: int total_documents: int total_images: int total_annotations: int dataset_path: str | None error_message: str | None documents: list[DatasetDocumentItem] created_at: datetime updated_at: datetime class DatasetListItem(BaseModel): """Dataset in list view.""" dataset_id: str name: str description: str | None status: str total_documents: int total_images: int total_annotations: int created_at: datetime class DatasetListResponse(BaseModel): """Paginated dataset list.""" total: int limit: int offset: int datasets: list[DatasetListItem] class DatasetTrainRequest(BaseModel): """Request to start training from a dataset.""" name: str = Field(..., min_length=1, max_length=255, description="Training task name") config: TrainingConfig = Field(..., description="Training configuration")