86 lines
2.1 KiB
Python
86 lines
2.1 KiB
Python
"""Admin Dataset Schemas."""
|
|
|
|
from datetime import datetime

from pydantic import BaseModel, Field, model_validator

from .training import TrainingConfig
|
|
|
|
|
|
class DatasetCreateRequest(BaseModel):
    """Request to create a training dataset.

    ``train_ratio`` and ``val_ratio`` are validated individually and must
    together not exceed 1.0.
    """

    name: str = Field(..., min_length=1, max_length=255, description="Dataset name")
    description: str | None = Field(None, description="Optional description")
    # At least one document id is required.
    document_ids: list[str] = Field(..., min_length=1, description="Document UUIDs to include")
    train_ratio: float = Field(0.8, ge=0.1, le=0.95, description="Training split ratio")
    val_ratio: float = Field(0.1, ge=0.05, le=0.5, description="Validation split ratio")
    seed: int = Field(42, description="Random seed for split")

    @model_validator(mode="after")
    def _check_split_ratios(self) -> "DatasetCreateRequest":
        """Reject ratio pairs that cannot describe a valid split.

        The per-field bounds alone admit combinations such as
        ``train_ratio=0.95`` with ``val_ratio=0.5``, whose sum exceeds the
        whole dataset.

        Raises:
            ValueError: If ``train_ratio + val_ratio`` is greater than 1.0.
        """
        # Small tolerance so pairs that should sum to exactly 1.0 are not
        # rejected because of binary floating-point rounding.
        # NOTE(review): the remainder (1 - train - val) is presumably the test
        # split; a sum of exactly 1.0 is still accepted here - confirm whether
        # an empty remainder is acceptable to the dataset builder.
        if self.train_ratio + self.val_ratio > 1.0 + 1e-9:
            raise ValueError("train_ratio + val_ratio must not exceed 1.0")
        return self
|
|
|
|
|
|
class DatasetDocumentItem(BaseModel):
    """Document within a dataset."""

    # All four fields are required; none declares a default.
    document_id: str
    # NOTE(review): the set of allowed split labels is not visible in this
    # module - confirm against the producer before relying on specific values.
    split: str
    # Per-document counters.
    page_count: int
    annotation_count: int
|
|
|
|
|
|
class DatasetResponse(BaseModel):
    """Response after creating a dataset."""

    # Every field is a required string.
    dataset_id: str
    name: str
    # NOTE(review): possible status values are defined elsewhere - confirm.
    status: str
    message: str
|
|
|
|
|
|
class DatasetDetailResponse(BaseModel):
    """Detailed dataset info with documents."""

    # --- Identity -----------------------------------------------------------
    dataset_id: str
    name: str
    # Nullable, but declared without a default, so the key must still be
    # supplied when the model is constructed.
    description: str | None
    status: str

    # --- Split parameters ---------------------------------------------------
    train_ratio: float
    val_ratio: float
    seed: int

    # --- Aggregate counters -------------------------------------------------
    total_documents: int
    total_images: int
    total_annotations: int

    # --- Nullable, required fields (no default declared) --------------------
    dataset_path: str | None
    error_message: str | None

    # --- Per-document entries -----------------------------------------------
    documents: list[DatasetDocumentItem]

    # --- Timestamps ---------------------------------------------------------
    created_at: datetime
    updated_at: datetime
|
|
|
|
|
|
class DatasetListItem(BaseModel):
    """Dataset in list view."""

    # Identity and state.
    dataset_id: str
    name: str
    # Nullable, but no default is declared, so the key is still required.
    description: str | None
    status: str

    # Aggregate counters.
    total_documents: int
    total_images: int
    total_annotations: int

    # Creation timestamp.
    created_at: datetime
|
|
|
|
|
|
class DatasetListResponse(BaseModel):
    """Paginated dataset list."""

    # Pagination metadata; all three values are required integers.
    # NOTE(review): presumably `total` counts all matches rather than just the
    # returned page - confirm against the endpoint that builds this model.
    total: int
    limit: int
    offset: int

    # Entries for the returned page.
    datasets: list[DatasetListItem]
|
|
|
|
|
|
class DatasetTrainRequest(BaseModel):
    """Request to start training from a dataset."""

    # Required, 1-255 characters.
    name: str = Field(
        default=...,
        min_length=1,
        max_length=255,
        description="Training task name",
    )

    # Required; the TrainingConfig schema is imported from the sibling
    # `training` module.
    config: TrainingConfig = Field(
        default=...,
        description="Training configuration",
    )
|