re-structure

Yaojia Wang
2026-02-01 22:55:31 +01:00
parent 400b12a967
commit b602d0a340
176 changed files with 856 additions and 853 deletions

@@ -12,7 +12,7 @@ from uuid import uuid4
import pytest
-from inference.data.admin_models import (
+from backend.data.admin_models import (
AdminAnnotation,
AdminDocument,
TrainingDataset,
@@ -105,7 +105,7 @@ class TestDatasetBuilder:
sample_documents, sample_annotations
):
"""Dataset builder should create images/ and labels/ with train/val/test subdirs."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
dataset_dir = tmp_path / "datasets" / "test"
builder = DatasetBuilder(
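
The layout asserted here is the conventional Ultralytics YOLO dataset tree. A minimal sketch of what the builder is expected to produce, assuming the standard directory names (an illustration, not the repository's implementation):

from pathlib import Path

def make_yolo_layout(dataset_dir: Path) -> None:
    # images/ and labels/, each with train/val/test subdirectories
    for top in ("images", "labels"):
        for split in ("train", "val", "test"):
            (dataset_dir / top / split).mkdir(parents=True, exist_ok=True)
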
@@ -141,7 +141,7 @@ class TestDatasetBuilder:
sample_documents, sample_annotations
):
"""Images should be copied from admin_images to dataset folder."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -177,7 +177,7 @@ class TestDatasetBuilder:
sample_documents, sample_annotations
):
"""YOLO label files should be generated with correct format."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
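
The "correct format" asserted for label files is the standard YOLO detection format: one line per box, class index followed by center-normalized coordinates. A hedged sketch of that conversion (the helper name and pixel-box convention are assumptions, not the builder's actual API):

def to_yolo_line(class_id: int, x: float, y: float, w: float, h: float,
                 img_w: int, img_h: int) -> str:
    # Absolute-pixel box (top-left x/y, width, height) ->
    # "class x_center y_center width height", all normalized to [0, 1]
    xc = (x + w / 2) / img_w
    yc = (y + h / 2) / img_h
    return f"{class_id} {xc:.6f} {yc:.6f} {w / img_w:.6f} {h / img_h:.6f}"
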
@@ -221,7 +221,7 @@ class TestDatasetBuilder:
sample_documents, sample_annotations
):
"""data.yaml should be generated with correct field classes."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
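
data.yaml is the dataset descriptor consumed by YOLO training: split paths plus the class-name mapping the docstring calls "field classes". A minimal sketch, assuming PyYAML and placeholder class names rather than the project's real label set:

from pathlib import Path
import yaml

def write_data_yaml(dataset_dir: Path, class_names: list[str]) -> None:
    # Split paths are relative to "path"; names maps class index -> class name.
    spec = {
        "path": str(dataset_dir),
        "train": "images/train",
        "val": "images/val",
        "test": "images/test",
        "names": dict(enumerate(class_names)),
    }
    (dataset_dir / "data.yaml").write_text(yaml.safe_dump(spec, sort_keys=False))
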
@@ -257,7 +257,7 @@ class TestDatasetBuilder:
sample_documents, sample_annotations
):
"""Documents should be split into train/val/test according to ratios."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -294,7 +294,7 @@ class TestDatasetBuilder:
sample_documents, sample_annotations
):
"""After successful build, dataset status should be updated to 'ready'."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -327,7 +327,7 @@ class TestDatasetBuilder:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""If build fails, dataset status should be set to 'failed'."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -357,7 +357,7 @@ class TestDatasetBuilder:
sample_documents, sample_annotations
):
"""Same seed should produce same splits."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
results = []
for _ in range(2):
@@ -405,7 +405,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Documents with unique group_key are distributed across splits."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -433,7 +433,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Documents with null/empty group_key are each treated as independent single-doc groups."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -461,7 +461,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Documents with same group_key should be assigned to the same split."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
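
The group-key tests in this class pin down a leakage guard: splits are assigned per group, so related documents never straddle train/val/test, documents without a group_key count as independent single-doc groups, and a fixed seed makes the assignment reproducible. A sketch of group-aware splitting under those assumptions (names and fields are illustrative, not the builder's internals):

import random
from collections import defaultdict

def assign_splits_by_group(docs, ratios=(0.8, 0.1, 0.1), seed=42):
    # Bucket docs by group_key; null/empty keys become single-doc groups.
    groups = defaultdict(list)
    for doc in docs:
        key = doc.get("group_key") or f"solo-{doc['id']}"
        groups[key].append(doc)

    keys = sorted(groups)                 # stable base order
    random.Random(seed).shuffle(keys)     # same seed -> same assignment

    n_train = int(len(keys) * ratios[0])
    n_val = int(len(keys) * ratios[1])
    split_of = {}
    for i, key in enumerate(keys):
        split = "train" if i < n_train else "val" if i < n_train + n_val else "test"
        for doc in groups[key]:           # whole group shares one split
            split_of[doc["id"]] = split
    return split_of
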
@@ -494,7 +494,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Multi-doc groups should be split according to train/val/test ratios."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -536,7 +536,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Mix of single-doc and multi-doc groups should be handled correctly."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -574,7 +574,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Same seed should produce same split assignments."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -601,7 +601,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Different seeds should potentially produce different split assignments."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -627,7 +627,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Every document should be assigned a split."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -654,7 +654,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""Empty document list should return empty result."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -671,7 +671,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""When all groups have multiple docs, splits should follow ratios."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -707,7 +707,7 @@ class TestAssignSplitsByGroup:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""When all groups have single doc, they are distributed across splits."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
builder = DatasetBuilder(
datasets_repo=mock_datasets_repo,
@@ -798,7 +798,7 @@ class TestBuildDatasetWithGroupKey:
mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""build_dataset should use group_key for split assignment."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
tmp_path, docs = grouped_documents
@@ -847,7 +847,7 @@ class TestBuildDatasetWithGroupKey:
self, tmp_path, mock_datasets_repo, mock_documents_repo, mock_annotations_repo
):
"""All docs with same group_key should go to same split."""
-from inference.web.services.dataset_builder import DatasetBuilder
+from backend.web.services.dataset_builder import DatasetBuilder
# Create 5 docs all with same group_key
docs = []