159 lines
4.5 KiB
Python
159 lines
4.5 KiB
Python
"""
|
|
Storage path prefixes for unified file organization.
|
|
|
|
Provides standardized path prefixes for organizing files within
|
|
the storage backend, ensuring consistent structure across
|
|
local, Azure Blob, and S3 storage.
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class StoragePrefixes:
|
|
"""Standardized storage path prefixes.
|
|
|
|
All paths are relative to the storage backend root.
|
|
These prefixes ensure consistent file organization across
|
|
all storage backends (local, Azure, S3).
|
|
|
|
Usage:
|
|
from shared.storage.prefixes import PREFIXES
|
|
|
|
path = f"{PREFIXES.DOCUMENTS}/{document_id}.pdf"
|
|
storage.upload_bytes(content, path)
|
|
"""
|
|
|
|
# Document storage
|
|
DOCUMENTS: str = "documents"
|
|
"""Original document files (PDFs, etc.)"""
|
|
|
|
IMAGES: str = "images"
|
|
"""Page images extracted from documents"""
|
|
|
|
# Processing directories
|
|
UPLOADS: str = "uploads"
|
|
"""Temporary upload staging area"""
|
|
|
|
RESULTS: str = "results"
|
|
"""Inference results and visualizations"""
|
|
|
|
EXPORTS: str = "exports"
|
|
"""Exported datasets and annotations"""
|
|
|
|
# Training data
|
|
DATASETS: str = "datasets"
|
|
"""Training dataset files"""
|
|
|
|
MODELS: str = "models"
|
|
"""Trained model weights and checkpoints"""
|
|
|
|
# Data pipeline directories (legacy compatibility)
|
|
RAW_PDFS: str = "raw_pdfs"
|
|
"""Raw PDF files for auto-labeling pipeline"""
|
|
|
|
STRUCTURED_DATA: str = "structured_data"
|
|
"""CSV/structured data for matching"""
|
|
|
|
ADMIN_IMAGES: str = "admin_images"
|
|
"""Admin UI page images"""
|
|
|
|
@staticmethod
|
|
def document_path(document_id: str, extension: str = ".pdf") -> str:
|
|
"""Get path for a document file.
|
|
|
|
Args:
|
|
document_id: Unique document identifier.
|
|
extension: File extension (include leading dot).
|
|
|
|
Returns:
|
|
Storage path like "documents/abc123.pdf"
|
|
"""
|
|
ext = extension if extension.startswith(".") else f".{extension}"
|
|
return f"{PREFIXES.DOCUMENTS}/{document_id}{ext}"
|
|
|
|
@staticmethod
|
|
def image_path(document_id: str, page_num: int, extension: str = ".png") -> str:
|
|
"""Get path for a page image file.
|
|
|
|
Args:
|
|
document_id: Unique document identifier.
|
|
page_num: Page number (1-indexed).
|
|
extension: File extension (include leading dot).
|
|
|
|
Returns:
|
|
Storage path like "images/abc123/page_1.png"
|
|
"""
|
|
ext = extension if extension.startswith(".") else f".{extension}"
|
|
return f"{PREFIXES.IMAGES}/{document_id}/page_{page_num}{ext}"
|
|
|
|
@staticmethod
|
|
def upload_path(filename: str, subfolder: str | None = None) -> str:
|
|
"""Get path for a temporary upload file.
|
|
|
|
Args:
|
|
filename: Original filename.
|
|
subfolder: Optional subfolder (e.g., "async").
|
|
|
|
Returns:
|
|
Storage path like "uploads/filename.pdf" or "uploads/async/filename.pdf"
|
|
"""
|
|
if subfolder:
|
|
return f"{PREFIXES.UPLOADS}/{subfolder}/{filename}"
|
|
return f"{PREFIXES.UPLOADS}/{filename}"
|
|
|
|
@staticmethod
|
|
def result_path(filename: str) -> str:
|
|
"""Get path for a result file.
|
|
|
|
Args:
|
|
filename: Result filename.
|
|
|
|
Returns:
|
|
Storage path like "results/filename.json"
|
|
"""
|
|
return f"{PREFIXES.RESULTS}/{filename}"
|
|
|
|
@staticmethod
|
|
def export_path(export_id: str, filename: str) -> str:
|
|
"""Get path for an export file.
|
|
|
|
Args:
|
|
export_id: Unique export identifier.
|
|
filename: Export filename.
|
|
|
|
Returns:
|
|
Storage path like "exports/abc123/filename.zip"
|
|
"""
|
|
return f"{PREFIXES.EXPORTS}/{export_id}/{filename}"
|
|
|
|
@staticmethod
|
|
def dataset_path(dataset_id: str, filename: str) -> str:
|
|
"""Get path for a dataset file.
|
|
|
|
Args:
|
|
dataset_id: Unique dataset identifier.
|
|
filename: Dataset filename.
|
|
|
|
Returns:
|
|
Storage path like "datasets/abc123/filename.yaml"
|
|
"""
|
|
return f"{PREFIXES.DATASETS}/{dataset_id}/{filename}"
|
|
|
|
@staticmethod
|
|
def model_path(version: str, filename: str) -> str:
|
|
"""Get path for a model file.
|
|
|
|
Args:
|
|
version: Model version string.
|
|
filename: Model filename.
|
|
|
|
Returns:
|
|
Storage path like "models/v1.0.0/best.pt"
|
|
"""
|
|
return f"{PREFIXES.MODELS}/{version}/{filename}"
|
|
|
|
|
|
# Default instance for convenient access
|
|
# Constructed with no arguments, so every prefix field keeps the default
# value declared on StoragePrefixes.
PREFIXES = StoragePrefixes()
|