From 15533285c65fa974ec410fa879b9c8c1a8444f14 Mon Sep 17 00:00:00 2001 From: Yaojia Wang Date: Wed, 4 Feb 2026 23:30:06 +0100 Subject: [PATCH] Fix the skill --- .claude/skills/backend-patterns/SKILL.md | 480 ++++++------- .claude/skills/coding-standards/SKILL.md | 779 +++++---------------- .opencode/skills/backend-patterns/SKILL.md | 480 ++++++------- .opencode/skills/coding-standards/SKILL.md | 779 +++++---------------- 4 files changed, 788 insertions(+), 1730 deletions(-) diff --git a/.claude/skills/backend-patterns/SKILL.md b/.claude/skills/backend-patterns/SKILL.md index 53bf07e..8111b1d 100644 --- a/.claude/skills/backend-patterns/SKILL.md +++ b/.claude/skills/backend-patterns/SKILL.md @@ -1,314 +1,274 @@ -# Backend Development Patterns +# .NET Development Best Practices -Backend architecture patterns for Python/FastAPI/PostgreSQL applications. - -## API Design - -### RESTful Structure +## Project Structure ``` -GET /api/v1/documents # List -GET /api/v1/documents/{id} # Get -POST /api/v1/documents # Create -PUT /api/v1/documents/{id} # Replace -PATCH /api/v1/documents/{id} # Update -DELETE /api/v1/documents/{id} # Delete - -GET /api/v1/documents?status=processed&sort=created_at&limit=20&offset=0 +src/ + Domain/ # Entities, value objects, domain events + Application/ # Use cases, DTOs, interfaces + Infrastructure/ # EF Core, external services + Api/ # Controllers, middleware, filters +tests/ + Unit/ + Integration/ ``` -### FastAPI Route Pattern +## Code Style -```python -from fastapi import APIRouter, HTTPException, Depends, Query, File, UploadFile -from pydantic import BaseModel +```csharp +// Use records for DTOs and value objects +public sealed record CreateDocumentRequest(string Name, string Type); -router = APIRouter(prefix="/api/v1", tags=["inference"]) +// Use primary constructors +public class DocumentService(IRepository<Document> repo, ILogger logger) +{ + public async Task<Document?> GetAsync(Guid id, CancellationToken ct) => + await repo.GetByIdAsync(id, ct); +}
-@router.post("/infer", response_model=ApiResponse[InferenceResult]) -async def infer_document( - file: UploadFile = File(...), - confidence_threshold: float = Query(0.5, ge=0, le=1), - service: InferenceService = Depends(get_inference_service) -) -> ApiResponse[InferenceResult]: - result = await service.process(file, confidence_threshold) - return ApiResponse(success=True, data=result) +// Prefer expression body for simple methods +public Document? FindById(Guid id) => _documents.FirstOrDefault(d => d.Id == id); + +// Use collection expressions +int[] numbers = [1, 2, 3]; +List<string> names = ["Alice", "Bob"]; ``` -### Consistent Response Schema +## Async/Await -```python -from typing import Generic, TypeVar -T = TypeVar('T') +```csharp +// Always pass CancellationToken +public async Task<Document?> GetAsync(Guid id, CancellationToken ct) -class ApiResponse(BaseModel, Generic[T]): - success: bool - data: T | None = None - error: str | None = None - meta: dict | None = None +// Use ConfigureAwait(false) in libraries +await _httpClient.GetAsync(url, ct).ConfigureAwait(false); + +// Avoid async void (except event handlers) +public async Task ProcessAsync() { } // Good +public async void Process() { } // Bad + +// Use ValueTask for hot paths with frequent sync completion +public ValueTask<int> GetCachedCountAsync() ``` -## Core Patterns +## Dependency Injection -### Repository Pattern +```csharp +// Register by interface +builder.Services.AddScoped<IDocumentService, DocumentService>(); -```python -from typing import Protocol +// Use Options pattern for configuration +builder.Services.Configure<AppSettings>(builder.Configuration.GetSection("App")); -class DocumentRepository(Protocol): - def find_all(self, filters: dict | None = None) -> list[Document]: ... - def find_by_id(self, id: str) -> Document | None: ... - def create(self, data: dict) -> Document: ... - def update(self, id: str, data: dict) -> Document: ... - def delete(self, id: str) -> None: ...
+public class MyService(IOptions<AppSettings> options) +{ + private readonly AppSettings _settings = options.Value; +} + +// Avoid service locator pattern +// Bad: var service = serviceProvider.GetService<IDocumentService>(); +// Good: Constructor injection ``` -### Service Layer +## Entity Framework Core -```python -class InferenceService: - def __init__(self, model_path: str, use_gpu: bool = True): - self.pipeline = InferencePipeline(model_path=model_path, use_gpu=use_gpu) +```csharp +// Always use AsNoTracking for read-only queries +await _context.Documents.AsNoTracking().ToListAsync(ct); - async def process(self, file: UploadFile, confidence_threshold: float) -> InferenceResult: - temp_path = self._save_temp_file(file) - try: - return self.pipeline.process_pdf(temp_path) - finally: - temp_path.unlink(missing_ok=True) -``` +// Use projection to select only needed fields +await _context.Documents + .Where(d => d.Status == "Active") + .Select(d => new DocumentDto(d.Id, d.Name)) + .ToListAsync(ct); -### Dependency Injection +// Prevent N+1 with Include or projection +await _context.Documents.Include(d => d.Labels).ToListAsync(ct); -```python -from functools import lru_cache -from pydantic_settings import BaseSettings +// Use explicit transactions for multiple operations +await using var tx = await _context.Database.BeginTransactionAsync(ct); -class Settings(BaseSettings): - db_host: str = "localhost" - db_password: str - model_path: str = "runs/train/invoice_fields/weights/best.pt" - class Config: - env_file = ".env" - -@lru_cache() -def get_settings() -> Settings: - return Settings() - -def get_inference_service(settings: Settings = Depends(get_settings)) -> InferenceService: - return InferenceService(model_path=settings.model_path) -``` - -## Database Patterns - -### Connection Pooling - -```python -from psycopg2 import pool -from contextlib import contextmanager - -db_pool = pool.ThreadedConnectionPool(minconn=2, maxconn=10, **db_config) - -@contextmanager -def get_db_connection(): - conn =
db_pool.getconn() - try: - yield conn - finally: - db_pool.putconn(conn) -``` - -### Query Optimization - -```python -# GOOD: Select only needed columns -cur.execute(""" - SELECT id, status, fields->>'InvoiceNumber' as invoice_number - FROM documents WHERE status = %s - ORDER BY created_at DESC LIMIT %s -""", ('processed', 10)) - -# BAD: SELECT * FROM documents -``` - -### N+1 Prevention - -```python -# BAD: N+1 queries -for doc in documents: - doc.labels = get_labels(doc.id) # N queries - -# GOOD: Batch fetch with JOIN -cur.execute(""" - SELECT d.id, d.status, array_agg(l.label) as labels - FROM documents d - LEFT JOIN document_labels l ON d.id = l.document_id - GROUP BY d.id, d.status -""") -``` - -### Transaction Pattern - -```python -def create_document_with_labels(doc_data: dict, labels: list[dict]) -> str: - with get_db_connection() as conn: - try: - with conn.cursor() as cur: - cur.execute("INSERT INTO documents ... RETURNING id", ...) - doc_id = cur.fetchone()[0] - for label in labels: - cur.execute("INSERT INTO document_labels ...", ...) 
- conn.commit() - return doc_id - except Exception: - conn.rollback() - raise -``` - -## Caching - -```python -from cachetools import TTLCache - -_cache = TTLCache(maxsize=1000, ttl=300) - -def get_document_cached(doc_id: str) -> Document | None: - if doc_id in _cache: - return _cache[doc_id] - doc = repo.find_by_id(doc_id) - if doc: - _cache[doc_id] = doc - return doc +// Configure entities with IEntityTypeConfiguration +public class DocumentConfiguration : IEntityTypeConfiguration<Document> +{ + public void Configure(EntityTypeBuilder<Document> builder) + { + builder.HasKey(d => d.Id); + builder.Property(d => d.Name).HasMaxLength(200).IsRequired(); + builder.HasIndex(d => d.Status); + } +} ``` ## Error Handling -### Exception Hierarchy +```csharp +// Create domain-specific exceptions +public class NotFoundException(string resource, Guid id) + : Exception($"{resource} not found: {id}"); -```python -class AppError(Exception): - def __init__(self, message: str, status_code: int = 500): - self.message = message - self.status_code = status_code +// Use global exception handler +public class GlobalExceptionHandler(ILogger logger) : IExceptionHandler +{ + public async ValueTask<bool> TryHandleAsync(HttpContext ctx, Exception ex, CancellationToken ct) + { + logger.LogError(ex, "Error: {Message}", ex.Message); + ctx.Response.StatusCode = ex is NotFoundException ? 404 : 500; + await ctx.Response.WriteAsJsonAsync(new { error = ex.Message }, ct); + return true; + } +} -class NotFoundError(AppError): - def __init__(self, resource: str, id: str): - super().__init__(f"{resource} not found: {id}", 404) - -class ValidationError(AppError): - def __init__(self, message: str): - super().__init__(message, 400) +// Use Result pattern for expected failures +public Result<Document> Validate(CreateRequest request) => + string.IsNullOrEmpty(request.Name) + ?
Result.Fail("Name is required") + : Result.Ok(new Document(request.Name)); ``` -### FastAPI Exception Handler +## Validation -```python -@app.exception_handler(AppError) -async def app_error_handler(request: Request, exc: AppError): - return JSONResponse(status_code=exc.status_code, content={"success": False, "error": exc.message}) +```csharp +// Use FluentValidation +public class CreateDocumentValidator : AbstractValidator<CreateDocumentRequest> +{ + public CreateDocumentValidator() + { + RuleFor(x => x.Name).NotEmpty().MaximumLength(200); + RuleFor(x => x.Type).Must(BeValidType).WithMessage("Invalid document type"); + } +} -@app.exception_handler(Exception) -async def generic_error_handler(request: Request, exc: Exception): - logger.error(f"Unexpected error: {exc}", exc_info=True) - return JSONResponse(status_code=500, content={"success": False, "error": "Internal server error"}) +// Or use Data Annotations for simple cases +public record CreateRequest( + [Required, MaxLength(200)] string Name, + [Range(1, 100)] int Quantity); ``` -### Retry with Backoff +## Logging -```python -async def retry_with_backoff(fn, max_retries: int = 3, base_delay: float = 1.0): - last_error = None - for attempt in range(max_retries): - try: - return await fn() if asyncio.iscoroutinefunction(fn) else fn() - except Exception as e: - last_error = e - if attempt < max_retries - 1: - await asyncio.sleep(base_delay * (2 ** attempt)) - raise last_error +```csharp +// Use structured logging with templates +logger.LogInformation("Processing document {DocumentId} for user {UserId}", docId, userId); + +// Use appropriate log levels +logger.LogDebug("Cache hit for key {Key}", key); // Development details +logger.LogInformation("Document {Id} created", id); // Normal operations +logger.LogWarning("Retry attempt {Attempt} for {Op}", n, op); // Potential issues +logger.LogError(ex, "Failed to process {DocumentId}", id); // Errors + +// Configure log filtering in appsettings +{ + "Logging": { + "LogLevel": { + "Default":
"Information", + "Microsoft.AspNetCore": "Warning", + "Microsoft.EntityFrameworkCore": "Warning" + } + } +} ``` -## Rate Limiting +## API Design -```python -from time import time -from collections import defaultdict +```csharp +[ApiController] +[Route("api/v1/[controller]")] +public class DocumentsController(IDocumentService service) : ControllerBase +{ + [HttpGet("{id:guid}")] + [ProducesResponseType(200)] + [ProducesResponseType(404)] + public async Task<IActionResult> Get(Guid id, CancellationToken ct) + { + var doc = await service.GetAsync(id, ct); + return doc is null ? NotFound() : Ok(doc); + } -class RateLimiter: - def __init__(self): - self.requests: dict[str, list[float]] = defaultdict(list) - - def check_limit(self, identifier: str, max_requests: int, window_sec: int) -> bool: - now = time() - self.requests[identifier] = [t for t in self.requests[identifier] if now - t < window_sec] - if len(self.requests[identifier]) >= max_requests: - return False - self.requests[identifier].append(now) - return True - -limiter = RateLimiter() - -@app.middleware("http") -async def rate_limit_middleware(request: Request, call_next): - ip = request.client.host - if not limiter.check_limit(ip, max_requests=100, window_sec=60): - return JSONResponse(status_code=429, content={"error": "Rate limit exceeded"}) - return await call_next(request) + [HttpPost] + public async Task<IActionResult> Create(CreateRequest request, CancellationToken ct) + { + var doc = await service.CreateAsync(request, ct); + return CreatedAtAction(nameof(Get), new { id = doc.Id }, doc); + } +} ``` -## Logging & Middleware +## Testing -### Request Logging +```csharp +// Use descriptive test names +[Fact] +public async Task GetById_WithValidId_ReturnsDocument() +{ + // Arrange + var repo = Substitute.For<IRepository<Document>>(); + repo.GetByIdAsync(Arg.Any<Guid>(), Arg.Any<CancellationToken>()) + .Returns(new Document("Test")); + var service = new DocumentService(repo); -```python -@app.middleware("http") -async def log_requests(request: Request, call_next): - request_id =
str(uuid.uuid4())[:8] - start_time = time.time() - logger.info(f"[{request_id}] {request.method} {request.url.path}") - response = await call_next(request) - duration_ms = (time.time() - start_time) * 1000 - logger.info(f"[{request_id}] Completed {response.status_code} in {duration_ms:.2f}ms") - return response + // Act + var result = await service.GetAsync(Guid.NewGuid(), CancellationToken.None); + + // Assert + result.Should().NotBeNull(); + result!.Name.Should().Be("Test"); +} + +// Use WebApplicationFactory for integration tests +public class ApiTests(WebApplicationFactory<Program> factory) : IClassFixture<WebApplicationFactory<Program>> +{ + [Fact] + public async Task GetDocuments_ReturnsSuccess() + { + var client = factory.CreateClient(); + var response = await client.GetAsync("/api/v1/documents"); + response.StatusCode.Should().Be(HttpStatusCode.OK); + } +} ``` -### Structured Logging +## Performance -```python -class JSONFormatter(logging.Formatter): - def format(self, record): - return json.dumps({ - "timestamp": datetime.utcnow().isoformat(), - "level": record.levelname, - "message": record.getMessage(), - "module": record.module, - }) +```csharp +// Use IMemoryCache for frequently accessed data +public class CachedService(IMemoryCache cache, IRepository<Document> repo) +{ + public async Task<Document?> GetAsync(Guid id, CancellationToken ct) => + await cache.GetOrCreateAsync($"doc:{id}", async entry => + { + entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5); + return await repo.GetByIdAsync(id, ct); + }); +} + +// Use pagination for large collections +public async Task<PagedResult<Document>> GetPagedAsync(int page, int size, CancellationToken ct) => + new( + await _context.Documents.Skip((page - 1) * size).Take(size).ToListAsync(ct), + await _context.Documents.CountAsync(ct) + ); + +// Use IAsyncEnumerable for streaming large datasets +public async IAsyncEnumerable<Document> StreamAllAsync([EnumeratorCancellation] CancellationToken ct) +{ + await foreach (var doc in _context.Documents.AsAsyncEnumerable().WithCancellation(ct)) +
yield return doc; +} ``` -## Background Tasks +## Security -```python -from fastapi import BackgroundTasks +```csharp +// Never hardcode secrets +var apiKey = builder.Configuration["ApiKey"]; // From environment/secrets -def send_notification(document_id: str, status: str): - logger.info(f"Notification: {document_id} -> {status}") +// Use parameterized queries (EF Core does this automatically) +// Bad: $"SELECT * FROM Users WHERE Id = {id}" +// Good: _context.Users.Where(u => u.Id == id) -@router.post("/infer") -async def infer(file: UploadFile, background_tasks: BackgroundTasks): - result = await process_document(file) - background_tasks.add_task(send_notification, result.document_id, "completed") - return result +// Validate and sanitize all inputs +// Use HTTPS in production +// Implement rate limiting +builder.Services.AddRateLimiter(options => { ... }); ``` - -## Key Principles - -- Repository pattern: Abstract data access -- Service layer: Business logic separated from routes -- Dependency injection via `Depends()` -- Connection pooling for database -- Parameterized queries only (no f-strings in SQL) -- Batch fetch to prevent N+1 -- Consistent `ApiResponse[T]` format -- Exception hierarchy with proper status codes -- Rate limit by IP -- Structured logging with request ID \ No newline at end of file diff --git a/.claude/skills/coding-standards/SKILL.md b/.claude/skills/coding-standards/SKILL.md index 4bb9b71..9fbb3ec 100644 --- a/.claude/skills/coding-standards/SKILL.md +++ b/.claude/skills/coding-standards/SKILL.md @@ -1,665 +1,234 @@ --- name: coding-standards -description: Universal coding standards, best practices, and patterns for Python, FastAPI, and data processing development. +description: .NET/C# coding standards and best practices. --- -# Coding Standards & Best Practices +# .NET Coding Standards -Python coding standards for the Invoice Master project. 
+## Core Principles -## Code Quality Principles +- **Readability First** - Clear names, self-documenting code +- **KISS** - Simplest solution that works +- **DRY** - Extract common logic, avoid copy-paste +- **YAGNI** - Don't build features before needed -### 1. Readability First -- Code is read more than written -- Clear variable and function names -- Self-documenting code preferred over comments -- Consistent formatting (follow PEP 8) +## Naming Conventions -### 2. KISS (Keep It Simple, Stupid) -- Simplest solution that works -- Avoid over-engineering -- No premature optimization -- Easy to understand > clever code +```csharp +// PascalCase: Types, methods, properties, public fields +public class DocumentService { } +public async Task GetByIdAsync(Guid id) { } +public string InvoiceNumber { get; init; } -### 3. DRY (Don't Repeat Yourself) -- Extract common logic into functions -- Create reusable utilities -- Share modules across the codebase -- Avoid copy-paste programming +// camelCase: Parameters, local variables, private fields with underscore +private readonly ILogger _logger; +public void Process(string documentId, int pageCount) { } -### 4. YAGNI (You Aren't Gonna Need It) -- Don't build features before they're needed -- Avoid speculative generality -- Add complexity only when required -- Start simple, refactor when needed +// Interfaces: I prefix +public interface IDocumentRepository { } -## Python Standards - -### Variable Naming - -```python -# GOOD: Descriptive names -invoice_number = "INV-2024-001" -is_valid_document = True -total_confidence_score = 0.95 - -# BAD: Unclear names -inv = "INV-2024-001" -flag = True -x = 0.95 +// Async methods: Async suffix +public async Task LoadAsync(CancellationToken ct) ``` -### Function Naming +## Modern C# Features -```python -# GOOD: Verb-noun pattern with type hints -def extract_invoice_fields(pdf_path: Path) -> dict[str, str]: - """Extract fields from invoice PDF.""" - ... 
+```csharp +// Records for DTOs and value objects +public sealed record CreateDocumentRequest(string Name, string Type); +public sealed record DocumentDto(Guid Id, string Name, DateTime CreatedAt); -def calculate_confidence(predictions: list[float]) -> float: - """Calculate average confidence score.""" - ... +// Primary constructors +public class DocumentService(IRepository<Document> repo, ILogger logger) +{ + public async Task<Document?> GetAsync(Guid id, CancellationToken ct) => + await repo.GetByIdAsync(id, ct); +} -def is_valid_bankgiro(value: str) -> bool: - """Check if value is valid Bankgiro number.""" - ... +// Pattern matching +var message = result switch +{ + { IsSuccess: true, Value: var doc } => $"Found: {doc.Name}", + { Error: var err } => $"Error: {err}", + _ => "Unknown" +}; -# BAD: Unclear or noun-only -def invoice(path): - ... +// Collection expressions +int[] numbers = [1, 2, 3]; +List<string> names = ["Alice", "Bob"]; -def confidence(p): - ... - -def bankgiro(v): - ... +// Null coalescing +var name = user?.Name ?? "Unknown"; +list ??= []; ``` -### Type Hints (REQUIRED) +## Immutability (Critical) -```python -# GOOD: Full type annotations -from typing import Optional -from pathlib import Path -from dataclasses import dataclass +```csharp +// GOOD: Create new objects +public record User(string Name, int Age) +{ + public User WithName(string newName) => this with { Name = newName }; +} -@dataclass -class InferenceResult: - document_id: str - fields: dict[str, str] - confidence: dict[str, float] - processing_time_ms: float +// GOOD: Immutable collections +public IReadOnlyList<string> GetNames() => _names.AsReadOnly(); -def process_document( - pdf_path: Path, - confidence_threshold: float = 0.5 -) -> InferenceResult: - """Process PDF and return extracted fields.""" - ... - -# BAD: No type hints -def process_document(pdf_path, confidence_threshold=0.5): - ... +// BAD: Mutation +public void UpdateUser(User user, string name) +{ + user.Name = name; // MUTATION!
+} ``` -### Immutability Pattern (CRITICAL) +## Error Handling -```python -# GOOD: Create new objects, don't mutate -def update_fields(fields: dict[str, str], updates: dict[str, str]) -> dict[str, str]: - return {**fields, **updates} +```csharp +// Domain exceptions +public class NotFoundException(string resource, Guid id) + : Exception($"{resource} not found: {id}"); -def add_item(items: list[str], new_item: str) -> list[str]: - return [*items, new_item] +// Comprehensive handling +public async Task<Document> LoadAsync(Guid id, CancellationToken ct) +{ + try + { + var doc = await _repo.GetByIdAsync(id, ct); + return doc ?? throw new NotFoundException("Document", id); + } + catch (Exception ex) when (ex is not NotFoundException) + { + _logger.LogError(ex, "Failed to load document {Id}", id); + throw; + } +} -# BAD: Direct mutation -def update_fields(fields: dict[str, str], updates: dict[str, str]) -> dict[str, str]: - fields.update(updates) # MUTATION! - return fields - -def add_item(items: list[str], new_item: str) -> list[str]: - items.append(new_item) # MUTATION! - return items +// Result pattern for expected failures +public Result<Document> Validate(CreateRequest request) => + string.IsNullOrEmpty(request.Name) + ?
Result.Fail("Name required") + : Result.Ok(new Document(request.Name)); ``` -### Error Handling +## Async/Await -```python -import logging +```csharp +// Always pass CancellationToken +public async Task<Document?> GetAsync(Guid id, CancellationToken ct) -logger = logging.getLogger(__name__) +// Use ConfigureAwait(false) in libraries +await _client.GetAsync(url, ct).ConfigureAwait(false); -# GOOD: Comprehensive error handling with logging -def load_model(model_path: Path) -> Model: - """Load YOLO model from path.""" - try: - if not model_path.exists(): - raise FileNotFoundError(f"Model not found: {model_path}") +// Avoid async void +public async Task ProcessAsync() { } // Good +public async void Process() { } // Bad - model = YOLO(str(model_path)) - logger.info(f"Model loaded: {model_path}") - return model - except Exception as e: - logger.error(f"Failed to load model: {e}") - raise RuntimeError(f"Model loading failed: {model_path}") from e - -# BAD: No error handling -def load_model(model_path): - return YOLO(str(model_path)) - -# BAD: Bare except -def load_model(model_path): - try: - return YOLO(str(model_path)) - except: # Never use bare except!
- return None +// Parallel when independent +var tasks = ids.Select(id => GetAsync(id, ct)); +var results = await Task.WhenAll(tasks); ``` -### Async Best Practices +## LINQ Best Practices -```python -import asyncio +```csharp +// Prefer method syntax for complex queries +var result = documents + .Where(d => d.Status == "Active") + .OrderByDescending(d => d.CreatedAt) + .Select(d => new DocumentDto(d.Id, d.Name, d.CreatedAt)) + .Take(10); -# GOOD: Parallel execution when possible -async def process_batch(pdf_paths: list[Path]) -> list[InferenceResult]: - tasks = [process_document(path) for path in pdf_paths] - results = await asyncio.gather(*tasks, return_exceptions=True) +// Use Any() instead of Count() > 0 +if (documents.Any(d => d.IsValid)) { } - # Handle exceptions - valid_results = [] - for path, result in zip(pdf_paths, results): - if isinstance(result, Exception): - logger.error(f"Failed to process {path}: {result}") - else: - valid_results.append(result) - return valid_results - -# BAD: Sequential when unnecessary -async def process_batch(pdf_paths: list[Path]) -> list[InferenceResult]: - results = [] - for path in pdf_paths: - result = await process_document(path) - results.append(result) - return results -``` - -### Context Managers - -```python -from contextlib import contextmanager -from pathlib import Path -import tempfile - -# GOOD: Proper resource management -@contextmanager -def temp_pdf_copy(pdf_path: Path): - """Create temporary copy of PDF for processing.""" - with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp: - tmp.write(pdf_path.read_bytes()) - tmp_path = Path(tmp.name) - try: - yield tmp_path - finally: - tmp_path.unlink(missing_ok=True) - -# Usage -with temp_pdf_copy(original_pdf) as tmp_pdf: - result = process_pdf(tmp_pdf) -``` - -## FastAPI Best Practices - -### Route Structure - -```python -from fastapi import APIRouter, HTTPException, Depends, Query, File, UploadFile -from pydantic import BaseModel - -router = 
APIRouter(prefix="/api/v1", tags=["inference"]) - -class InferenceResponse(BaseModel): - success: bool - document_id: str - fields: dict[str, str] - confidence: dict[str, float] - processing_time_ms: float - -@router.post("/infer", response_model=InferenceResponse) -async def infer_document( - file: UploadFile = File(...), - confidence_threshold: float = Query(0.5, ge=0.0, le=1.0) -) -> InferenceResponse: - """Process invoice PDF and extract fields.""" - if not file.filename.endswith(".pdf"): - raise HTTPException(status_code=400, detail="Only PDF files accepted") - - result = await inference_service.process(file, confidence_threshold) - return InferenceResponse( - success=True, - document_id=result.document_id, - fields=result.fields, - confidence=result.confidence, - processing_time_ms=result.processing_time_ms - ) -``` - -### Input Validation with Pydantic - -```python -from pydantic import BaseModel, Field, field_validator -from datetime import date -import re - -class InvoiceData(BaseModel): - invoice_number: str = Field(..., min_length=1, max_length=50) - invoice_date: date - amount: float = Field(..., gt=0) - bankgiro: str | None = None - ocr_number: str | None = None - - @field_validator("bankgiro") - @classmethod - def validate_bankgiro(cls, v: str | None) -> str | None: - if v is None: - return None - # Bankgiro: 7-8 digits - cleaned = re.sub(r"[^0-9]", "", v) - if not (7 <= len(cleaned) <= 8): - raise ValueError("Bankgiro must be 7-8 digits") - return cleaned - - @field_validator("ocr_number") - @classmethod - def validate_ocr(cls, v: str | None) -> str | None: - if v is None: - return None - # OCR: 2-25 digits - cleaned = re.sub(r"[^0-9]", "", v) - if not (2 <= len(cleaned) <= 25): - raise ValueError("OCR must be 2-25 digits") - return cleaned -``` - -### Response Format - -```python -from pydantic import BaseModel -from typing import Generic, TypeVar - -T = TypeVar("T") - -class ApiResponse(BaseModel, Generic[T]): - success: bool - data: T | None = 
None - error: str | None = None - meta: dict | None = None - -# Success response -return ApiResponse( - success=True, - data=result, - meta={"processing_time_ms": elapsed_ms} -) - -# Error response -return ApiResponse( - success=False, - error="Invalid PDF format" -) +// Avoid multiple enumerations +var list = documents.ToList(); // Materialize once +var count = list.Count; +var first = list.FirstOrDefault(); ``` ## File Organization -### Project Structure - ``` src/ -├── cli/ # Command-line interfaces -│ ├── autolabel.py -│ ├── train.py -│ └── infer.py -├── pdf/ # PDF processing -│ ├── extractor.py -│ └── renderer.py -├── ocr/ # OCR processing -│ ├── paddle_ocr.py -│ └── machine_code_parser.py -├── inference/ # Inference pipeline -│ ├── pipeline.py -│ ├── yolo_detector.py -│ └── field_extractor.py -├── normalize/ # Field normalization -│ ├── base.py -│ ├── date_normalizer.py -│ └── amount_normalizer.py -├── web/ # FastAPI application -│ ├── app.py -│ ├── routes.py -│ ├── services.py -│ └── schemas.py -└── utils/ # Shared utilities - ├── validators.py - ├── text_cleaner.py - └── logging.py -tests/ # Mirror of src structure - ├── test_pdf/ - ├── test_ocr/ - └── test_inference/ + Domain/ # Entities, value objects + Application/ # Use cases, DTOs, interfaces + Infrastructure/ # EF Core, external services + Api/ # Controllers, middleware +tests/ + Unit/ + Integration/ ``` -### File Naming +**Guidelines:** +- Max 800 lines per file (typical 200-400) +- Max 50 lines per method +- One class per file (except nested) +- Group by feature, not by type -``` -src/ocr/paddle_ocr.py # snake_case for modules -src/inference/yolo_detector.py # snake_case for modules -tests/test_paddle_ocr.py # test_ prefix for tests -config.py # snake_case for config +## Code Smells + +```csharp +// BAD: Deep nesting +if (doc != null) + if (doc.IsValid) + if (doc.HasFields) + // ... 
+ +// GOOD: Early returns +if (doc is null) return null; +if (!doc.IsValid) return null; +if (!doc.HasFields) return null; +// ... + +// BAD: Magic numbers +if (confidence > 0.5) { } + +// GOOD: Named constants +private const double ConfidenceThreshold = 0.5; +if (confidence > ConfidenceThreshold) { } ``` -### Module Size Guidelines +## Logging -- **Maximum**: 800 lines per file -- **Typical**: 200-400 lines per file -- **Functions**: Max 50 lines each -- Extract utilities when modules grow too large +```csharp +// Structured logging with templates +_logger.LogInformation("Processing document {DocumentId}", docId); +_logger.LogError(ex, "Failed to process {DocumentId}", docId); -## Comments & Documentation - -### When to Comment - -```python -# GOOD: Explain WHY, not WHAT -# Swedish Bankgiro uses Luhn algorithm with weight [1,2,1,2...] -def validate_bankgiro_checksum(bankgiro: str) -> bool: - ... - -# Payment line format: 7 groups separated by #, checksum at end -def parse_payment_line(line: str) -> PaymentLineData: - ... - -# BAD: Stating the obvious -# Increment counter by 1 -count += 1 - -# Set name to user's name -name = user.name +// Appropriate levels +LogDebug // Development details +LogInformation // Normal operations +LogWarning // Potential issues +LogError // Errors with exceptions ``` -### Docstrings for Public APIs +## Testing (AAA Pattern) -```python -def extract_invoice_fields( - pdf_path: Path, - confidence_threshold: float = 0.5, - use_gpu: bool = True -) -> InferenceResult: - """Extract structured fields from Swedish invoice PDF. +```csharp +[Fact] +public async Task GetById_WithValidId_ReturnsDocument() +{ + // Arrange + var repo = Substitute.For<IRepository<Document>>(); + repo.GetByIdAsync(Arg.Any<Guid>(), Arg.Any<CancellationToken>()) + .Returns(new Document("Test")); + var service = new DocumentService(repo); - Uses YOLOv11 for field detection and PaddleOCR for text extraction. - Applies field-specific normalization and validation.
+ // Act + var result = await service.GetAsync(Guid.NewGuid(), CancellationToken.None); - Args: - pdf_path: Path to the invoice PDF file. - confidence_threshold: Minimum confidence for field detection (0.0-1.0). - use_gpu: Whether to use GPU acceleration. - - Returns: - InferenceResult containing extracted fields and confidence scores. - - Raises: - FileNotFoundError: If PDF file doesn't exist. - ProcessingError: If OCR or detection fails. - - Example: - >>> result = extract_invoice_fields(Path("invoice.pdf")) - >>> print(result.fields["invoice_number"]) - "INV-2024-001" - """ - ... + // Assert + result.Should().NotBeNull(); + result!.Name.Should().Be("Test"); +} ``` -## Performance Best Practices +## Key Rules -### Caching - -```python -from functools import lru_cache -from cachetools import TTLCache - -# Static data: LRU cache -@lru_cache(maxsize=100) -def get_field_config(field_name: str) -> FieldConfig: - """Load field configuration (cached).""" - return load_config(field_name) - -# Dynamic data: TTL cache -_document_cache = TTLCache(maxsize=1000, ttl=300) # 5 minutes - -def get_document_cached(doc_id: str) -> Document | None: - if doc_id in _document_cache: - return _document_cache[doc_id] - - doc = repo.find_by_id(doc_id) - if doc: - _document_cache[doc_id] = doc - return doc -``` - -### Database Queries - -```python -# GOOD: Select only needed columns -cur.execute(""" - SELECT id, status, fields->>'invoice_number' - FROM documents - WHERE status = %s - LIMIT %s -""", ('processed', 10)) - -# BAD: Select everything -cur.execute("SELECT * FROM documents") - -# GOOD: Batch operations -cur.executemany( - "INSERT INTO labels (doc_id, field, value) VALUES (%s, %s, %s)", - [(doc_id, f, v) for f, v in fields.items()] -) - -# BAD: Individual inserts in loop -for field, value in fields.items(): - cur.execute("INSERT INTO labels ...", (doc_id, field, value)) -``` - -### Lazy Loading - -```python -class InferencePipeline: - def __init__(self, model_path: Path): - 
self.model_path = model_path - self._model: YOLO | None = None - self._ocr: PaddleOCR | None = None - - @property - def model(self) -> YOLO: - """Lazy load YOLO model.""" - if self._model is None: - self._model = YOLO(str(self.model_path)) - return self._model - - @property - def ocr(self) -> PaddleOCR: - """Lazy load PaddleOCR.""" - if self._ocr is None: - self._ocr = PaddleOCR(use_angle_cls=True, lang="latin") - return self._ocr -``` - -## Testing Standards - -### Test Structure (AAA Pattern) - -```python -def test_extract_bankgiro_valid(): - # Arrange - text = "Bankgiro: 123-4567" - - # Act - result = extract_bankgiro(text) - - # Assert - assert result == "1234567" - -def test_extract_bankgiro_invalid_returns_none(): - # Arrange - text = "No bankgiro here" - - # Act - result = extract_bankgiro(text) - - # Assert - assert result is None -``` - -### Test Naming - -```python -# GOOD: Descriptive test names -def test_parse_payment_line_extracts_all_fields(): ... -def test_parse_payment_line_handles_missing_checksum(): ... -def test_validate_ocr_returns_false_for_invalid_checksum(): ... - -# BAD: Vague test names -def test_parse(): ... -def test_works(): ... -def test_payment_line(): ... -``` - -### Fixtures - -```python -import pytest -from pathlib import Path - -@pytest.fixture -def sample_invoice_pdf(tmp_path: Path) -> Path: - """Create sample invoice PDF for testing.""" - pdf_path = tmp_path / "invoice.pdf" - # Create test PDF... - return pdf_path - -@pytest.fixture -def inference_pipeline(sample_model_path: Path) -> InferencePipeline: - """Create inference pipeline with test model.""" - return InferencePipeline(sample_model_path) - -def test_process_invoice(inference_pipeline, sample_invoice_pdf): - result = inference_pipeline.process(sample_invoice_pdf) - assert result.fields.get("invoice_number") is not None -``` - -## Code Smell Detection - -### 1. Long Functions - -```python -# BAD: Function > 50 lines -def process_document(): - # 100 lines of code... 
- -# GOOD: Split into smaller functions -def process_document(pdf_path: Path) -> InferenceResult: - image = render_pdf(pdf_path) - detections = detect_fields(image) - ocr_results = extract_text(image, detections) - fields = normalize_fields(ocr_results) - return build_result(fields) -``` - -### 2. Deep Nesting - -```python -# BAD: 5+ levels of nesting -if document: - if document.is_valid: - if document.has_fields: - if field in document.fields: - if document.fields[field]: - # Do something - -# GOOD: Early returns -if not document: - return None -if not document.is_valid: - return None -if not document.has_fields: - return None -if field not in document.fields: - return None -if not document.fields[field]: - return None - -# Do something -``` - -### 3. Magic Numbers - -```python -# BAD: Unexplained numbers -if confidence > 0.5: - ... -time.sleep(3) - -# GOOD: Named constants -CONFIDENCE_THRESHOLD = 0.5 -RETRY_DELAY_SECONDS = 3 - -if confidence > CONFIDENCE_THRESHOLD: - ... -time.sleep(RETRY_DELAY_SECONDS) -``` - -### 4. Mutable Default Arguments - -```python -# BAD: Mutable default argument -def process_fields(fields: list = []): # DANGEROUS! 
- fields.append("new_field") - return fields - -# GOOD: Use None as default -def process_fields(fields: list | None = None) -> list: - if fields is None: - fields = [] - return [*fields, "new_field"] -``` - -## Logging Standards - -```python -import logging - -# Module-level logger -logger = logging.getLogger(__name__) - -# GOOD: Appropriate log levels -logger.debug("Processing document: %s", doc_id) -logger.info("Document processed successfully: %s", doc_id) -logger.warning("Low confidence score: %.2f", confidence) -logger.error("Failed to process document: %s", error) - -# GOOD: Structured logging with extra data -logger.info( - "Inference complete", - extra={ - "document_id": doc_id, - "field_count": len(fields), - "processing_time_ms": elapsed_ms - } -) - -# BAD: Using print() -print(f"Processing {doc_id}") # Never in production! -``` - -**Remember**: Code quality is not negotiable. Clear, maintainable Python code with proper type hints enables confident development and refactoring. +- Always use `CancellationToken` for async methods +- Prefer `records` for DTOs and immutable data +- Use `IReadOnlyList<T>` for return types +- Never use `async void` (except event handlers) +- Always handle `null` with pattern matching or null operators +- Use structured logging, never `Console.WriteLine` diff --git a/.opencode/skills/backend-patterns/SKILL.md b/.opencode/skills/backend-patterns/SKILL.md index 53bf07e..8111b1d 100644 --- a/.opencode/skills/backend-patterns/SKILL.md +++ b/.opencode/skills/backend-patterns/SKILL.md @@ -1,314 +1,274 @@ -# Backend Development Patterns +# .NET Development Best Practices -Backend architecture patterns for Python/FastAPI/PostgreSQL applications. 
- -## API Design - -### RESTful Structure +## Project Structure ``` -GET /api/v1/documents # List -GET /api/v1/documents/{id} # Get -POST /api/v1/documents # Create -PUT /api/v1/documents/{id} # Replace -PATCH /api/v1/documents/{id} # Update -DELETE /api/v1/documents/{id} # Delete - -GET /api/v1/documents?status=processed&sort=created_at&limit=20&offset=0 +src/ + Domain/ # Entities, value objects, domain events + Application/ # Use cases, DTOs, interfaces + Infrastructure/ # EF Core, external services + Api/ # Controllers, middleware, filters +tests/ + Unit/ + Integration/ ``` -### FastAPI Route Pattern +## Code Style -```python -from fastapi import APIRouter, HTTPException, Depends, Query, File, UploadFile -from pydantic import BaseModel +```csharp +// Use records for DTOs and value objects +public sealed record CreateDocumentRequest(string Name, string Type); -router = APIRouter(prefix="/api/v1", tags=["inference"]) +// Use primary constructors +public class DocumentService(IRepository repo, ILogger logger) +{ + public async Task GetAsync(Guid id, CancellationToken ct) => + await repo.GetByIdAsync(id, ct); +} -@router.post("/infer", response_model=ApiResponse[InferenceResult]) -async def infer_document( - file: UploadFile = File(...), - confidence_threshold: float = Query(0.5, ge=0, le=1), - service: InferenceService = Depends(get_inference_service) -) -> ApiResponse[InferenceResult]: - result = await service.process(file, confidence_threshold) - return ApiResponse(success=True, data=result) +// Prefer expression body for simple methods +public Document? 
FindById(Guid id) => _documents.FirstOrDefault(d => d.Id == id); + +// Use collection expressions +int[] numbers = [1, 2, 3]; +List names = ["Alice", "Bob"]; ``` -### Consistent Response Schema +## Async/Await -```python -from typing import Generic, TypeVar -T = TypeVar('T') +```csharp +// Always pass CancellationToken +public async Task GetAsync(Guid id, CancellationToken ct) -class ApiResponse(BaseModel, Generic[T]): - success: bool - data: T | None = None - error: str | None = None - meta: dict | None = None +// Use ConfigureAwait(false) in libraries +await _httpClient.GetAsync(url, ct).ConfigureAwait(false); + +// Avoid async void (except event handlers) +public async Task ProcessAsync() { } // Good +public async void Process() { } // Bad + +// Use ValueTask for hot paths with frequent sync completion +public ValueTask GetCachedCountAsync() ``` -## Core Patterns +## Dependency Injection -### Repository Pattern +```csharp +// Register by interface +builder.Services.AddScoped(); -```python -from typing import Protocol +// Use Options pattern for configuration +builder.Services.Configure(builder.Configuration.GetSection("App")); -class DocumentRepository(Protocol): - def find_all(self, filters: dict | None = None) -> list[Document]: ... - def find_by_id(self, id: str) -> Document | None: ... - def create(self, data: dict) -> Document: ... - def update(self, id: str, data: dict) -> Document: ... - def delete(self, id: str) -> None: ... 
+public class MyService(IOptions options) +{ + private readonly AppSettings _settings = options.Value; +} + +// Avoid service locator pattern +// Bad: var service = serviceProvider.GetService(); +// Good: Constructor injection ``` -### Service Layer +## Entity Framework Core -```python -class InferenceService: - def __init__(self, model_path: str, use_gpu: bool = True): - self.pipeline = InferencePipeline(model_path=model_path, use_gpu=use_gpu) +```csharp +// Always use AsNoTracking for read-only queries +await _context.Documents.AsNoTracking().ToListAsync(ct); - async def process(self, file: UploadFile, confidence_threshold: float) -> InferenceResult: - temp_path = self._save_temp_file(file) - try: - return self.pipeline.process_pdf(temp_path) - finally: - temp_path.unlink(missing_ok=True) -``` +// Use projection to select only needed fields +await _context.Documents + .Where(d => d.Status == "Active") + .Select(d => new DocumentDto(d.Id, d.Name)) + .ToListAsync(ct); -### Dependency Injection +// Prevent N+1 with Include or projection +await _context.Documents.Include(d => d.Labels).ToListAsync(ct); -```python -from functools import lru_cache -from pydantic_settings import BaseSettings +// Use explicit transactions for multiple operations +await using var tx = await _context.Database.BeginTransactionAsync(ct); -class Settings(BaseSettings): - db_host: str = "localhost" - db_password: str - model_path: str = "runs/train/invoice_fields/weights/best.pt" - class Config: - env_file = ".env" - -@lru_cache() -def get_settings() -> Settings: - return Settings() - -def get_inference_service(settings: Settings = Depends(get_settings)) -> InferenceService: - return InferenceService(model_path=settings.model_path) -``` - -## Database Patterns - -### Connection Pooling - -```python -from psycopg2 import pool -from contextlib import contextmanager - -db_pool = pool.ThreadedConnectionPool(minconn=2, maxconn=10, **db_config) - -@contextmanager -def get_db_connection(): - conn = 
db_pool.getconn() - try: - yield conn - finally: - db_pool.putconn(conn) -``` - -### Query Optimization - -```python -# GOOD: Select only needed columns -cur.execute(""" - SELECT id, status, fields->>'InvoiceNumber' as invoice_number - FROM documents WHERE status = %s - ORDER BY created_at DESC LIMIT %s -""", ('processed', 10)) - -# BAD: SELECT * FROM documents -``` - -### N+1 Prevention - -```python -# BAD: N+1 queries -for doc in documents: - doc.labels = get_labels(doc.id) # N queries - -# GOOD: Batch fetch with JOIN -cur.execute(""" - SELECT d.id, d.status, array_agg(l.label) as labels - FROM documents d - LEFT JOIN document_labels l ON d.id = l.document_id - GROUP BY d.id, d.status -""") -``` - -### Transaction Pattern - -```python -def create_document_with_labels(doc_data: dict, labels: list[dict]) -> str: - with get_db_connection() as conn: - try: - with conn.cursor() as cur: - cur.execute("INSERT INTO documents ... RETURNING id", ...) - doc_id = cur.fetchone()[0] - for label in labels: - cur.execute("INSERT INTO document_labels ...", ...) 
- conn.commit() - return doc_id - except Exception: - conn.rollback() - raise -``` - -## Caching - -```python -from cachetools import TTLCache - -_cache = TTLCache(maxsize=1000, ttl=300) - -def get_document_cached(doc_id: str) -> Document | None: - if doc_id in _cache: - return _cache[doc_id] - doc = repo.find_by_id(doc_id) - if doc: - _cache[doc_id] = doc - return doc +// Configure entities with IEntityTypeConfiguration +public class DocumentConfiguration : IEntityTypeConfiguration +{ + public void Configure(EntityTypeBuilder builder) + { + builder.HasKey(d => d.Id); + builder.Property(d => d.Name).HasMaxLength(200).IsRequired(); + builder.HasIndex(d => d.Status); + } +} ``` ## Error Handling -### Exception Hierarchy +```csharp +// Create domain-specific exceptions +public class NotFoundException(string resource, Guid id) + : Exception($"{resource} not found: {id}"); -```python -class AppError(Exception): - def __init__(self, message: str, status_code: int = 500): - self.message = message - self.status_code = status_code +// Use global exception handler +public class GlobalExceptionHandler(ILogger logger) : IExceptionHandler +{ + public async ValueTask TryHandleAsync(HttpContext ctx, Exception ex, CancellationToken ct) + { + logger.LogError(ex, "Error: {Message}", ex.Message); + ctx.Response.StatusCode = ex is NotFoundException ? 404 : 500; + await ctx.Response.WriteAsJsonAsync(new { error = ex.Message }, ct); + return true; + } +} -class NotFoundError(AppError): - def __init__(self, resource: str, id: str): - super().__init__(f"{resource} not found: {id}", 404) - -class ValidationError(AppError): - def __init__(self, message: str): - super().__init__(message, 400) +// Use Result pattern for expected failures +public Result Validate(CreateRequest request) => + string.IsNullOrEmpty(request.Name) + ? 
Result.Fail("Name is required") + : Result.Ok(new Document(request.Name)); ``` -### FastAPI Exception Handler +## Validation -```python -@app.exception_handler(AppError) -async def app_error_handler(request: Request, exc: AppError): - return JSONResponse(status_code=exc.status_code, content={"success": False, "error": exc.message}) +```csharp +// Use FluentValidation +public class CreateDocumentValidator : AbstractValidator +{ + public CreateDocumentValidator() + { + RuleFor(x => x.Name).NotEmpty().MaximumLength(200); + RuleFor(x => x.Type).Must(BeValidType).WithMessage("Invalid document type"); + } +} -@app.exception_handler(Exception) -async def generic_error_handler(request: Request, exc: Exception): - logger.error(f"Unexpected error: {exc}", exc_info=True) - return JSONResponse(status_code=500, content={"success": False, "error": "Internal server error"}) +// Or use Data Annotations for simple cases +public record CreateRequest( + [Required, MaxLength(200)] string Name, + [Range(1, 100)] int Quantity); ``` -### Retry with Backoff +## Logging -```python -async def retry_with_backoff(fn, max_retries: int = 3, base_delay: float = 1.0): - last_error = None - for attempt in range(max_retries): - try: - return await fn() if asyncio.iscoroutinefunction(fn) else fn() - except Exception as e: - last_error = e - if attempt < max_retries - 1: - await asyncio.sleep(base_delay * (2 ** attempt)) - raise last_error +```csharp +// Use structured logging with templates +logger.LogInformation("Processing document {DocumentId} for user {UserId}", docId, userId); + +// Use appropriate log levels +logger.LogDebug("Cache hit for key {Key}", key); // Development details +logger.LogInformation("Document {Id} created", id); // Normal operations +logger.LogWarning("Retry attempt {Attempt} for {Op}", n, op); // Potential issues +logger.LogError(ex, "Failed to process {DocumentId}", id); // Errors + +// Configure log filtering in appsettings +{ + "Logging": { + "LogLevel": { + "Default": 
"Information", + "Microsoft.AspNetCore": "Warning", + "Microsoft.EntityFrameworkCore": "Warning" + } + } +} ``` -## Rate Limiting +## API Design -```python -from time import time -from collections import defaultdict +```csharp +[ApiController] +[Route("api/v1/[controller]")] +public class DocumentsController(IDocumentService service) : ControllerBase +{ + [HttpGet("{id:guid}")] + [ProducesResponseType(200)] + [ProducesResponseType(404)] + public async Task Get(Guid id, CancellationToken ct) + { + var doc = await service.GetAsync(id, ct); + return doc is null ? NotFound() : Ok(doc); + } -class RateLimiter: - def __init__(self): - self.requests: dict[str, list[float]] = defaultdict(list) - - def check_limit(self, identifier: str, max_requests: int, window_sec: int) -> bool: - now = time() - self.requests[identifier] = [t for t in self.requests[identifier] if now - t < window_sec] - if len(self.requests[identifier]) >= max_requests: - return False - self.requests[identifier].append(now) - return True - -limiter = RateLimiter() - -@app.middleware("http") -async def rate_limit_middleware(request: Request, call_next): - ip = request.client.host - if not limiter.check_limit(ip, max_requests=100, window_sec=60): - return JSONResponse(status_code=429, content={"error": "Rate limit exceeded"}) - return await call_next(request) + [HttpPost] + public async Task Create(CreateRequest request, CancellationToken ct) + { + var doc = await service.CreateAsync(request, ct); + return CreatedAtAction(nameof(Get), new { id = doc.Id }, doc); + } +} ``` -## Logging & Middleware +## Testing -### Request Logging +```csharp +// Use descriptive test names +[Fact] +public async Task GetById_WithValidId_ReturnsDocument() +{ + // Arrange + var repo = Substitute.For>(); + repo.GetByIdAsync(Arg.Any(), Arg.Any()) + .Returns(new Document("Test")); + var service = new DocumentService(repo); -```python -@app.middleware("http") -async def log_requests(request: Request, call_next): - request_id = 
str(uuid.uuid4())[:8] - start_time = time.time() - logger.info(f"[{request_id}] {request.method} {request.url.path}") - response = await call_next(request) - duration_ms = (time.time() - start_time) * 1000 - logger.info(f"[{request_id}] Completed {response.status_code} in {duration_ms:.2f}ms") - return response + // Act + var result = await service.GetAsync(Guid.NewGuid(), CancellationToken.None); + + // Assert + result.Should().NotBeNull(); + result!.Name.Should().Be("Test"); +} + +// Use WebApplicationFactory for integration tests +public class ApiTests(WebApplicationFactory factory) : IClassFixture> +{ + [Fact] + public async Task GetDocuments_ReturnsSuccess() + { + var client = factory.CreateClient(); + var response = await client.GetAsync("/api/v1/documents"); + response.StatusCode.Should().Be(HttpStatusCode.OK); + } +} ``` -### Structured Logging +## Performance -```python -class JSONFormatter(logging.Formatter): - def format(self, record): - return json.dumps({ - "timestamp": datetime.utcnow().isoformat(), - "level": record.levelname, - "message": record.getMessage(), - "module": record.module, - }) +```csharp +// Use IMemoryCache for frequently accessed data +public class CachedService(IMemoryCache cache, IRepository repo) +{ + public async Task GetAsync(Guid id, CancellationToken ct) => + await cache.GetOrCreateAsync($"doc:{id}", async entry => + { + entry.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5); + return await repo.GetByIdAsync(id, ct); + }); +} + +// Use pagination for large collections +public async Task> GetPagedAsync(int page, int size, CancellationToken ct) => + new( + await _context.Documents.Skip((page - 1) * size).Take(size).ToListAsync(ct), + await _context.Documents.CountAsync(ct) + ); + +// Use IAsyncEnumerable for streaming large datasets +public async IAsyncEnumerable StreamAllAsync([EnumeratorCancellation] CancellationToken ct) +{ + await foreach (var doc in _context.Documents.AsAsyncEnumerable().WithCancellation(ct)) + 
yield return doc; +} ``` -## Background Tasks +## Security -```python -from fastapi import BackgroundTasks +```csharp +// Never hardcode secrets +var apiKey = builder.Configuration["ApiKey"]; // From environment/secrets -def send_notification(document_id: str, status: str): - logger.info(f"Notification: {document_id} -> {status}") +// Use parameterized queries (EF Core does this automatically) +// Bad: $"SELECT * FROM Users WHERE Id = {id}" +// Good: _context.Users.Where(u => u.Id == id) -@router.post("/infer") -async def infer(file: UploadFile, background_tasks: BackgroundTasks): - result = await process_document(file) - background_tasks.add_task(send_notification, result.document_id, "completed") - return result +// Validate and sanitize all inputs +// Use HTTPS in production +// Implement rate limiting +builder.Services.AddRateLimiter(options => { ... }); ``` - -## Key Principles - -- Repository pattern: Abstract data access -- Service layer: Business logic separated from routes -- Dependency injection via `Depends()` -- Connection pooling for database -- Parameterized queries only (no f-strings in SQL) -- Batch fetch to prevent N+1 -- Consistent `ApiResponse[T]` format -- Exception hierarchy with proper status codes -- Rate limit by IP -- Structured logging with request ID \ No newline at end of file diff --git a/.opencode/skills/coding-standards/SKILL.md b/.opencode/skills/coding-standards/SKILL.md index 4bb9b71..9fbb3ec 100644 --- a/.opencode/skills/coding-standards/SKILL.md +++ b/.opencode/skills/coding-standards/SKILL.md @@ -1,665 +1,234 @@ --- name: coding-standards -description: Universal coding standards, best practices, and patterns for Python, FastAPI, and data processing development. +description: .NET/C# coding standards and best practices. --- -# Coding Standards & Best Practices +# .NET Coding Standards -Python coding standards for the Invoice Master project. 
+## Core Principles -## Code Quality Principles +- **Readability First** - Clear names, self-documenting code +- **KISS** - Simplest solution that works +- **DRY** - Extract common logic, avoid copy-paste +- **YAGNI** - Don't build features before needed -### 1. Readability First -- Code is read more than written -- Clear variable and function names -- Self-documenting code preferred over comments -- Consistent formatting (follow PEP 8) +## Naming Conventions -### 2. KISS (Keep It Simple, Stupid) -- Simplest solution that works -- Avoid over-engineering -- No premature optimization -- Easy to understand > clever code +```csharp +// PascalCase: Types, methods, properties, public fields +public class DocumentService { } +public async Task GetByIdAsync(Guid id) { } +public string InvoiceNumber { get; init; } -### 3. DRY (Don't Repeat Yourself) -- Extract common logic into functions -- Create reusable utilities -- Share modules across the codebase -- Avoid copy-paste programming +// camelCase: Parameters, local variables, private fields with underscore +private readonly ILogger _logger; +public void Process(string documentId, int pageCount) { } -### 4. YAGNI (You Aren't Gonna Need It) -- Don't build features before they're needed -- Avoid speculative generality -- Add complexity only when required -- Start simple, refactor when needed +// Interfaces: I prefix +public interface IDocumentRepository { } -## Python Standards - -### Variable Naming - -```python -# GOOD: Descriptive names -invoice_number = "INV-2024-001" -is_valid_document = True -total_confidence_score = 0.95 - -# BAD: Unclear names -inv = "INV-2024-001" -flag = True -x = 0.95 +// Async methods: Async suffix +public async Task LoadAsync(CancellationToken ct) ``` -### Function Naming +## Modern C# Features -```python -# GOOD: Verb-noun pattern with type hints -def extract_invoice_fields(pdf_path: Path) -> dict[str, str]: - """Extract fields from invoice PDF.""" - ... 
+```csharp +// Records for DTOs and value objects +public sealed record CreateDocumentRequest(string Name, string Type); +public sealed record DocumentDto(Guid Id, string Name, DateTime CreatedAt); -def calculate_confidence(predictions: list[float]) -> float: - """Calculate average confidence score.""" - ... +// Primary constructors +public class DocumentService(IRepository<Document> repo, ILogger<DocumentService> logger) +{ + public async Task<Document?> GetAsync(Guid id, CancellationToken ct) => + await repo.GetByIdAsync(id, ct); +} -def is_valid_bankgiro(value: str) -> bool: - """Check if value is valid Bankgiro number.""" - ... +// Pattern matching +var message = result switch +{ + { IsSuccess: true, Value: var doc } => $"Found: {doc.Name}", + { Error: var err } => $"Error: {err}", + _ => "Unknown" +}; -# BAD: Unclear or noun-only -def invoice(path): - ... +// Collection expressions +int[] numbers = [1, 2, 3]; +List<string> names = ["Alice", "Bob"]; -def confidence(p): - ... - -def bankgiro(v): - ... +// Null coalescing +var name = user?.Name ?? "Unknown"; +list ??= []; ``` -### Type Hints (REQUIRED) +## Immutability (Critical) -```python -# GOOD: Full type annotations -from typing import Optional -from pathlib import Path -from dataclasses import dataclass +```csharp +// GOOD: Create new objects +public record User(string Name, int Age) +{ + public User WithName(string newName) => this with { Name = newName }; +} -@dataclass -class InferenceResult: - document_id: str - fields: dict[str, str] - confidence: dict[str, float] - processing_time_ms: float +// GOOD: Immutable collections +public IReadOnlyList<string> GetNames() => _names.AsReadOnly(); -def process_document( - pdf_path: Path, - confidence_threshold: float = 0.5 -) -> InferenceResult: - """Process PDF and return extracted fields.""" - ... - -# BAD: No type hints -def process_document(pdf_path, confidence_threshold=0.5): - ... +// BAD: Mutation +public void UpdateUser(User user, string name) +{ + user.Name = name; // MUTATION! 
+} ``` -### Immutability Pattern (CRITICAL) +## Error Handling -```python -# GOOD: Create new objects, don't mutate -def update_fields(fields: dict[str, str], updates: dict[str, str]) -> dict[str, str]: - return {**fields, **updates} +```csharp +// Domain exceptions +public class NotFoundException(string resource, Guid id) + : Exception($"{resource} not found: {id}"); -def add_item(items: list[str], new_item: str) -> list[str]: - return [*items, new_item] +// Comprehensive handling +public async Task LoadAsync(Guid id, CancellationToken ct) +{ + try + { + var doc = await _repo.GetByIdAsync(id, ct); + return doc ?? throw new NotFoundException("Document", id); + } + catch (Exception ex) when (ex is not NotFoundException) + { + _logger.LogError(ex, "Failed to load document {Id}", id); + throw; + } +} -# BAD: Direct mutation -def update_fields(fields: dict[str, str], updates: dict[str, str]) -> dict[str, str]: - fields.update(updates) # MUTATION! - return fields - -def add_item(items: list[str], new_item: str) -> list[str]: - items.append(new_item) # MUTATION! - return items +// Result pattern for expected failures +public Result Validate(CreateRequest request) => + string.IsNullOrEmpty(request.Name) + ? 
Result.Fail("Name required") + : Result.Ok(new Document(request.Name)); ``` -### Error Handling +## Async/Await -```python -import logging +```csharp +// Always pass CancellationToken +public async Task GetAsync(Guid id, CancellationToken ct) -logger = logging.getLogger(__name__) +// Use ConfigureAwait(false) in libraries +await _client.GetAsync(url, ct).ConfigureAwait(false); -# GOOD: Comprehensive error handling with logging -def load_model(model_path: Path) -> Model: - """Load YOLO model from path.""" - try: - if not model_path.exists(): - raise FileNotFoundError(f"Model not found: {model_path}") +// Avoid async void +public async Task ProcessAsync() { } // Good +public async void Process() { } // Bad - model = YOLO(str(model_path)) - logger.info(f"Model loaded: {model_path}") - return model - except Exception as e: - logger.error(f"Failed to load model: {e}") - raise RuntimeError(f"Model loading failed: {model_path}") from e - -# BAD: No error handling -def load_model(model_path): - return YOLO(str(model_path)) - -# BAD: Bare except -def load_model(model_path): - try: - return YOLO(str(model_path)) - except: # Never use bare except! 
- return None +// Parallel when independent +var tasks = ids.Select(id => GetAsync(id, ct)); +var results = await Task.WhenAll(tasks); ``` -### Async Best Practices +## LINQ Best Practices -```python -import asyncio +```csharp +// Prefer method syntax for complex queries +var result = documents + .Where(d => d.Status == "Active") + .OrderByDescending(d => d.CreatedAt) + .Select(d => new DocumentDto(d.Id, d.Name, d.CreatedAt)) + .Take(10); -# GOOD: Parallel execution when possible -async def process_batch(pdf_paths: list[Path]) -> list[InferenceResult]: - tasks = [process_document(path) for path in pdf_paths] - results = await asyncio.gather(*tasks, return_exceptions=True) +// Use Any() instead of Count() > 0 +if (documents.Any(d => d.IsValid)) { } - # Handle exceptions - valid_results = [] - for path, result in zip(pdf_paths, results): - if isinstance(result, Exception): - logger.error(f"Failed to process {path}: {result}") - else: - valid_results.append(result) - return valid_results - -# BAD: Sequential when unnecessary -async def process_batch(pdf_paths: list[Path]) -> list[InferenceResult]: - results = [] - for path in pdf_paths: - result = await process_document(path) - results.append(result) - return results -``` - -### Context Managers - -```python -from contextlib import contextmanager -from pathlib import Path -import tempfile - -# GOOD: Proper resource management -@contextmanager -def temp_pdf_copy(pdf_path: Path): - """Create temporary copy of PDF for processing.""" - with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp: - tmp.write(pdf_path.read_bytes()) - tmp_path = Path(tmp.name) - try: - yield tmp_path - finally: - tmp_path.unlink(missing_ok=True) - -# Usage -with temp_pdf_copy(original_pdf) as tmp_pdf: - result = process_pdf(tmp_pdf) -``` - -## FastAPI Best Practices - -### Route Structure - -```python -from fastapi import APIRouter, HTTPException, Depends, Query, File, UploadFile -from pydantic import BaseModel - -router = 
APIRouter(prefix="/api/v1", tags=["inference"]) - -class InferenceResponse(BaseModel): - success: bool - document_id: str - fields: dict[str, str] - confidence: dict[str, float] - processing_time_ms: float - -@router.post("/infer", response_model=InferenceResponse) -async def infer_document( - file: UploadFile = File(...), - confidence_threshold: float = Query(0.5, ge=0.0, le=1.0) -) -> InferenceResponse: - """Process invoice PDF and extract fields.""" - if not file.filename.endswith(".pdf"): - raise HTTPException(status_code=400, detail="Only PDF files accepted") - - result = await inference_service.process(file, confidence_threshold) - return InferenceResponse( - success=True, - document_id=result.document_id, - fields=result.fields, - confidence=result.confidence, - processing_time_ms=result.processing_time_ms - ) -``` - -### Input Validation with Pydantic - -```python -from pydantic import BaseModel, Field, field_validator -from datetime import date -import re - -class InvoiceData(BaseModel): - invoice_number: str = Field(..., min_length=1, max_length=50) - invoice_date: date - amount: float = Field(..., gt=0) - bankgiro: str | None = None - ocr_number: str | None = None - - @field_validator("bankgiro") - @classmethod - def validate_bankgiro(cls, v: str | None) -> str | None: - if v is None: - return None - # Bankgiro: 7-8 digits - cleaned = re.sub(r"[^0-9]", "", v) - if not (7 <= len(cleaned) <= 8): - raise ValueError("Bankgiro must be 7-8 digits") - return cleaned - - @field_validator("ocr_number") - @classmethod - def validate_ocr(cls, v: str | None) -> str | None: - if v is None: - return None - # OCR: 2-25 digits - cleaned = re.sub(r"[^0-9]", "", v) - if not (2 <= len(cleaned) <= 25): - raise ValueError("OCR must be 2-25 digits") - return cleaned -``` - -### Response Format - -```python -from pydantic import BaseModel -from typing import Generic, TypeVar - -T = TypeVar("T") - -class ApiResponse(BaseModel, Generic[T]): - success: bool - data: T | None = 
None - error: str | None = None - meta: dict | None = None - -# Success response -return ApiResponse( - success=True, - data=result, - meta={"processing_time_ms": elapsed_ms} -) - -# Error response -return ApiResponse( - success=False, - error="Invalid PDF format" -) +// Avoid multiple enumerations +var list = documents.ToList(); // Materialize once +var count = list.Count; +var first = list.FirstOrDefault(); ``` ## File Organization -### Project Structure - ``` src/ -├── cli/ # Command-line interfaces -│ ├── autolabel.py -│ ├── train.py -│ └── infer.py -├── pdf/ # PDF processing -│ ├── extractor.py -│ └── renderer.py -├── ocr/ # OCR processing -│ ├── paddle_ocr.py -│ └── machine_code_parser.py -├── inference/ # Inference pipeline -│ ├── pipeline.py -│ ├── yolo_detector.py -│ └── field_extractor.py -├── normalize/ # Field normalization -│ ├── base.py -│ ├── date_normalizer.py -│ └── amount_normalizer.py -├── web/ # FastAPI application -│ ├── app.py -│ ├── routes.py -│ ├── services.py -│ └── schemas.py -└── utils/ # Shared utilities - ├── validators.py - ├── text_cleaner.py - └── logging.py -tests/ # Mirror of src structure - ├── test_pdf/ - ├── test_ocr/ - └── test_inference/ + Domain/ # Entities, value objects + Application/ # Use cases, DTOs, interfaces + Infrastructure/ # EF Core, external services + Api/ # Controllers, middleware +tests/ + Unit/ + Integration/ ``` -### File Naming +**Guidelines:** +- Max 800 lines per file (typical 200-400) +- Max 50 lines per method +- One class per file (except nested) +- Group by feature, not by type -``` -src/ocr/paddle_ocr.py # snake_case for modules -src/inference/yolo_detector.py # snake_case for modules -tests/test_paddle_ocr.py # test_ prefix for tests -config.py # snake_case for config +## Code Smells + +```csharp +// BAD: Deep nesting +if (doc != null) + if (doc.IsValid) + if (doc.HasFields) + // ... 
+ +// GOOD: Early returns +if (doc is null) return null; +if (!doc.IsValid) return null; +if (!doc.HasFields) return null; +// ... + +// BAD: Magic numbers +if (confidence > 0.5) { } + +// GOOD: Named constants +private const double ConfidenceThreshold = 0.5; +if (confidence > ConfidenceThreshold) { } ``` -### Module Size Guidelines +## Logging -- **Maximum**: 800 lines per file -- **Typical**: 200-400 lines per file -- **Functions**: Max 50 lines each -- Extract utilities when modules grow too large +```csharp +// Structured logging with templates +_logger.LogInformation("Processing document {DocumentId}", docId); +_logger.LogError(ex, "Failed to process {DocumentId}", docId); -## Comments & Documentation - -### When to Comment - -```python -# GOOD: Explain WHY, not WHAT -# Swedish Bankgiro uses Luhn algorithm with weight [1,2,1,2...] -def validate_bankgiro_checksum(bankgiro: str) -> bool: - ... - -# Payment line format: 7 groups separated by #, checksum at end -def parse_payment_line(line: str) -> PaymentLineData: - ... - -# BAD: Stating the obvious -# Increment counter by 1 -count += 1 - -# Set name to user's name -name = user.name +// Appropriate levels +LogDebug // Development details +LogInformation // Normal operations +LogWarning // Potential issues +LogError // Errors with exceptions ``` -### Docstrings for Public APIs +## Testing (AAA Pattern) -```python -def extract_invoice_fields( - pdf_path: Path, - confidence_threshold: float = 0.5, - use_gpu: bool = True -) -> InferenceResult: - """Extract structured fields from Swedish invoice PDF. +```csharp +[Fact] +public async Task GetById_WithValidId_ReturnsDocument() +{ + // Arrange + var repo = Substitute.For<IRepository<Document>>(); + repo.GetByIdAsync(Arg.Any<Guid>(), Arg.Any<CancellationToken>()) + .Returns(new Document("Test")); + var service = new DocumentService(repo); - Uses YOLOv11 for field detection and PaddleOCR for text extraction. - Applies field-specific normalization and validation. 
+ // Act + var result = await service.GetAsync(Guid.NewGuid(), CancellationToken.None); - Args: - pdf_path: Path to the invoice PDF file. - confidence_threshold: Minimum confidence for field detection (0.0-1.0). - use_gpu: Whether to use GPU acceleration. - - Returns: - InferenceResult containing extracted fields and confidence scores. - - Raises: - FileNotFoundError: If PDF file doesn't exist. - ProcessingError: If OCR or detection fails. - - Example: - >>> result = extract_invoice_fields(Path("invoice.pdf")) - >>> print(result.fields["invoice_number"]) - "INV-2024-001" - """ - ... + // Assert + result.Should().NotBeNull(); + result!.Name.Should().Be("Test"); +} ``` -## Performance Best Practices +## Key Rules -### Caching - -```python -from functools import lru_cache -from cachetools import TTLCache - -# Static data: LRU cache -@lru_cache(maxsize=100) -def get_field_config(field_name: str) -> FieldConfig: - """Load field configuration (cached).""" - return load_config(field_name) - -# Dynamic data: TTL cache -_document_cache = TTLCache(maxsize=1000, ttl=300) # 5 minutes - -def get_document_cached(doc_id: str) -> Document | None: - if doc_id in _document_cache: - return _document_cache[doc_id] - - doc = repo.find_by_id(doc_id) - if doc: - _document_cache[doc_id] = doc - return doc -``` - -### Database Queries - -```python -# GOOD: Select only needed columns -cur.execute(""" - SELECT id, status, fields->>'invoice_number' - FROM documents - WHERE status = %s - LIMIT %s -""", ('processed', 10)) - -# BAD: Select everything -cur.execute("SELECT * FROM documents") - -# GOOD: Batch operations -cur.executemany( - "INSERT INTO labels (doc_id, field, value) VALUES (%s, %s, %s)", - [(doc_id, f, v) for f, v in fields.items()] -) - -# BAD: Individual inserts in loop -for field, value in fields.items(): - cur.execute("INSERT INTO labels ...", (doc_id, field, value)) -``` - -### Lazy Loading - -```python -class InferencePipeline: - def __init__(self, model_path: Path): - 
self.model_path = model_path - self._model: YOLO | None = None - self._ocr: PaddleOCR | None = None - - @property - def model(self) -> YOLO: - """Lazy load YOLO model.""" - if self._model is None: - self._model = YOLO(str(self.model_path)) - return self._model - - @property - def ocr(self) -> PaddleOCR: - """Lazy load PaddleOCR.""" - if self._ocr is None: - self._ocr = PaddleOCR(use_angle_cls=True, lang="latin") - return self._ocr -``` - -## Testing Standards - -### Test Structure (AAA Pattern) - -```python -def test_extract_bankgiro_valid(): - # Arrange - text = "Bankgiro: 123-4567" - - # Act - result = extract_bankgiro(text) - - # Assert - assert result == "1234567" - -def test_extract_bankgiro_invalid_returns_none(): - # Arrange - text = "No bankgiro here" - - # Act - result = extract_bankgiro(text) - - # Assert - assert result is None -``` - -### Test Naming - -```python -# GOOD: Descriptive test names -def test_parse_payment_line_extracts_all_fields(): ... -def test_parse_payment_line_handles_missing_checksum(): ... -def test_validate_ocr_returns_false_for_invalid_checksum(): ... - -# BAD: Vague test names -def test_parse(): ... -def test_works(): ... -def test_payment_line(): ... -``` - -### Fixtures - -```python -import pytest -from pathlib import Path - -@pytest.fixture -def sample_invoice_pdf(tmp_path: Path) -> Path: - """Create sample invoice PDF for testing.""" - pdf_path = tmp_path / "invoice.pdf" - # Create test PDF... - return pdf_path - -@pytest.fixture -def inference_pipeline(sample_model_path: Path) -> InferencePipeline: - """Create inference pipeline with test model.""" - return InferencePipeline(sample_model_path) - -def test_process_invoice(inference_pipeline, sample_invoice_pdf): - result = inference_pipeline.process(sample_invoice_pdf) - assert result.fields.get("invoice_number") is not None -``` - -## Code Smell Detection - -### 1. Long Functions - -```python -# BAD: Function > 50 lines -def process_document(): - # 100 lines of code... 
- -# GOOD: Split into smaller functions -def process_document(pdf_path: Path) -> InferenceResult: - image = render_pdf(pdf_path) - detections = detect_fields(image) - ocr_results = extract_text(image, detections) - fields = normalize_fields(ocr_results) - return build_result(fields) -``` - -### 2. Deep Nesting - -```python -# BAD: 5+ levels of nesting -if document: - if document.is_valid: - if document.has_fields: - if field in document.fields: - if document.fields[field]: - # Do something - -# GOOD: Early returns -if not document: - return None -if not document.is_valid: - return None -if not document.has_fields: - return None -if field not in document.fields: - return None -if not document.fields[field]: - return None - -# Do something -``` - -### 3. Magic Numbers - -```python -# BAD: Unexplained numbers -if confidence > 0.5: - ... -time.sleep(3) - -# GOOD: Named constants -CONFIDENCE_THRESHOLD = 0.5 -RETRY_DELAY_SECONDS = 3 - -if confidence > CONFIDENCE_THRESHOLD: - ... -time.sleep(RETRY_DELAY_SECONDS) -``` - -### 4. Mutable Default Arguments - -```python -# BAD: Mutable default argument -def process_fields(fields: list = []): # DANGEROUS! 
- fields.append("new_field") - return fields - -# GOOD: Use None as default -def process_fields(fields: list | None = None) -> list: - if fields is None: - fields = [] - return [*fields, "new_field"] -``` - -## Logging Standards - -```python -import logging - -# Module-level logger -logger = logging.getLogger(__name__) - -# GOOD: Appropriate log levels -logger.debug("Processing document: %s", doc_id) -logger.info("Document processed successfully: %s", doc_id) -logger.warning("Low confidence score: %.2f", confidence) -logger.error("Failed to process document: %s", error) - -# GOOD: Structured logging with extra data -logger.info( - "Inference complete", - extra={ - "document_id": doc_id, - "field_count": len(fields), - "processing_time_ms": elapsed_ms - } -) - -# BAD: Using print() -print(f"Processing {doc_id}") # Never in production! -``` - -**Remember**: Code quality is not negotiable. Clear, maintainable Python code with proper type hints enables confident development and refactoring. +- Always use `CancellationToken` for async methods +- Prefer `records` for DTOs and immutable data +- Use `IReadOnlyList<T>` for collection return types +- Never use `async void` (except event handlers) +- Always handle `null` with pattern matching or null operators +- Use structured logging, never `Console.WriteLine`