Fix the skill

This commit is contained in:
Yaojia Wang
2026-02-04 23:30:06 +01:00
parent fa996683c3
commit 15533285c6
4 changed files with 788 additions and 1730 deletions

View File

@@ -1,314 +1,274 @@
# Backend Development Patterns
# .NET Development Best Practices
Backend architecture patterns for Python/FastAPI/PostgreSQL applications.
## API Design
### RESTful Structure
## Project Structure
```
GET /api/v1/documents # List
GET /api/v1/documents/{id} # Get
POST /api/v1/documents # Create
PUT /api/v1/documents/{id} # Replace
PATCH /api/v1/documents/{id} # Partial update
DELETE /api/v1/documents/{id} # Delete
GET /api/v1/documents?status=processed&sort=created_at&limit=20&offset=0
src/
Domain/ # Entities, value objects, domain events
Application/ # Use cases, DTOs, interfaces
Infrastructure/ # EF Core, external services
Api/ # Controllers, middleware, filters
tests/
Unit/
Integration/
```
### FastAPI Route Pattern
## Code Style
```python
from fastapi import APIRouter, HTTPException, Depends, Query, File, UploadFile
from pydantic import BaseModel
```csharp
// Use records for DTOs and value objects
public sealed record CreateDocumentRequest(string Name, string Type);
router = APIRouter(prefix="/api/v1", tags=["inference"])
// Use primary constructors
public class DocumentService(IRepository<Document> repo, ILogger<DocumentService> logger)
{
public async Task<Document?> GetAsync(Guid id, CancellationToken ct) =>
await repo.GetByIdAsync(id, ct);
}
@router.post("/infer", response_model=ApiResponse[InferenceResult])
async def infer_document(
file: UploadFile = File(...),
confidence_threshold: float = Query(0.5, ge=0, le=1),
service: InferenceService = Depends(get_inference_service)
) -> ApiResponse[InferenceResult]:
result = await service.process(file, confidence_threshold)
return ApiResponse(success=True, data=result)
// Prefer expression body for simple methods
public Document? FindById(Guid id) => _documents.FirstOrDefault(d => d.Id == id);
// Use collection expressions
int[] numbers = [1, 2, 3];
List<string> names = ["Alice", "Bob"];
```
### Consistent Response Schema
## Async/Await
```python
from typing import Generic, TypeVar
T = TypeVar('T')
```csharp
// Always pass CancellationToken
public async Task<Document> GetAsync(Guid id, CancellationToken ct)
class ApiResponse(BaseModel, Generic[T]):
success: bool
data: T | None = None
error: str | None = None
meta: dict | None = None
// Use ConfigureAwait(false) in libraries
await _httpClient.GetAsync(url, ct).ConfigureAwait(false);
// Avoid async void (except event handlers)
public async Task ProcessAsync() { } // Good
public async void Process() { } // Bad
// Use ValueTask for hot paths with frequent sync completion
public ValueTask<int> GetCachedCountAsync()
```
## Core Patterns
## Dependency Injection
### Repository Pattern
```csharp
// Register by interface
builder.Services.AddScoped<IDocumentService, DocumentService>();
```python
from typing import Protocol
// Use Options pattern for configuration
builder.Services.Configure<AppSettings>(builder.Configuration.GetSection("App"));
class DocumentRepository(Protocol):
def find_all(self, filters: dict | None = None) -> list[Document]: ...
def find_by_id(self, id: str) -> Document | None: ...
def create(self, data: dict) -> Document: ...
def update(self, id: str, data: dict) -> Document: ...
def delete(self, id: str) -> None: ...
public class MyService(IOptions<AppSettings> options)
{
private readonly AppSettings _settings = options.Value;
}
// Avoid service locator pattern
// Bad: var service = serviceProvider.GetService<IMyService>();
// Good: Constructor injection
```
### Service Layer
## Entity Framework Core
```python
class InferenceService:
def __init__(self, model_path: str, use_gpu: bool = True):
self.pipeline = InferencePipeline(model_path=model_path, use_gpu=use_gpu)
```csharp
// Always use AsNoTracking for read-only queries
await _context.Documents.AsNoTracking().ToListAsync(ct);
async def process(self, file: UploadFile, confidence_threshold: float) -> InferenceResult:
temp_path = self._save_temp_file(file)
try:
return self.pipeline.process_pdf(temp_path)
finally:
temp_path.unlink(missing_ok=True)
```
// Use projection to select only needed fields
await _context.Documents
.Where(d => d.Status == "Active")
.Select(d => new DocumentDto(d.Id, d.Name))
.ToListAsync(ct);
### Dependency Injection
// Prevent N+1 with Include or projection
await _context.Documents.Include(d => d.Labels).ToListAsync(ct);
```python
from functools import lru_cache
from pydantic_settings import BaseSettings
// Use explicit transactions for multiple operations
await using var tx = await _context.Database.BeginTransactionAsync(ct);
class Settings(BaseSettings):
db_host: str = "localhost"
db_password: str
model_path: str = "runs/train/invoice_fields/weights/best.pt"
class Config:
env_file = ".env"
@lru_cache()
def get_settings() -> Settings:
return Settings()
def get_inference_service(settings: Settings = Depends(get_settings)) -> InferenceService:
return InferenceService(model_path=settings.model_path)
```
## Database Patterns
### Connection Pooling
```python
from psycopg2 import pool
from contextlib import contextmanager
db_pool = pool.ThreadedConnectionPool(minconn=2, maxconn=10, **db_config)
@contextmanager
def get_db_connection():
conn = db_pool.getconn()
try:
yield conn
finally:
db_pool.putconn(conn)
```
### Query Optimization
```python
# GOOD: Select only needed columns
cur.execute("""
SELECT id, status, fields->>'InvoiceNumber' as invoice_number
FROM documents WHERE status = %s
ORDER BY created_at DESC LIMIT %s
""", ('processed', 10))
# BAD: SELECT * FROM documents
```
### N+1 Prevention
```python
# BAD: N+1 queries
for doc in documents:
doc.labels = get_labels(doc.id) # N queries
# GOOD: Batch fetch with JOIN
cur.execute("""
SELECT d.id, d.status, array_agg(l.label) as labels
FROM documents d
LEFT JOIN document_labels l ON d.id = l.document_id
GROUP BY d.id, d.status
""")
```
### Transaction Pattern
```python
def create_document_with_labels(doc_data: dict, labels: list[dict]) -> str:
with get_db_connection() as conn:
try:
with conn.cursor() as cur:
cur.execute("INSERT INTO documents ... RETURNING id", ...)
doc_id = cur.fetchone()[0]
for label in labels:
cur.execute("INSERT INTO document_labels ...", ...)
conn.commit()
return doc_id
except Exception:
conn.rollback()
raise
```
## Caching
```python
from cachetools import TTLCache
_cache = TTLCache(maxsize=1000, ttl=300)
def get_document_cached(doc_id: str) -> Document | None:
    """Return the document for ``doc_id``, consulting the module cache first.

    On a cache miss the document is loaded through ``repo.find_by_id`` and
    stored in ``_cache`` only when the lookup returned a truthy document, so
    missing documents are looked up again on the next call.
    """
    # Look the entry up in one step. Checking `doc_id in _cache` and then
    # indexing are two separate expiry checks on a TTL cache; the entry can
    # expire between them and the indexing would raise KeyError.
    try:
        return _cache[doc_id]
    except KeyError:
        pass

    doc = repo.find_by_id(doc_id)
    if doc:
        _cache[doc_id] = doc
    return doc
// Configure entities with IEntityTypeConfiguration
public class DocumentConfiguration : IEntityTypeConfiguration<Document>
{
public void Configure(EntityTypeBuilder<Document> builder)
{
builder.HasKey(d => d.Id);
builder.Property(d => d.Name).HasMaxLength(200).IsRequired();
builder.HasIndex(d => d.Status);
}
}
```
## Error Handling
### Exception Hierarchy
```csharp
// Create domain-specific exceptions
public class NotFoundException(string resource, Guid id)
: Exception($"{resource} not found: {id}");
```python
class AppError(Exception):
    """Base application error carrying a message and an HTTP status code.

    Attributes:
        message: Human-readable description of the failure.
        status_code: HTTP status code associated with the error (default 500).
    """

    def __init__(self, message: str, status_code: int = 500):
        # Initialise the base Exception with the message only. Without this
        # call, str(exc) renders the raw constructor arguments as a tuple
        # (e.g. "('boom', 400)") instead of the message text.
        super().__init__(message)
        self.message = message
        self.status_code = status_code
// Use global exception handler
/// <summary>
/// Converts unhandled exceptions into a JSON error response and logs them.
/// </summary>
public class GlobalExceptionHandler(ILogger<GlobalExceptionHandler> logger) : IExceptionHandler
{
    /// <summary>
    /// Logs <paramref name="ex"/> and writes a JSON body of the form
    /// <c>{ "error": "..." }</c> with status 404 for
    /// <see cref="NotFoundException"/> and 500 for everything else.
    /// </summary>
    /// <returns>Always <c>true</c>: the exception is considered handled.</returns>
    public async ValueTask<bool> TryHandleAsync(HttpContext ctx, Exception ex, CancellationToken ct)
    {
        // The full exception (message and stack trace) goes to the log only.
        logger.LogError(ex, "Error: {Message}", ex.Message);

        // Only domain exceptions carry a message written for API consumers.
        // Messages of unexpected exceptions may expose internals (SQL, paths,
        // type names), so the client receives a generic text for those.
        var isNotFound = ex is NotFoundException;
        var message = isNotFound ? ex.Message : "Internal server error";

        ctx.Response.StatusCode = isNotFound ? 404 : 500;
        await ctx.Response.WriteAsJsonAsync(new { error = message }, ct);
        return true;
    }
}
class NotFoundError(AppError):
def __init__(self, resource: str, id: str):
super().__init__(f"{resource} not found: {id}", 404)
class ValidationError(AppError):
def __init__(self, message: str):
super().__init__(message, 400)
// Use Result pattern for expected failures
public Result<Document> Validate(CreateRequest request) =>
string.IsNullOrEmpty(request.Name)
? Result<Document>.Fail("Name is required")
: Result<Document>.Ok(new Document(request.Name));
```
### FastAPI Exception Handler
## Validation
```python
@app.exception_handler(AppError)
async def app_error_handler(request: Request, exc: AppError):
return JSONResponse(status_code=exc.status_code, content={"success": False, "error": exc.message})
```csharp
// Use FluentValidation
public class CreateDocumentValidator : AbstractValidator<CreateDocumentRequest>
{
public CreateDocumentValidator()
{
RuleFor(x => x.Name).NotEmpty().MaximumLength(200);
RuleFor(x => x.Type).Must(BeValidType).WithMessage("Invalid document type");
}
}
@app.exception_handler(Exception)
async def generic_error_handler(request: Request, exc: Exception):
logger.error(f"Unexpected error: {exc}", exc_info=True)
return JSONResponse(status_code=500, content={"success": False, "error": "Internal server error"})
// Or use Data Annotations for simple cases
public record CreateRequest(
[Required, MaxLength(200)] string Name,
[Range(1, 100)] int Quantity);
```
### Retry with Backoff
## Logging
```python
async def retry_with_backoff(fn, max_retries: int = 3, base_delay: float = 1.0):
    """Call ``fn`` until it succeeds, backing off exponentially between tries.

    Args:
        fn: Zero-argument callable. It may be a plain function, a coroutine
            function, or any callable that returns an awaitable.
        max_retries: Total number of attempts (not extra retries); must be
            at least 1.
        base_delay: Seconds to sleep after the first failure. The delay
            doubles after every further failure.

    Returns:
        The value ``fn`` returned (or resolved to) on the first success.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        Exception: The error raised by the final attempt when all fail.
    """
    import asyncio
    import inspect

    if max_retries < 1:
        # With no attempts there is no error to re-raise; fail with a clear
        # message instead of falling through to `raise None`.
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            result = fn()
            # Decide by what was returned rather than by how `fn` was
            # declared, so lambdas, partials and callable objects that wrap
            # a coroutine are awaited as well.
            if inspect.isawaitable(result):
                result = await result
            return result
        except Exception:
            if attempt == max_retries - 1:
                # Last attempt: propagate with the original traceback.
                raise
            await asyncio.sleep(base_delay * (2 ** attempt))
```csharp
// Use structured logging with templates
logger.LogInformation("Processing document {DocumentId} for user {UserId}", docId, userId);
// Use appropriate log levels
logger.LogDebug("Cache hit for key {Key}", key); // Development details
logger.LogInformation("Document {Id} created", id); // Normal operations
logger.LogWarning("Retry attempt {Attempt} for {Op}", n, op); // Potential issues
logger.LogError(ex, "Failed to process {DocumentId}", id); // Errors
// Configure log filtering in appsettings.json (the block below is JSON, not C#)
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning",
"Microsoft.EntityFrameworkCore": "Warning"
}
}
}
```
## Rate Limiting
## API Design
```python
from time import time
from collections import defaultdict
```csharp
[ApiController]
[Route("api/v1/[controller]")]
public class DocumentsController(IDocumentService service) : ControllerBase
{
[HttpGet("{id:guid}")]
[ProducesResponseType<Document>(200)]
[ProducesResponseType(404)]
public async Task<IActionResult> Get(Guid id, CancellationToken ct)
{
var doc = await service.GetAsync(id, ct);
return doc is null ? NotFound() : Ok(doc);
}
class RateLimiter:
    """In-memory request limiter keyed by an arbitrary identifier.

    For every identifier it keeps the timestamps of the requests that were
    accepted within the most recent window.
    """

    def __init__(self):
        # identifier -> timestamps (seconds, from time()) of accepted requests
        self.requests: dict[str, list[float]] = defaultdict(list)

    def check_limit(self, identifier: str, max_requests: int, window_sec: int) -> bool:
        """Record a request for ``identifier`` if it is within the limit.

        Args:
            identifier: Key whose requests are counted together.
            max_requests: Maximum accepted requests per window.
            window_sec: Window length in seconds.

        Returns:
            True when the request was accepted and recorded, False when the
            identifier already has ``max_requests`` requests in the window.
        """
        current = time()

        # Keep only the timestamps that are still inside the window.
        recent = []
        for stamp in self.requests[identifier]:
            if current - stamp < window_sec:
                recent.append(stamp)
        self.requests[identifier] = recent

        allowed = len(recent) < max_requests
        if allowed:
            recent.append(current)
        return allowed
limiter = RateLimiter()
@app.middleware("http")
async def rate_limit_middleware(request: Request, call_next):
    """Reject a client with HTTP 429 after 100 requests within 60 seconds.

    Requests are counted per client host through the module-level
    ``limiter``; accepted requests are passed on to ``call_next``.
    """
    # `request.client` is None when the server supplies no peer address.
    # Count those requests under one shared key instead of failing with
    # AttributeError on `.host`.
    ip = request.client.host if request.client else "unknown"
    if not limiter.check_limit(ip, max_requests=100, window_sec=60):
        return JSONResponse(status_code=429, content={"error": "Rate limit exceeded"})
    return await call_next(request)
[HttpPost]
public async Task<IActionResult> Create(CreateRequest request, CancellationToken ct)
{
var doc = await service.CreateAsync(request, ct);
return CreatedAtAction(nameof(Get), new { id = doc.Id }, doc);
}
}
```
## Logging & Middleware
## Testing
### Request Logging
```csharp
// Use descriptive test names
[Fact]
public async Task GetById_WithValidId_ReturnsDocument()
{
// Arrange
var repo = Substitute.For<IRepository<Document>>();
repo.GetByIdAsync(Arg.Any<Guid>(), Arg.Any<CancellationToken>())
.Returns(new Document("Test"));
var service = new DocumentService(repo);
```python
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log the start and completion of every HTTP request.

    Each request gets a short id (first 8 characters of a UUID4) that
    prefixes both log lines. The completion line reports the response
    status code and the elapsed time in milliseconds.
    """
    request_id = str(uuid.uuid4())[:8]
    # perf_counter is monotonic; time.time() follows the wall clock and can
    # jump (NTP, manual changes), producing wrong or negative durations.
    started = time.perf_counter()
    # Lazy %-style arguments render the same text as the former f-strings
    # but skip the formatting when the INFO level is disabled.
    logger.info("[%s] %s %s", request_id, request.method, request.url.path)
    response = await call_next(request)
    duration_ms = (time.perf_counter() - started) * 1000
    logger.info("[%s] Completed %s in %.2fms", request_id, response.status_code, duration_ms)
    return response
// Act
var result = await service.GetAsync(Guid.NewGuid(), CancellationToken.None);
// Assert
result.Should().NotBeNull();
result!.Name.Should().Be("Test");
}
// Use WebApplicationFactory for integration tests
public class ApiTests(WebApplicationFactory<Program> factory) : IClassFixture<WebApplicationFactory<Program>>
{
[Fact]
public async Task GetDocuments_ReturnsSuccess()
{
var client = factory.CreateClient();
var response = await client.GetAsync("/api/v1/documents");
response.StatusCode.Should().Be(HttpStatusCode.OK);
}
}
```
### Structured Logging
## Performance
```python
class JSONFormatter(logging.Formatter):
    """Formatter that renders each log record as one JSON object."""

    def format(self, record):
        """Serialize ``record`` to a JSON string.

        The object has the keys ``timestamp`` (ISO 8601, UTC), ``level``,
        ``message`` (with arguments interpolated) and ``module``.
        """
        from datetime import datetime, timezone

        # Use the time the record was created, not the time it happens to be
        # formatted, and make it timezone-aware (datetime.utcnow() returns a
        # naive value and is deprecated).
        event_time = datetime.fromtimestamp(record.created, tz=timezone.utc)
        return json.dumps({
            "timestamp": event_time.isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            "module": record.module,
        })
```csharp
// Use IMemoryCache for frequently accessed data
/// <summary>
/// Read-through cache in front of the document repository.
/// </summary>
public class CachedService(IMemoryCache cache, IRepository<Document> repo)
{
    /// <summary>
    /// Returns the document with the given id, from the cache when present,
    /// otherwise from the repository. Found documents are cached for five
    /// minutes; misses are not cached.
    /// </summary>
    public async Task<Document?> GetAsync(Guid id, CancellationToken ct)
    {
        var key = $"doc:{id}";
        if (cache.TryGetValue(key, out Document? cached) && cached is not null)
            return cached;

        var doc = await repo.GetByIdAsync(id, ct);

        // Do not store null: GetOrCreateAsync would cache a miss as well, so
        // a document created right after a failed lookup would stay
        // invisible until the entry expires.
        if (doc is not null)
            cache.Set(key, doc, TimeSpan.FromMinutes(5));

        return doc;
    }
}
// Use pagination for large collections
/// <summary>
/// Returns one page of documents together with the total document count.
/// </summary>
/// <param name="page">1-based page number; values below 1 are treated as 1.</param>
/// <param name="size">Page size; values below 1 are treated as 1.</param>
/// <param name="ct">Token used to cancel the database queries.</param>
public async Task<PagedResult<Document>> GetPagedAsync(int page, int size, CancellationToken ct)
{
    // Guard against non-positive arguments, which would otherwise produce a
    // negative Skip or a Take of zero or fewer rows.
    page = Math.Max(page, 1);
    size = Math.Max(size, 1);

    var items = await _context.Documents
        .AsNoTracking()      // read-only query: no change tracking needed
        .OrderBy(d => d.Id)  // Skip/Take need a stable order, otherwise pages
                             // can overlap or miss rows between requests
        .Skip((page - 1) * size)
        .Take(size)
        .ToListAsync(ct);

    var total = await _context.Documents.CountAsync(ct);
    return new(items, total);
}
// Use IAsyncEnumerable for streaming large datasets
/// <summary>
/// Streams all documents one at a time instead of loading them into a list.
/// </summary>
/// <param name="ct">Token that cancels the enumeration.</param>
public async IAsyncEnumerable<Document> StreamAllAsync([EnumeratorCancellation] CancellationToken ct)
{
    // Read-only streaming: with tracking enabled the context keeps a
    // reference to every entity it has yielded, so memory would grow with
    // the size of the dataset.
    var documents = _context.Documents.AsNoTracking().AsAsyncEnumerable();

    await foreach (var doc in documents.WithCancellation(ct))
        yield return doc;
}
```
## Background Tasks
## Security
```python
from fastapi import BackgroundTasks
```csharp
// Never hardcode secrets
var apiKey = builder.Configuration["ApiKey"]; // From environment/secrets
def send_notification(document_id: str, status: str):
logger.info(f"Notification: {document_id} -> {status}")
// Use parameterized queries (EF Core does this automatically)
// Bad: $"SELECT * FROM Users WHERE Id = {id}"
// Good: _context.Users.Where(u => u.Id == id)
@router.post("/infer")
async def infer(file: UploadFile, background_tasks: BackgroundTasks):
result = await process_document(file)
background_tasks.add_task(send_notification, result.document_id, "completed")
return result
// Validate and sanitize all inputs
// Use HTTPS in production
// Implement rate limiting
builder.Services.AddRateLimiter(options => { ... });
```
## Key Principles
- Repository pattern: Abstract data access
- Service layer: Business logic separated from routes
- Dependency injection via `Depends()`
- Connection pooling for database
- Parameterized queries only (no f-strings in SQL)
- Batch fetch to prevent N+1
- Consistent `ApiResponse[T]` format
- Exception hierarchy with proper status codes
- Rate limit by IP
- Structured logging with request ID