using System.Net;
using System.Text;
using System.Text.Json;
using FiscalFlow.Core.Interfaces;
using FiscalFlow.Infrastructure.Services;
using FluentAssertions;
using Microsoft.Extensions.Logging;
using Moq;
using Xunit;

namespace FiscalFlow.UnitTests.Services;

/// <summary>
/// Unit tests for <see cref="OcrService"/>'s <c>ExtractAsync</c>. Every test swaps the HTTP
/// transport for a stub <see cref="HttpMessageHandler"/> that returns a canned response or
/// throws, so no network traffic takes place.
/// </summary>
public sealed class OcrServiceTests
{
    // NOTE(review): the logger category is inferred from the OcrService constructor call — confirm.
    private readonly Mock<ILogger<OcrService>> _loggerMock = new();

    /// <summary>
    /// Full happy path: a response carrying fields, per-field confidences and a VAT summary
    /// is expected to be mapped onto every property of the result.
    /// </summary>
    [Fact]
    public async Task ExtractAsync_SuccessfulExtraction_ReturnsCompleteOcrResult()
    {
        // Arrange
        var apiResponse = new
        {
            status = "success",
            message = "Processed document abc123",
            result = new
            {
                document_id = "abc123",
                success = true,
                document_type = "invoice",
                fields = new Dictionary<string, string?>
                {
                    ["InvoiceNumber"] = "INV-001",
                    ["InvoiceDate"] = "2024-01-15",
                    ["InvoiceDueDate"] = "2024-02-15",
                    ["OCR"] = "123456789",
                    ["Bankgiro"] = "1234-5678",
                    ["Plusgiro"] = null,
                    ["Amount"] = "12500.00",
                    ["SupplierName"] = "Test AB",
                    ["supplier_org_number"] = "556677-8899",
                    ["customer_number"] = "C-001",
                    ["payment_line"] = "# 123456789 # 12500 00 #",
                    ["Currency"] = "SEK",
                },
                confidence = new Dictionary<string, decimal>
                {
                    ["InvoiceNumber"] = 0.95m,
                    ["InvoiceDate"] = 0.88m,
                    ["Amount"] = 0.92m,
                },
                detections = new List<object>(),
                processing_time_ms = 1234.5,
                errors = new List<string>(),
                vat_summary = new
                {
                    breakdowns = new[]
                    {
                        new
                        {
                            rate = 25.0m,
                            vat_amount = "2500.00",
                            base_amount = "10000.00",
                            source = "regex",
                        },
                    },
                    total_excl_vat = "10000.00",
                    total_vat = "2500.00",
                    total_incl_vat = "12500.00",
                    confidence = 0.85m,
                },
            },
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeTrue();
        result.Confidence.Should().Be(0.9167m); // mean of 0.95, 0.88 and 0.92 at four decimals
        result.ProcessingTimeMs.Should().Be(1234.5);
        result.DocumentType.Should().Be("invoice");

        result.FieldConfidences.Should().ContainKey("InvoiceNumber").WhoseValue.Should().Be(0.95m);
        result.FieldConfidences.Should().ContainKey("InvoiceDate").WhoseValue.Should().Be(0.88m);
        result.FieldConfidences.Should().ContainKey("Amount").WhoseValue.Should().Be(0.92m);

        var data = result.Data;
        data.Should().NotBeNull();
        data!.InvoiceNumber.Should().Be("INV-001");
        data.InvoiceDate.Should().Be(new DateTime(2024, 1, 15));
        data.DueDate.Should().Be(new DateTime(2024, 2, 15));
        data.OcrNumber.Should().Be("123456789");
        data.Bankgiro.Should().Be("1234-5678");
        data.Plusgiro.Should().BeNull();
        data.AmountTotal.Should().Be(12500.00m);
        data.SupplierName.Should().Be("Test AB");
        data.SupplierOrgNumber.Should().Be("556677-8899");
        data.CustomerNumber.Should().Be("C-001");
        data.PaymentLine.Should().Be("# 123456789 # 12500 00 #");
        data.Currency.Should().Be("SEK");
        data.AmountVat.Should().Be(2500.00m);
        data.VatRate.Should().Be(25);
    }

    /// <summary>Each API field key is expected to land on its matching result property.</summary>
    [Fact]
    public async Task ExtractAsync_FieldMapping_MapsAllFieldsCorrectly()
    {
        // Arrange
        var apiResponse = new
        {
            status = "success",
            message = "OK",
            result = new
            {
                document_id = "test",
                success = true,
                fields = new Dictionary<string, string?>
                {
                    ["InvoiceNumber"] = "TEST-123",
                    ["InvoiceDate"] = "2024-03-20",
                    ["InvoiceDueDate"] = "2024-04-20",
                    ["OCR"] = "987654321",
                    ["Bankgiro"] = "9999-8888",
                    ["Plusgiro"] = "123456-7",
                    ["Amount"] = "5000.00",
                    ["supplier_org_number"] = "111222-3333",
                    ["customer_number"] = "CUST-999",
                    ["payment_line"] = "Payment reference line",
                },
                confidence = new Dictionary<string, decimal>(),
                processing_time_ms = 500.0,
            },
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        var data = result.Data;
        data.Should().NotBeNull();
        data!.InvoiceNumber.Should().Be("TEST-123");
        data.InvoiceDate.Should().Be(new DateTime(2024, 3, 20));
        data.DueDate.Should().Be(new DateTime(2024, 4, 20));
        data.OcrNumber.Should().Be("987654321");
        data.Bankgiro.Should().Be("9999-8888");
        data.Plusgiro.Should().Be("123456-7");
        data.AmountTotal.Should().Be(5000.00m);
        data.SupplierOrgNumber.Should().Be("111222-3333");
        data.CustomerNumber.Should().Be("CUST-999");
        data.PaymentLine.Should().Be("Payment reference line");
    }

    /// <summary>When <c>total_vat</c> is present it is expected to be used as the VAT amount.</summary>
    [Fact]
    public async Task ExtractAsync_VatSummaryParsing_PopulatesVatFieldsFromTotalVat()
    {
        // Arrange
        var apiResponse = new
        {
            status = "success",
            message = "OK",
            result = new
            {
                document_id = "test",
                success = true,
                fields = new Dictionary<string, string?>(),
                confidence = new Dictionary<string, decimal>(),
                processing_time_ms = 100.0,
                vat_summary = new
                {
                    total_vat = "1250.50",
                    breakdowns = new[]
                    {
                        new
                        {
                            rate = 25.0m,
                            vat_amount = "1250.50",
                            base_amount = "5000.00",
                            source = "table",
                        },
                    },
                },
            },
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        var data = result.Data;
        data.Should().NotBeNull();
        data!.AmountVat.Should().Be(1250.50m);
        data.VatRate.Should().Be(25);
    }

    /// <summary>
    /// Without <c>total_vat</c> the VAT amount is expected to be the sum of the breakdown
    /// amounts (1000.00 + 240.00), with the rate taken from the first breakdown entry.
    /// </summary>
    [Fact]
    public async Task ExtractAsync_VatSummaryWithoutTotalVat_SumsFromBreakdowns()
    {
        // Arrange
        var apiResponse = new
        {
            status = "success",
            message = "OK",
            result = new
            {
                document_id = "test",
                success = true,
                fields = new Dictionary<string, string?>(),
                confidence = new Dictionary<string, decimal>(),
                processing_time_ms = 100.0,
                vat_summary = new
                {
                    breakdowns = new[]
                    {
                        new
                        {
                            rate = 25.0m,
                            vat_amount = "1000.00",
                            base_amount = "4000.00",
                            source = "table",
                        },
                        new
                        {
                            rate = 12.0m,
                            vat_amount = "240.00",
                            base_amount = "2000.00",
                            source = "table",
                        },
                    },
                },
            },
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        var data = result.Data;
        data.Should().NotBeNull();
        data!.AmountVat.Should().Be(1240.00m);
        data.VatRate.Should().Be(25);
    }

    /// <summary>Overall confidence is expected to be the mean field confidence at four decimals.</summary>
    [Fact]
    public async Task ExtractAsync_ConfidenceCalculation_AveragesFieldConfidencesAndRoundsToFourDecimals()
    {
        // Arrange
        var apiResponse = MinimalSuccessResponse(
            confidence: new Dictionary<string, decimal>
            {
                ["Field1"] = 0.123456789m,
                ["Field2"] = 0.987654321m,
                ["Field3"] = 0.555555555m,
            });
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Confidence.Should().Be(0.5556m);
    }

    /// <summary>An empty confidence map is expected to yield an overall confidence of zero.</summary>
    [Fact]
    public async Task ExtractAsync_NoConfidenceScores_ReturnsZeroConfidence()
    {
        // Arrange
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, MinimalSuccessResponse());
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Confidence.Should().Be(0m);
    }

    /// <summary>A non-success HTTP status is expected to surface as a failure naming the status code.</summary>
    [Fact]
    public async Task ExtractAsync_HttpError500_ReturnsFailureWithErrorMessage()
    {
        // Arrange
        using var httpClient = CreateHttpClient(HttpStatusCode.InternalServerError, "Internal server error");
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeFalse();
        result.ErrorMessage.Should().Be("OCR API returned InternalServerError");
    }

    /// <summary>An empty JSON object body is expected to be reported as a failure.</summary>
    [Fact]
    public async Task ExtractAsync_NullResponse_ReturnsFailure()
    {
        // Arrange
        // The body is the raw text "{}" (string overload), i.e. an object with no properties.
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, "{}");
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeFalse();
        result.ErrorMessage.Should().NotBeNullOrEmpty();
    }

    /// <summary>A null <c>result</c> is expected to fail with the API's own message.</summary>
    [Fact]
    public async Task ExtractAsync_NullResult_ReturnsFailureWithMessage()
    {
        // Arrange
        var apiResponse = new
        {
            status = "error",
            message = "Processing failed",
            result = (object?)null,
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeFalse();
        result.ErrorMessage.Should().Be("Processing failed");
    }

    /// <summary>An <c>"error"</c> status with a populated result is still expected to fail.</summary>
    [Fact]
    public async Task ExtractAsync_ErrorStatus_ReturnsFailureWithMessage()
    {
        // Arrange
        var apiResponse = new
        {
            status = "error",
            message = "Document format not supported",
            result = new
            {
                document_id = "test",
                success = false,
                fields = new Dictionary<string, string?>(),
                confidence = new Dictionary<string, decimal>(),
                processing_time_ms = 10.0,
            },
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeFalse();
        result.ErrorMessage.Should().Be("Document format not supported");
    }

    /// <summary>A <c>"partial"</c> status is expected to count as success and keep its fields.</summary>
    [Fact]
    public async Task ExtractAsync_PartialStatus_TreatedAsSuccess()
    {
        // Arrange
        var apiResponse = new
        {
            status = "partial",
            message = "Some fields not detected",
            result = new
            {
                document_id = "test",
                success = true,
                fields = new Dictionary<string, string?>
                {
                    ["InvoiceNumber"] = "PARTIAL-001",
                },
                confidence = new Dictionary<string, decimal>
                {
                    ["InvoiceNumber"] = 0.75m,
                },
                processing_time_ms = 200.0,
            },
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeTrue();
        result.Data.Should().NotBeNull();
        result.Data!.InvoiceNumber.Should().Be("PARTIAL-001");
    }

    /// <summary>
    /// A <see cref="TaskCanceledException"/> raised while the caller's token is
    /// <see cref="CancellationToken.None"/> is expected to be reported as a timeout.
    /// </summary>
    [Fact]
    public async Task ExtractAsync_TaskCanceledException_NotUserCancellation_ReturnsTimeoutError()
    {
        // Arrange
        using var httpClient = CreateHttpClient(new TimeoutHttpMessageHandler());
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeFalse();
        result.ErrorMessage.Should().Be("OCR API request timed out");
    }

    /// <summary>An <see cref="HttpRequestException"/> is expected to map to a connection-error failure.</summary>
    [Fact]
    public async Task ExtractAsync_HttpRequestException_ReturnsConnectionError()
    {
        // Arrange
        using var httpClient = CreateHttpClient(new ConnectionErrorHttpMessageHandler());
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeFalse();
        result.ErrorMessage.Should().Contain("OCR API connection error");
    }

    /// <summary>Amounts in several separator conventions are expected to parse to the same value.</summary>
    [Theory]
    [InlineData("1234.56", 1234.56)]
    [InlineData("1 234,56", 1234.56)]
    [InlineData("1,234.56", 1234.56)]
    [InlineData("1234,56", 1234.56)]
    [InlineData("12345", 12345)]
    [InlineData("0.99", 0.99)]
    public async Task ExtractAsync_DecimalParsing_HandlesVariousFormats(string input, decimal expected)
    {
        // Arrange
        var apiResponse = MinimalSuccessResponse(
            fields: new Dictionary<string, string?>
            {
                ["Amount"] = input,
            });
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Data.Should().NotBeNull();
        result.Data!.AmountTotal.Should().Be(expected);
    }

    /// <summary>Dates in several common layouts are expected to parse to the same calendar day.</summary>
    [Theory]
    [InlineData("2024-01-15", 2024, 1, 15)]
    [InlineData("15/01/2024", 2024, 1, 15)]
    [InlineData("15.01.2024", 2024, 1, 15)]
    [InlineData("2024/01/15", 2024, 1, 15)]
    public async Task ExtractAsync_DateParsing_HandlesVariousFormats(string input, int year, int month, int day)
    {
        // Arrange
        var apiResponse = MinimalSuccessResponse(
            fields: new Dictionary<string, string?>
            {
                ["InvoiceDate"] = input,
            });
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Data.Should().NotBeNull();
        result.Data!.InvoiceDate.Should().Be(new DateTime(year, month, day));
    }

    /// <summary>
    /// File names with differing extensions and casing, including an unrecognised one,
    /// are all expected to be accepted.
    /// </summary>
    [Theory]
    [InlineData("test.pdf")]
    [InlineData("invoice.PDF")]
    [InlineData("scan.png")]
    [InlineData("photo.PNG")]
    [InlineData("image.jpg")]
    [InlineData("photo.jpeg")]
    [InlineData("document.JPG")]
    [InlineData("unknown.txt")]
    public async Task ExtractAsync_DifferentFileTypes_ProcessesSuccessfully(string fileName)
    {
        // Arrange
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, MinimalSuccessResponse());
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, fileName, CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.Success.Should().BeTrue();
    }

    /// <summary>Entries in the result's <c>errors</c> list are expected to be joined with "; ".</summary>
    [Fact]
    public async Task ExtractAsync_ErrorsInResult_IncludedInErrorMessage()
    {
        // Arrange
        var apiResponse = new
        {
            status = "success",
            message = "OK",
            result = new
            {
                document_id = "test",
                success = true,
                fields = new Dictionary<string, string?>(),
                confidence = new Dictionary<string, decimal>(),
                processing_time_ms = 100.0,
                errors = new List<string> { "Low quality image", "Missing page 2" },
            },
        };
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Should().NotBeNull();
        result.ErrorMessage.Should().Be("Low quality image; Missing page 2");
    }

    /// <summary>With no <c>Currency</c> field the currency is expected to default to SEK.</summary>
    [Fact]
    public async Task ExtractAsync_DefaultCurrency_SetToSEK()
    {
        // Arrange
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, MinimalSuccessResponse());
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Data.Should().NotBeNull();
        result.Data!.Currency.Should().Be("SEK");
    }

    /// <summary>An explicit <c>Currency</c> field is expected to replace the default.</summary>
    [Fact]
    public async Task ExtractAsync_CustomCurrency_OverridesDefault()
    {
        // Arrange
        var apiResponse = MinimalSuccessResponse(
            fields: new Dictionary<string, string?>
            {
                ["Currency"] = "EUR",
            });
        using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream();

        // Act
        var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        result.Data.Should().NotBeNull();
        result.Data!.Currency.Should().Be("EUR");
    }

    /// <summary>
    /// Captures the outgoing request and checks that it targets a URI ending in "infer"
    /// and carries multipart form-data content.
    /// </summary>
    [Fact]
    public async Task ExtractAsync_MultipartFormData_IncludesFileAndExtractLineItemsParameter()
    {
        // Arrange
        var captureHandler = new CaptureHttpMessageHandler(HttpStatusCode.OK, MinimalSuccessResponse());
        using var httpClient = CreateHttpClient(captureHandler);
        var service = new OcrService(httpClient, _loggerMock.Object);
        using var stream = new MemoryStream(Encoding.UTF8.GetBytes("test file content"));

        // Act
        await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

        // Assert
        // NOTE(review): despite the test name, the individual form parts (the file and the
        // extract-line-items parameter) are not inspected here — only the content type is.
        captureHandler.CapturedRequest.Should().NotBeNull();
        captureHandler.CapturedRequest!.RequestUri.Should().NotBeNull();
        captureHandler.CapturedRequest.RequestUri!.ToString().Should().EndWith("infer");
        captureHandler.CapturedRequest.Content.Should().NotBeNull();
        captureHandler.CapturedRequest.Content.Should().BeOfType<MultipartFormDataContent>();
    }

    /// <summary>
    /// Builds the smallest successful API payload shared by most tests: status "success",
    /// message "OK", and a result holding only the supplied (or empty) maps.
    /// </summary>
    /// <param name="fields">Extracted field values; an empty map when omitted.</param>
    /// <param name="confidence">Per-field confidence scores; an empty map when omitted.</param>
    /// <returns>An anonymous object ready for <see cref="JsonSerializer.Serialize{TValue}(TValue, JsonSerializerOptions?)"/>.</returns>
    private static object MinimalSuccessResponse(
        Dictionary<string, string?>? fields = null,
        Dictionary<string, decimal>? confidence = null)
    {
        return new
        {
            status = "success",
            message = "OK",
            result = new
            {
                document_id = "test",
                success = true,
                fields = fields ?? new Dictionary<string, string?>(),
                confidence = confidence ?? new Dictionary<string, decimal>(),
                processing_time_ms = 100.0,
            },
        };
    }

    /// <summary>Creates a client whose stub transport replies with <paramref name="response"/> serialized as JSON.</summary>
    private static HttpClient CreateHttpClient(HttpStatusCode statusCode, object response)
    {
        var json = JsonSerializer.Serialize(response);
        return CreateHttpClient(new MockHttpMessageHandler(statusCode, json));
    }

    /// <summary>Creates a client whose stub transport replies with <paramref name="responseBody"/> verbatim.</summary>
    private static HttpClient CreateHttpClient(HttpStatusCode statusCode, string responseBody)
    {
        return CreateHttpClient(new MockHttpMessageHandler(statusCode, responseBody));
    }

    /// <summary>Wraps <paramref name="handler"/> in a client rooted at the base address every test uses.</summary>
    private static HttpClient CreateHttpClient(HttpMessageHandler handler)
    {
        return new HttpClient(handler) { BaseAddress = new Uri("http://localhost") };
    }

    /// <summary>Builds a response with the given status and a UTF-8 <c>application/json</c> body.</summary>
    private static HttpResponseMessage JsonResponse(HttpStatusCode statusCode, string body)
    {
        return new HttpResponseMessage(statusCode)
        {
            Content = new StringContent(body, Encoding.UTF8, "application/json"),
        };
    }

    /// <summary>Transport stub that answers every request with a fixed status code and body.</summary>
    private sealed class MockHttpMessageHandler : HttpMessageHandler
    {
        private readonly HttpStatusCode _statusCode;
        private readonly string _responseBody;

        public MockHttpMessageHandler(HttpStatusCode statusCode, string responseBody)
        {
            _statusCode = statusCode;
            _responseBody = responseBody;
        }

        protected override Task<HttpResponseMessage> SendAsync(
            HttpRequestMessage request,
            CancellationToken cancellationToken)
        {
            return Task.FromResult(JsonResponse(_statusCode, _responseBody));
        }
    }

    /// <summary>Transport stub that always throws <see cref="TaskCanceledException"/>.</summary>
    private sealed class TimeoutHttpMessageHandler : HttpMessageHandler
    {
        protected override Task<HttpResponseMessage> SendAsync(
            HttpRequestMessage request,
            CancellationToken cancellationToken)
        {
            throw new TaskCanceledException("Request timed out");
        }
    }

    /// <summary>Transport stub that always throws <see cref="HttpRequestException"/>.</summary>
    private sealed class ConnectionErrorHttpMessageHandler : HttpMessageHandler
    {
        protected override Task<HttpResponseMessage> SendAsync(
            HttpRequestMessage request,
            CancellationToken cancellationToken)
        {
            throw new HttpRequestException("Unable to connect to server");
        }
    }

    /// <summary>
    /// Transport stub that records the most recent request before answering with a fixed
    /// status code and the JSON serialization of the supplied response object.
    /// </summary>
    private sealed class CaptureHttpMessageHandler : HttpMessageHandler
    {
        private readonly HttpStatusCode _statusCode;
        private readonly string _responseBody;

        public CaptureHttpMessageHandler(HttpStatusCode statusCode, object response)
        {
            _statusCode = statusCode;
            _responseBody = JsonSerializer.Serialize(response);
        }

        /// <summary>The last request passed to this handler, or <c>null</c> if none was sent.</summary>
        public HttpRequestMessage? CapturedRequest { get; private set; }

        protected override Task<HttpResponseMessage> SendAsync(
            HttpRequestMessage request,
            CancellationToken cancellationToken)
        {
            CapturedRequest = request;
            return Task.FromResult(JsonResponse(_statusCode, _responseBody));
        }
    }
}