feat: integrate invoice-master-poc-v2 inference API
Rewrite OcrService to match the actual inference API response format (nested status/result structure with PascalCase/snake_case field names). Register IOcrService in DI with a typed HttpClient and Polly v8 resilience (retry, timeout, and circuit breaker via AddStandardResilienceHandler).

Key changes:
- Fix response model to match real API (InferenceApiResponse)
- Map correct field names (InvoiceNumber, InvoiceDueDate, OCR, Amount, etc.)
- Add extract_line_items=true for VAT summary extraction
- Copy stream before sending to avoid disposal conflicts with retries
- Add JsonException handling for malformed responses
- Remove sensitive data from error logs
- Add 35 unit tests covering field mapping, VAT parsing, error handling, decimal/date formats, and content type detection
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration" Version="10.0.3" />
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
|
||||
<PackageReference Include="xunit" Version="2.9.2" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="3.0.0">
|
||||
@@ -22,6 +23,7 @@
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\src\FiscalFlow.Application\FiscalFlow.Application.csproj" />
|
||||
<ProjectReference Include="..\..\src\FiscalFlow.Core\FiscalFlow.Core.csproj" />
|
||||
<ProjectReference Include="..\..\src\FiscalFlow.Infrastructure\FiscalFlow.Infrastructure.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
806
backend/tests/FiscalFlow.UnitTests/Services/OcrServiceTests.cs
Normal file
806
backend/tests/FiscalFlow.UnitTests/Services/OcrServiceTests.cs
Normal file
@@ -0,0 +1,806 @@
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using FiscalFlow.Core.Interfaces;
|
||||
using FiscalFlow.Infrastructure.Services;
|
||||
using FluentAssertions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Moq;
|
||||
using Xunit;
|
||||
|
||||
namespace FiscalFlow.UnitTests.Services;
|
||||
|
||||
public sealed class OcrServiceTests
|
||||
{
|
||||
// Logger test double handed to every OcrService constructed in this class.
private readonly Mock<ILogger<OcrService>> _loggerMock = new();
|
||||
|
||||
/// <summary>
/// Happy path: a fully populated "success" payload (fields, per-field confidences and a
/// VAT summary) must surface on the result and on its invoice data.
/// </summary>
[Fact]
public async Task ExtractAsync_SuccessfulExtraction_ReturnsCompleteOcrResult()
{
    // Arrange
    var invoiceFields = new Dictionary<string, string?>
    {
        ["InvoiceNumber"] = "INV-001",
        ["InvoiceDate"] = "2024-01-15",
        ["InvoiceDueDate"] = "2024-02-15",
        ["OCR"] = "123456789",
        ["Bankgiro"] = "1234-5678",
        ["Plusgiro"] = null,
        ["Amount"] = "12500.00",
        ["SupplierName"] = "Test AB",
        ["supplier_org_number"] = "556677-8899",
        ["customer_number"] = "C-001",
        ["payment_line"] = "# 123456789 # 12500 00 #",
        ["Currency"] = "SEK",
    };
    var fieldConfidences = new Dictionary<string, decimal>
    {
        ["InvoiceNumber"] = 0.95m,
        ["InvoiceDate"] = 0.88m,
        ["Amount"] = 0.92m,
    };
    var vatSummary = new
    {
        breakdowns = new[]
        {
            new
            {
                rate = 25.0m,
                vat_amount = "2500.00",
                base_amount = "10000.00",
                source = "regex",
            },
        },
        total_excl_vat = "10000.00",
        total_vat = "2500.00",
        total_incl_vat = "12500.00",
        confidence = 0.85m,
    };
    var payload = new
    {
        status = "success",
        message = "Processed document abc123",
        result = new
        {
            document_id = "abc123",
            success = true,
            document_type = "invoice",
            fields = invoiceFields,
            confidence = fieldConfidences,
            detections = new List<object>(),
            processing_time_ms = 1234.5,
            errors = new List<string>(),
            vat_summary = vatSummary,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert: result-level values
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeTrue();
    // (0.95 + 0.88 + 0.92) / 3 rounded to four decimals.
    outcome.Confidence.Should().Be(0.9167m);
    outcome.ProcessingTimeMs.Should().Be(1234.5);
    outcome.DocumentType.Should().Be("invoice");
    outcome.FieldConfidences.Should().ContainKey("InvoiceNumber").WhoseValue.Should().Be(0.95m);
    outcome.FieldConfidences.Should().ContainKey("InvoiceDate").WhoseValue.Should().Be(0.88m);
    outcome.FieldConfidences.Should().ContainKey("Amount").WhoseValue.Should().Be(0.92m);

    // Assert: mapped invoice data
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.InvoiceNumber.Should().Be("INV-001");
    invoice.InvoiceDate.Should().Be(new DateTime(2024, 1, 15));
    invoice.DueDate.Should().Be(new DateTime(2024, 2, 15));
    invoice.OcrNumber.Should().Be("123456789");
    invoice.Bankgiro.Should().Be("1234-5678");
    invoice.Plusgiro.Should().BeNull();
    invoice.AmountTotal.Should().Be(12500.00m);
    invoice.SupplierName.Should().Be("Test AB");
    invoice.SupplierOrgNumber.Should().Be("556677-8899");
    invoice.CustomerNumber.Should().Be("C-001");
    invoice.PaymentLine.Should().Be("# 123456789 # 12500 00 #");
    invoice.Currency.Should().Be("SEK");
    invoice.AmountVat.Should().Be(2500.00m);
    invoice.VatRate.Should().Be(25);
}
|
||||
|
||||
/// <summary>
/// Each API field key (PascalCase and snake_case alike) must land on the matching
/// property of the extracted invoice data.
/// </summary>
[Fact]
public async Task ExtractAsync_FieldMapping_MapsAllFieldsCorrectly()
{
    // Arrange
    var invoiceFields = new Dictionary<string, string?>
    {
        ["InvoiceNumber"] = "TEST-123",
        ["InvoiceDate"] = "2024-03-20",
        ["InvoiceDueDate"] = "2024-04-20",
        ["OCR"] = "987654321",
        ["Bankgiro"] = "9999-8888",
        ["Plusgiro"] = "123456-7",
        ["Amount"] = "5000.00",
        ["supplier_org_number"] = "111222-3333",
        ["customer_number"] = "CUST-999",
        ["payment_line"] = "Payment reference line",
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = invoiceFields,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 500.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.InvoiceNumber.Should().Be("TEST-123");
    invoice.InvoiceDate.Should().Be(new DateTime(2024, 3, 20));
    invoice.DueDate.Should().Be(new DateTime(2024, 4, 20));
    invoice.OcrNumber.Should().Be("987654321");
    invoice.Bankgiro.Should().Be("9999-8888");
    invoice.Plusgiro.Should().Be("123456-7");
    invoice.AmountTotal.Should().Be(5000.00m);
    invoice.SupplierOrgNumber.Should().Be("111222-3333");
    invoice.CustomerNumber.Should().Be("CUST-999");
    invoice.PaymentLine.Should().Be("Payment reference line");
}
|
||||
|
||||
/// <summary>
/// When the VAT summary carries a <c>total_vat</c>, the VAT amount and rate on the
/// invoice data are expected to reflect it (1250.50 at 25 %).
/// </summary>
[Fact]
public async Task ExtractAsync_VatSummaryParsing_PopulatesVatFieldsFromTotalVat()
{
    // Arrange
    var vatSummary = new
    {
        total_vat = "1250.50",
        breakdowns = new[]
        {
            new
            {
                rate = 25.0m,
                vat_amount = "1250.50",
                base_amount = "5000.00",
                source = "table",
            },
        },
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            vat_summary = vatSummary,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.AmountVat.Should().Be(1250.50m);
    invoice.VatRate.Should().Be(25);
}
|
||||
|
||||
/// <summary>
/// Without a <c>total_vat</c>, the expected VAT amount is the sum of the breakdown
/// amounts (1000.00 + 240.00), and the expected rate is 25.
/// </summary>
[Fact]
public async Task ExtractAsync_VatSummaryWithoutTotalVat_SumsFromBreakdowns()
{
    // Arrange
    var breakdowns = new[]
    {
        new
        {
            rate = 25.0m,
            vat_amount = "1000.00",
            base_amount = "4000.00",
            source = "table",
        },
        new
        {
            rate = 12.0m,
            vat_amount = "240.00",
            base_amount = "2000.00",
            source = "table",
        },
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            vat_summary = new { breakdowns },
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.AmountVat.Should().Be(1240.00m);
    invoice.VatRate.Should().Be(25);
}
|
||||
|
||||
/// <summary>
/// The overall confidence is expected to be the mean of the per-field confidences,
/// rounded to four decimal places.
/// </summary>
[Fact]
public async Task ExtractAsync_ConfidenceCalculation_AveragesFieldConfidencesAndRoundsToFourDecimals()
{
    // Arrange
    var fieldConfidences = new Dictionary<string, decimal>
    {
        ["Field1"] = 0.123456789m,
        ["Field2"] = 0.987654321m,
        ["Field3"] = 0.555555555m,
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = fieldConfidences,
            processing_time_ms = 100.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert: the three inputs average to 0.555555555, i.e. 0.5556 at four decimals.
    outcome.Confidence.Should().Be(0.5556m);
}
|
||||
|
||||
/// <summary>
/// An empty per-field confidence map is expected to produce an overall confidence of zero.
/// </summary>
[Fact]
public async Task ExtractAsync_NoConfidenceScores_ReturnsZeroConfidence()
{
    // Arrange
    var inference = new
    {
        document_id = "test",
        success = true,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 100.0,
    };
    var payload = new { status = "success", message = "OK", result = inference };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Confidence.Should().Be(0m);
}
|
||||
|
||||
/// <summary>
/// An HTTP 500 reply is expected to yield a failed result whose message names the status code.
/// </summary>
[Fact]
public async Task ExtractAsync_HttpError500_ReturnsFailureWithErrorMessage()
{
    // Arrange
    const string body = "Internal server error";
    var client = CreateHttpClient(HttpStatusCode.InternalServerError, body);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("OCR API returned InternalServerError");
}
|
||||
|
||||
/// <summary>
/// An HTTP 200 reply whose body is the empty JSON object <c>{}</c> is expected to yield a
/// failed result carrying some error message.
/// </summary>
[Fact]
public async Task ExtractAsync_NullResponse_ReturnsFailure()
{
    // Arrange
    const string emptyJsonObject = "{}";
    var client = CreateHttpClient(HttpStatusCode.OK, emptyJsonObject);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().NotBeNullOrEmpty();
}
|
||||
|
||||
/// <summary>
/// An "error" envelope with a null <c>result</c> is expected to fail and to pass the
/// envelope's message through as the error message.
/// </summary>
[Fact]
public async Task ExtractAsync_NullResult_ReturnsFailureWithMessage()
{
    // Arrange
    object? missingResult = null;
    var payload = new
    {
        status = "error",
        message = "Processing failed",
        result = missingResult,
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("Processing failed");
}
|
||||
|
||||
/// <summary>
/// An "error" envelope that still carries a result object is expected to fail with the
/// envelope's message.
/// </summary>
[Fact]
public async Task ExtractAsync_ErrorStatus_ReturnsFailureWithMessage()
{
    // Arrange
    var inference = new
    {
        document_id = "test",
        success = false,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 10.0,
    };
    var payload = new
    {
        status = "error",
        message = "Document format not supported",
        result = inference,
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("Document format not supported");
}
|
||||
|
||||
/// <summary>
/// A "partial" envelope is expected to count as success and still expose the fields
/// that were detected.
/// </summary>
[Fact]
public async Task ExtractAsync_PartialStatus_TreatedAsSuccess()
{
    // Arrange
    var detectedFields = new Dictionary<string, string?>
    {
        ["InvoiceNumber"] = "PARTIAL-001",
    };
    var fieldConfidences = new Dictionary<string, decimal>
    {
        ["InvoiceNumber"] = 0.75m,
    };
    var payload = new
    {
        status = "partial",
        message = "Some fields not detected",
        result = new
        {
            document_id = "test",
            success = true,
            fields = detectedFields,
            confidence = fieldConfidences,
            processing_time_ms = 200.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeTrue();
    outcome.Data.Should().NotBeNull();
    outcome.Data!.InvoiceNumber.Should().Be("PARTIAL-001");
}
|
||||
|
||||
/// <summary>
/// A <see cref="TaskCanceledException"/> raised by the transport while the caller passed
/// <see cref="CancellationToken.None"/> is expected to be reported as a timeout.
/// </summary>
[Fact]
public async Task ExtractAsync_TaskCanceledException_NotUserCancellation_ReturnsTimeoutError()
{
    // Arrange
    var client = new HttpClient(new TimeoutHttpMessageHandler())
    {
        BaseAddress = new Uri("http://localhost"),
    };
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("OCR API request timed out");
}
|
||||
|
||||
/// <summary>
/// An <see cref="HttpRequestException"/> raised by the transport is expected to be
/// reported as a failed result mentioning a connection error.
/// </summary>
[Fact]
public async Task ExtractAsync_HttpRequestException_ReturnsConnectionError()
{
    // Arrange
    var client = new HttpClient(new ConnectionErrorHttpMessageHandler())
    {
        BaseAddress = new Uri("http://localhost"),
    };
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Contain("OCR API connection error");
}
|
||||
|
||||
/// <summary>
/// The "Amount" field is expected to parse with dot or comma decimal separators and
/// with space or comma thousands separators.
/// </summary>
/// <param name="input">Raw amount text as returned by the API.</param>
/// <param name="expected">Amount the invoice data is expected to hold.</param>
[Theory]
[InlineData("1234.56", 1234.56)]
[InlineData("1 234,56", 1234.56)]
[InlineData("1,234.56", 1234.56)]
[InlineData("1234,56", 1234.56)]
[InlineData("12345", 12345)]
[InlineData("0.99", 0.99)]
public async Task ExtractAsync_DecimalParsing_HandlesVariousFormats(string input, decimal expected)
{
    // Arrange
    var amountOnly = new Dictionary<string, string?> { ["Amount"] = input };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = amountOnly,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    outcome.Data!.AmountTotal.Should().Be(expected);
}
|
||||
|
||||
/// <summary>
/// The "InvoiceDate" field is expected to parse in ISO, slash-separated and
/// dot-separated notations.
/// </summary>
/// <param name="input">Raw date text as returned by the API.</param>
/// <param name="year">Expected year component.</param>
/// <param name="month">Expected month component.</param>
/// <param name="day">Expected day component.</param>
[Theory]
[InlineData("2024-01-15", 2024, 1, 15)]
[InlineData("15/01/2024", 2024, 1, 15)]
[InlineData("15.01.2024", 2024, 1, 15)]
[InlineData("2024/01/15", 2024, 1, 15)]
public async Task ExtractAsync_DateParsing_HandlesVariousFormats(string input, int year, int month, int day)
{
    // Arrange
    var dateOnly = new Dictionary<string, string?> { ["InvoiceDate"] = input };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = dateOnly,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    outcome.Data!.InvoiceDate.Should().Be(new DateTime(year, month, day));
}
|
||||
|
||||
/// <summary>
/// Extraction is expected to succeed for PDF, PNG and JPEG names in either case, and
/// for an unrecognised extension.
/// </summary>
/// <param name="fileName">File name passed to the service alongside the (empty) stream.</param>
[Theory]
[InlineData("test.pdf")]
[InlineData("invoice.PDF")]
[InlineData("scan.png")]
[InlineData("photo.PNG")]
[InlineData("image.jpg")]
[InlineData("photo.jpeg")]
[InlineData("document.JPG")]
[InlineData("unknown.txt")]
public async Task ExtractAsync_DifferentFileTypes_ProcessesSuccessfully(string fileName)
{
    // Arrange
    var inference = new
    {
        document_id = "test",
        success = true,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 100.0,
    };
    var payload = new { status = "success", message = "OK", result = inference };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, fileName, CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeTrue();
}
|
||||
|
||||
/// <summary>
/// Entries of the result's <c>errors</c> list are expected to be joined with "; " into
/// the error message.
/// </summary>
[Fact]
public async Task ExtractAsync_ErrorsInResult_IncludedInErrorMessage()
{
    // Arrange
    var reportedErrors = new List<string> { "Low quality image", "Missing page 2" };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            errors = reportedErrors,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.ErrorMessage.Should().Be("Low quality image; Missing page 2");
}
|
||||
|
||||
/// <summary>
/// When the API returns no "Currency" field, the invoice data is expected to report "SEK".
/// </summary>
[Fact]
public async Task ExtractAsync_DefaultCurrency_SetToSEK()
{
    // Arrange
    var inference = new
    {
        document_id = "test",
        success = true,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 100.0,
    };
    var payload = new { status = "success", message = "OK", result = inference };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    outcome.Data!.Currency.Should().Be("SEK");
}
|
||||
|
||||
/// <summary>
/// A "Currency" field supplied by the API is expected to appear unchanged on the invoice data.
/// </summary>
[Fact]
public async Task ExtractAsync_CustomCurrency_OverridesDefault()
{
    // Arrange
    var currencyOnly = new Dictionary<string, string?> { ["Currency"] = "EUR" };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = currencyOnly,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    outcome.Data!.Currency.Should().Be("EUR");
}
|
||||
|
||||
/// <summary>
/// Captures the outgoing request and checks that it targets a URI ending in "infer" and
/// carries multipart form-data content.
/// </summary>
/// <remarks>
/// NOTE(review): despite the test name, the individual form parts (the file and the
/// extract_line_items parameter) are not inspected here — only the content type is.
/// </remarks>
[Fact]
public async Task ExtractAsync_MultipartFormData_IncludesFileAndExtractLineItemsParameter()
{
    // Arrange
    var inference = new
    {
        document_id = "test",
        success = true,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 100.0,
    };
    var payload = new { status = "success", message = "OK", result = inference };

    var recorder = new CaptureHttpMessageHandler(HttpStatusCode.OK, payload);
    var client = new HttpClient(recorder) { BaseAddress = new Uri("http://localhost") };
    var sut = new OcrService(client, _loggerMock.Object);
    var fileBytes = Encoding.UTF8.GetBytes("test file content");
    using var document = new MemoryStream(fileBytes);

    // Act
    await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    recorder.CapturedRequest.Should().NotBeNull();
    recorder.CapturedRequest!.RequestUri.Should().NotBeNull();
    recorder.CapturedRequest.RequestUri!.ToString().Should().EndWith("infer");
    recorder.CapturedRequest.Content.Should().NotBeNull();
    recorder.CapturedRequest.Content.Should().BeOfType<MultipartFormDataContent>();
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="HttpClient"/> whose every request is answered with
/// <paramref name="statusCode"/> and <paramref name="response"/> serialized as JSON.
/// </summary>
/// <param name="statusCode">Status code of the canned reply.</param>
/// <param name="response">Object graph serialized with <see cref="JsonSerializer"/> to form the body.</param>
/// <returns>A client rooted at http://localhost backed by the canned handler.</returns>
private static HttpClient CreateHttpClient(HttpStatusCode statusCode, object response)
{
    string serialized = JsonSerializer.Serialize(response);

    // The argument is statically a string, so this binds to the raw-body overload.
    return CreateHttpClient(statusCode, serialized);
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="HttpClient"/> whose every request is answered with
/// <paramref name="statusCode"/> and the literal <paramref name="responseBody"/>.
/// </summary>
/// <param name="statusCode">Status code of the canned reply.</param>
/// <param name="responseBody">Body text returned verbatim.</param>
/// <returns>A client rooted at http://localhost backed by the canned handler.</returns>
private static HttpClient CreateHttpClient(HttpStatusCode statusCode, string responseBody)
{
    var client = new HttpClient(new MockHttpMessageHandler(statusCode, responseBody));
    client.BaseAddress = new Uri("http://localhost");
    return client;
}
|
||||
|
||||
/// <summary>
/// Handler that answers every request with a fixed status code and a fixed body
/// labelled as UTF-8 <c>application/json</c>.
/// </summary>
private sealed class MockHttpMessageHandler : HttpMessageHandler
{
    private readonly HttpStatusCode _status;
    private readonly string _body;

    /// <param name="statusCode">Status code for every reply.</param>
    /// <param name="responseBody">Body text for every reply.</param>
    public MockHttpMessageHandler(HttpStatusCode statusCode, string responseBody)
    {
        (_status, _body) = (statusCode, responseBody);
    }

    /// <summary>Returns a freshly built canned reply; the request itself is ignored.</summary>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var canned = new HttpResponseMessage(_status);
        canned.Content = new StringContent(_body, Encoding.UTF8, "application/json");
        return Task.FromResult(canned);
    }
}
|
||||
|
||||
/// <summary>
/// Handler that fails every request with a <see cref="TaskCanceledException"/>, without
/// any cancellation token being signalled.
/// </summary>
private sealed class TimeoutHttpMessageHandler : HttpMessageHandler
{
    /// <summary>
    /// Always completes with a faulted task carrying a <see cref="TaskCanceledException"/>.
    /// </summary>
    /// <remarks>
    /// The failure is reported through the returned task rather than thrown synchronously,
    /// so it surfaces when the task is awaited, as callers of a Task-returning method expect.
    /// </remarks>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        return Task.FromException<HttpResponseMessage>(new TaskCanceledException("Request timed out"));
    }
}
|
||||
|
||||
/// <summary>
/// Handler that fails every request with an <see cref="HttpRequestException"/>.
/// </summary>
private sealed class ConnectionErrorHttpMessageHandler : HttpMessageHandler
{
    /// <summary>
    /// Always completes with a faulted task carrying an <see cref="HttpRequestException"/>.
    /// </summary>
    /// <remarks>
    /// The failure is reported through the returned task rather than thrown synchronously,
    /// so it surfaces when the task is awaited, as callers of a Task-returning method expect.
    /// </remarks>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        return Task.FromException<HttpResponseMessage>(new HttpRequestException("Unable to connect to server"));
    }
}
|
||||
|
||||
/// <summary>
/// Handler that remembers the most recent request it received and answers with a fixed
/// status code plus a JSON body serialized once at construction time.
/// </summary>
private sealed class CaptureHttpMessageHandler : HttpMessageHandler
{
    private readonly HttpStatusCode _status;
    private readonly string _json;

    /// <param name="statusCode">Status code for every reply.</param>
    /// <param name="response">Object serialized with <see cref="JsonSerializer"/> to form the reply body.</param>
    public CaptureHttpMessageHandler(HttpStatusCode statusCode, object response)
    {
        _json = JsonSerializer.Serialize(response);
        _status = statusCode;
    }

    /// <summary>Last request passed to <see cref="SendAsync"/>; null until the first send.</summary>
    public HttpRequestMessage? CapturedRequest { get; private set; }

    /// <summary>Records the request, then returns the canned reply.</summary>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        CapturedRequest = request;

        var canned = new HttpResponseMessage(_status);
        canned.Content = new StringContent(_json, Encoding.UTF8, "application/json");
        return Task.FromResult(canned);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user