Rewrite OcrService to match the actual inference API response format (nested status/result structure with PascalCase/snake_case field names). Register IOcrService in DI with a typed HttpClient and Polly v8 resilience (retry, timeout, circuit breaker via AddStandardResilienceHandler).

Key changes:
- Fix response model to match real API (InferenceApiResponse)
- Map correct field names (InvoiceNumber, InvoiceDueDate, OCR, Amount, etc.)
- Add extract_line_items=true for VAT summary extraction
- Copy stream before sending to avoid disposal conflicts with retries
- Add JsonException handling for malformed responses
- Remove sensitive data from error logs
- Add 35 unit tests covering field mapping, VAT parsing, error handling, decimal/date formats, and content type detection
807 lines
27 KiB
C#
807 lines
27 KiB
C#
using System.Net;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using FiscalFlow.Core.Interfaces;
|
|
using FiscalFlow.Infrastructure.Services;
|
|
using FluentAssertions;
|
|
using Microsoft.Extensions.Logging;
|
|
using Moq;
|
|
using Xunit;
|
|
|
|
namespace FiscalFlow.UnitTests.Services;
|
|
|
|
public sealed class OcrServiceTests
|
|
{
|
|
private readonly Mock<ILogger<OcrService>> _loggerMock;
|
|
|
|
/// <summary>
/// Initializes the logger mock that the tests in this class hand to <see cref="OcrService"/>.
/// </summary>
public OcrServiceTests() => _loggerMock = new Mock<ILogger<OcrService>>();
|
|
|
|
/// <summary>
/// Feeds a fully populated "success" payload (fields, per-field confidences and a VAT summary)
/// through <c>ExtractAsync</c> and checks every mapped value on the result.
/// </summary>
[Fact]
public async Task ExtractAsync_SuccessfulExtraction_ReturnsCompleteOcrResult()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "Processed document abc123",
        result = new
        {
            document_id = "abc123",
            success = true,
            document_type = "invoice",
            fields = new Dictionary<string, string?>
            {
                ["InvoiceNumber"] = "INV-001",
                ["InvoiceDate"] = "2024-01-15",
                ["InvoiceDueDate"] = "2024-02-15",
                ["OCR"] = "123456789",
                ["Bankgiro"] = "1234-5678",
                ["Plusgiro"] = null,
                ["Amount"] = "12500.00",
                ["SupplierName"] = "Test AB",
                ["supplier_org_number"] = "556677-8899",
                ["customer_number"] = "C-001",
                ["payment_line"] = "# 123456789 # 12500 00 #",
                ["Currency"] = "SEK",
            },
            confidence = new Dictionary<string, decimal>
            {
                ["InvoiceNumber"] = 0.95m,
                ["InvoiceDate"] = 0.88m,
                ["Amount"] = 0.92m,
            },
            detections = new List<object>(),
            processing_time_ms = 1234.5,
            errors = new List<string>(),
            vat_summary = new
            {
                breakdowns = new[]
                {
                    new
                    {
                        rate = 25.0m,
                        vat_amount = "2500.00",
                        base_amount = "10000.00",
                        source = "regex",
                    },
                },
                total_excl_vat = "10000.00",
                total_vat = "2500.00",
                total_incl_vat = "12500.00",
                confidence = 0.85m,
            },
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeTrue();
    // (0.95 + 0.88 + 0.92) / 3 = 0.91666..., i.e. 0.9167 at four decimals.
    result.Confidence.Should().Be(0.9167m);
    result.ProcessingTimeMs.Should().Be(1234.5);
    result.DocumentType.Should().Be("invoice");
    result.FieldConfidences.Should().ContainKey("InvoiceNumber").WhoseValue.Should().Be(0.95m);
    result.FieldConfidences.Should().ContainKey("InvoiceDate").WhoseValue.Should().Be(0.88m);
    result.FieldConfidences.Should().ContainKey("Amount").WhoseValue.Should().Be(0.92m);

    var data = result.Data;
    data.Should().NotBeNull();
    data!.InvoiceNumber.Should().Be("INV-001");
    data.InvoiceDate.Should().Be(new DateTime(2024, 1, 15));
    data.DueDate.Should().Be(new DateTime(2024, 2, 15));
    data.OcrNumber.Should().Be("123456789");
    data.Bankgiro.Should().Be("1234-5678");
    data.Plusgiro.Should().BeNull();
    data.AmountTotal.Should().Be(12500.00m);
    data.SupplierName.Should().Be("Test AB");
    data.SupplierOrgNumber.Should().Be("556677-8899");
    data.CustomerNumber.Should().Be("C-001");
    data.PaymentLine.Should().Be("# 123456789 # 12500 00 #");
    data.Currency.Should().Be("SEK");
    data.AmountVat.Should().Be(2500.00m);
    data.VatRate.Should().Be(25);
}
|
|
|
|
/// <summary>
/// Checks that each API field key (PascalCase and snake_case alike) lands on the
/// corresponding property of the extracted invoice data.
/// </summary>
[Fact]
public async Task ExtractAsync_FieldMapping_MapsAllFieldsCorrectly()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>
            {
                ["InvoiceNumber"] = "TEST-123",
                ["InvoiceDate"] = "2024-03-20",
                ["InvoiceDueDate"] = "2024-04-20",
                ["OCR"] = "987654321",
                ["Bankgiro"] = "9999-8888",
                ["Plusgiro"] = "123456-7",
                ["Amount"] = "5000.00",
                ["supplier_org_number"] = "111222-3333",
                ["customer_number"] = "CUST-999",
                ["payment_line"] = "Payment reference line",
            },
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 500.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    var data = result.Data;
    data.Should().NotBeNull();
    data!.InvoiceNumber.Should().Be("TEST-123");
    data.InvoiceDate.Should().Be(new DateTime(2024, 3, 20));
    data.DueDate.Should().Be(new DateTime(2024, 4, 20));
    data.OcrNumber.Should().Be("987654321");
    data.Bankgiro.Should().Be("9999-8888");
    data.Plusgiro.Should().Be("123456-7");
    data.AmountTotal.Should().Be(5000.00m);
    data.SupplierOrgNumber.Should().Be("111222-3333");
    data.CustomerNumber.Should().Be("CUST-999");
    data.PaymentLine.Should().Be("Payment reference line");
}
|
|
|
|
/// <summary>
/// With a VAT summary that carries <c>total_vat</c> and a single breakdown, the VAT amount
/// and VAT rate on the extracted data are expected to come from that summary.
/// </summary>
[Fact]
public async Task ExtractAsync_VatSummaryParsing_PopulatesVatFieldsFromTotalVat()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            vat_summary = new
            {
                total_vat = "1250.50",
                breakdowns = new[]
                {
                    new
                    {
                        rate = 25.0m,
                        vat_amount = "1250.50",
                        base_amount = "5000.00",
                        source = "table",
                    },
                },
            },
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    var data = result.Data;
    data.Should().NotBeNull();
    data!.AmountVat.Should().Be(1250.50m);
    data.VatRate.Should().Be(25);
}
|
|
|
|
/// <summary>
/// When the VAT summary has no <c>total_vat</c>, the VAT amount is expected to be the sum of
/// the breakdown amounts (1000.00 + 240.00) and the rate that of the first breakdown (25).
/// </summary>
[Fact]
public async Task ExtractAsync_VatSummaryWithoutTotalVat_SumsFromBreakdowns()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            vat_summary = new
            {
                breakdowns = new[]
                {
                    new
                    {
                        rate = 25.0m,
                        vat_amount = "1000.00",
                        base_amount = "4000.00",
                        source = "table",
                    },
                    new
                    {
                        rate = 12.0m,
                        vat_amount = "240.00",
                        base_amount = "2000.00",
                        source = "table",
                    },
                },
            },
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    var data = result.Data;
    data.Should().NotBeNull();
    data!.AmountVat.Should().Be(1240.00m);
    data.VatRate.Should().Be(25);
}
|
|
|
|
/// <summary>
/// The overall confidence is expected to be the mean of the per-field confidences,
/// rounded to four decimal places.
/// </summary>
[Fact]
public async Task ExtractAsync_ConfidenceCalculation_AveragesFieldConfidencesAndRoundsToFourDecimals()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>
            {
                ["Field1"] = 0.123456789m,
                ["Field2"] = 0.987654321m,
                ["Field3"] = 0.555555555m,
            },
            processing_time_ms = 100.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    // 1.666666665 / 3 = 0.555555555, i.e. 0.5556 at four decimals.
    result.Confidence.Should().Be(0.5556m);
}
|
|
|
|
/// <summary>
/// An empty per-field confidence map must yield an overall confidence of zero.
/// </summary>
[Fact]
public async Task ExtractAsync_NoConfidenceScores_ReturnsZeroConfidence()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Confidence.Should().Be(0m);
}
|
|
|
|
/// <summary>
/// An HTTP 500 reply must produce a failed result whose message names the status code
/// and does not echo the response body.
/// </summary>
[Fact]
public async Task ExtractAsync_HttpError500_ReturnsFailureWithErrorMessage()
{
    // Arrange
    // A string argument selects the raw-body overload, so the body is sent verbatim.
    using var httpClient = CreateHttpClient(HttpStatusCode.InternalServerError, "Internal server error");
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeFalse();
    result.ErrorMessage.Should().Be("OCR API returned InternalServerError");
}
|
|
|
|
/// <summary>
/// An HTTP 200 reply whose body is an empty JSON object (<c>{}</c>, so neither status nor
/// result is present) must produce a failed result with a non-empty error message.
/// </summary>
[Fact]
public async Task ExtractAsync_NullResponse_ReturnsFailure()
{
    // Arrange
    // A string argument selects the raw-body overload, so the body is exactly "{}".
    // NOTE(review): despite the test name, a literal JSON `null` body is not exercised here.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, "{}");
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeFalse();
    result.ErrorMessage.Should().NotBeNullOrEmpty();
}
|
|
|
|
/// <summary>
/// An "error" reply with a null <c>result</c> must produce a failed result that carries
/// the API's own message.
/// </summary>
[Fact]
public async Task ExtractAsync_NullResult_ReturnsFailureWithMessage()
{
    // Arrange
    var apiResponse = new
    {
        status = "error",
        message = "Processing failed",
        result = (object?)null,
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeFalse();
    result.ErrorMessage.Should().Be("Processing failed");
}
|
|
|
|
/// <summary>
/// An "error" status with a present (but unsuccessful) result must produce a failed
/// result that carries the API's message.
/// </summary>
[Fact]
public async Task ExtractAsync_ErrorStatus_ReturnsFailureWithMessage()
{
    // Arrange
    var apiResponse = new
    {
        status = "error",
        message = "Document format not supported",
        result = new
        {
            document_id = "test",
            success = false,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 10.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeFalse();
    result.ErrorMessage.Should().Be("Document format not supported");
}
|
|
|
|
/// <summary>
/// A "partial" status with a successful result must still be reported as success and
/// expose the fields that were detected.
/// </summary>
[Fact]
public async Task ExtractAsync_PartialStatus_TreatedAsSuccess()
{
    // Arrange
    var apiResponse = new
    {
        status = "partial",
        message = "Some fields not detected",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>
            {
                ["InvoiceNumber"] = "PARTIAL-001",
            },
            confidence = new Dictionary<string, decimal>
            {
                ["InvoiceNumber"] = 0.75m,
            },
            processing_time_ms = 200.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeTrue();
    result.Data.Should().NotBeNull();
    result.Data!.InvoiceNumber.Should().Be("PARTIAL-001");
}
|
|
|
|
/// <summary>
/// When the transport throws <see cref="TaskCanceledException"/> although the caller's token
/// was never cancelled, the failure must be reported as a timeout.
/// </summary>
[Fact]
public async Task ExtractAsync_TaskCanceledException_NotUserCancellation_ReturnsTimeoutError()
{
    // Arrange
    var handler = new TimeoutHttpMessageHandler();
    // Disposing the client also disposes the handler it wraps.
    using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost") };
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    // CancellationToken.None guarantees the cancellation did not originate from the caller.
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeFalse();
    result.ErrorMessage.Should().Be("OCR API request timed out");
}
|
|
|
|
/// <summary>
/// When the transport throws <see cref="HttpRequestException"/>, the failure must be
/// reported as a connection error.
/// </summary>
[Fact]
public async Task ExtractAsync_HttpRequestException_ReturnsConnectionError()
{
    // Arrange
    var handler = new ConnectionErrorHttpMessageHandler();
    // Disposing the client also disposes the handler it wraps.
    using var httpClient = new HttpClient(handler) { BaseAddress = new Uri("http://localhost") };
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeFalse();
    result.ErrorMessage.Should().Contain("OCR API connection error");
}
|
|
|
|
/// <summary>
/// The <c>Amount</c> field must parse to the same decimal whether it uses a dot or comma as
/// decimal separator, and a space or comma as thousands separator.
/// </summary>
/// <param name="input">Raw amount text as returned by the API.</param>
/// <param name="expected">Value the amount must parse to.</param>
[Theory]
[InlineData("1234.56", 1234.56)]
[InlineData("1 234,56", 1234.56)]
[InlineData("1,234.56", 1234.56)]
[InlineData("1234,56", 1234.56)]
[InlineData("12345", 12345)]
[InlineData("0.99", 0.99)]
public async Task ExtractAsync_DecimalParsing_HandlesVariousFormats(string input, decimal expected)
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>
            {
                ["Amount"] = input,
            },
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Data.Should().NotBeNull();
    result.Data!.AmountTotal.Should().Be(expected);
}
|
|
|
|
/// <summary>
/// The <c>InvoiceDate</c> field must parse to the same calendar date from ISO,
/// day-first slash, day-first dot and year-first slash notations.
/// </summary>
/// <param name="input">Raw date text as returned by the API.</param>
/// <param name="year">Expected year.</param>
/// <param name="month">Expected month.</param>
/// <param name="day">Expected day of month.</param>
[Theory]
[InlineData("2024-01-15", 2024, 1, 15)]
[InlineData("15/01/2024", 2024, 1, 15)]
[InlineData("15.01.2024", 2024, 1, 15)]
[InlineData("2024/01/15", 2024, 1, 15)]
public async Task ExtractAsync_DateParsing_HandlesVariousFormats(string input, int year, int month, int day)
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>
            {
                ["InvoiceDate"] = input,
            },
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Data.Should().NotBeNull();
    result.Data!.InvoiceDate.Should().Be(new DateTime(year, month, day));
}
|
|
|
|
/// <summary>
/// Extraction must succeed for PDF, PNG and JPEG file names in either case, and for an
/// unrecognised extension.
/// </summary>
/// <param name="fileName">File name passed to <c>ExtractAsync</c>.</param>
/// <remarks>
/// NOTE(review): only the overall success flag is asserted; the content type sent for each
/// extension is not inspected by this test.
/// </remarks>
[Theory]
[InlineData("test.pdf")]
[InlineData("invoice.PDF")]
[InlineData("scan.png")]
[InlineData("photo.PNG")]
[InlineData("image.jpg")]
[InlineData("photo.jpeg")]
[InlineData("document.JPG")]
[InlineData("unknown.txt")]
public async Task ExtractAsync_DifferentFileTypes_ProcessesSuccessfully(string fileName)
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, fileName, CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.Success.Should().BeTrue();
}
|
|
|
|
/// <summary>
/// Entries in the result's <c>errors</c> list must be surfaced in the error message,
/// joined with "; ".
/// </summary>
[Fact]
public async Task ExtractAsync_ErrorsInResult_IncludedInErrorMessage()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            errors = new List<string> { "Low quality image", "Missing page 2" },
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Should().NotBeNull();
    result.ErrorMessage.Should().Be("Low quality image; Missing page 2");
}
|
|
|
|
/// <summary>
/// When the API returns no <c>Currency</c> field, the extracted data must default to "SEK".
/// </summary>
[Fact]
public async Task ExtractAsync_DefaultCurrency_SetToSEK()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Data.Should().NotBeNull();
    result.Data!.Currency.Should().Be("SEK");
}
|
|
|
|
/// <summary>
/// A <c>Currency</c> field supplied by the API must be used as-is on the extracted data.
/// </summary>
[Fact]
public async Task ExtractAsync_CustomCurrency_OverridesDefault()
{
    // Arrange
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>
            {
                ["Currency"] = "EUR",
            },
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    // The client owns the mock handler; dispose both when the test ends.
    using var httpClient = CreateHttpClient(HttpStatusCode.OK, apiResponse);
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream();

    // Act
    var result = await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    result.Data.Should().NotBeNull();
    result.Data!.Currency.Should().Be("EUR");
}
|
|
|
|
/// <summary>
/// Inspects the outgoing request: it must target the <c>infer</c> endpoint and carry a multipart
/// form that contains the uploaded file and the <c>extract_line_items</c> flag.
/// </summary>
[Fact]
public async Task ExtractAsync_MultipartFormData_IncludesFileAndExtractLineItemsParameter()
{
    // Arrange
    const string fileContent = "test file content";
    var apiResponse = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    var captureHandler = new CaptureHttpMessageHandler(HttpStatusCode.OK, apiResponse);
    // Disposing the client also disposes the handler it wraps.
    using var httpClient = new HttpClient(captureHandler) { BaseAddress = new Uri("http://localhost") };
    var service = new OcrService(httpClient, _loggerMock.Object);
    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(fileContent));

    // Act
    await service.ExtractAsync(stream, "test.pdf", CancellationToken.None);

    // Assert
    captureHandler.CapturedRequest.Should().NotBeNull();
    captureHandler.CapturedRequest!.RequestUri.Should().NotBeNull();
    captureHandler.CapturedRequest.RequestUri!.ToString().Should().EndWith("infer");
    captureHandler.CapturedRequest.Content.Should().NotBeNull();
    captureHandler.CapturedRequest.Content.Should().BeOfType<MultipartFormDataContent>();

    // The parts were snapshotted inside the handler, so they remain readable even if the
    // service has already disposed the request content.
    var filePart = captureHandler.CapturedParts.FirstOrDefault(part => part.FileName == "test.pdf");
    filePart.Should().NotBeNull("the document must be uploaded as a file part named after the input file");
    Encoding.UTF8.GetString(filePart!.Body).Should().Be(fileContent);

    // NOTE(review): the form-field name and value are taken from this test's name and the
    // change description ("extract_line_items=true") - confirm against OcrService.
    var flagPart = captureHandler.CapturedParts.FirstOrDefault(part => part.Name == "extract_line_items");
    flagPart.Should().NotBeNull("the request must ask the API to extract line items");
    Encoding.UTF8.GetString(flagPart!.Body).Should().BeEquivalentTo("true");
}

/// <summary>
/// Builds an <see cref="HttpClient"/> whose every request is answered with
/// <paramref name="statusCode"/> and <paramref name="response"/> serialized as JSON.
/// </summary>
/// <param name="statusCode">Status code of the canned reply.</param>
/// <param name="response">Object serialized with <see cref="JsonSerializer"/> to form the body.</param>
/// <returns>A client bound to <c>http://localhost</c>; the caller owns and should dispose it.</returns>
private static HttpClient CreateHttpClient(HttpStatusCode statusCode, object response)
{
    var json = JsonSerializer.Serialize(response);
    var handler = new MockHttpMessageHandler(statusCode, json);
    return new HttpClient(handler) { BaseAddress = new Uri("http://localhost") };
}

/// <summary>
/// Builds an <see cref="HttpClient"/> whose every request is answered with
/// <paramref name="statusCode"/> and <paramref name="responseBody"/> sent verbatim.
/// </summary>
/// <param name="statusCode">Status code of the canned reply.</param>
/// <param name="responseBody">Raw body text; it is not serialized again.</param>
/// <returns>A client bound to <c>http://localhost</c>; the caller owns and should dispose it.</returns>
private static HttpClient CreateHttpClient(HttpStatusCode statusCode, string responseBody)
{
    var handler = new MockHttpMessageHandler(statusCode, responseBody);
    return new HttpClient(handler) { BaseAddress = new Uri("http://localhost") };
}

/// <summary>
/// Handler that answers every request with a fixed status code and body.
/// </summary>
private sealed class MockHttpMessageHandler : HttpMessageHandler
{
    private readonly HttpStatusCode _statusCode;
    private readonly string _responseBody;

    public MockHttpMessageHandler(HttpStatusCode statusCode, string responseBody)
    {
        _statusCode = statusCode;
        _responseBody = responseBody;
    }

    /// <summary>Returns the canned reply, labelled as UTF-8 <c>application/json</c>.</summary>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var response = new HttpResponseMessage(_statusCode)
        {
            Content = new StringContent(_responseBody, Encoding.UTF8, "application/json"),
        };
        return Task.FromResult(response);
    }
}

/// <summary>
/// Handler that fails every request with a <see cref="TaskCanceledException"/>,
/// regardless of the state of the supplied cancellation token.
/// </summary>
private sealed class TimeoutHttpMessageHandler : HttpMessageHandler
{
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        throw new TaskCanceledException("Request timed out");
    }
}

/// <summary>
/// Handler that fails every request with an <see cref="HttpRequestException"/>.
/// </summary>
private sealed class ConnectionErrorHttpMessageHandler : HttpMessageHandler
{
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        throw new HttpRequestException("Unable to connect to server");
    }
}

/// <summary>
/// Handler that records the last request it received, snapshots any multipart form parts,
/// and answers with a fixed status code and JSON body.
/// </summary>
private sealed class CaptureHttpMessageHandler : HttpMessageHandler
{
    private readonly HttpStatusCode _statusCode;
    private readonly string _responseBody;

    /// <summary>One multipart form part as seen at send time.</summary>
    /// <param name="Name">Form-field name from the part's Content-Disposition header, if any.</param>
    /// <param name="FileName">File name from the part's Content-Disposition header, if any.</param>
    /// <param name="Body">Raw bytes of the part.</param>
    public sealed record CapturedPart(string? Name, string? FileName, byte[] Body);

    /// <summary>The most recent request passed to the handler, or null if none was sent.</summary>
    public HttpRequestMessage? CapturedRequest { get; private set; }

    /// <summary>
    /// Parts of the most recent multipart form request; empty when no request was sent or
    /// its content was not multipart form data.
    /// </summary>
    public IReadOnlyList<CapturedPart> CapturedParts { get; private set; } = Array.Empty<CapturedPart>();

    public CaptureHttpMessageHandler(HttpStatusCode statusCode, object response)
    {
        _statusCode = statusCode;
        _responseBody = JsonSerializer.Serialize(response);
    }

    protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        CapturedRequest = request;

        // Disposing multipart content also disposes its parts, and the sender may do that as
        // soon as this call returns - so copy what the tests need while it is still readable.
        var parts = new List<CapturedPart>();
        if (request.Content is MultipartFormDataContent multipart)
        {
            foreach (var part in multipart)
            {
                var disposition = part.Headers.ContentDisposition;
                var body = await part.ReadAsByteArrayAsync();
                // Names may have been supplied pre-quoted by the sender; normalise them.
                parts.Add(new CapturedPart(
                    disposition?.Name?.Trim('"'),
                    disposition?.FileName?.Trim('"'),
                    body));
            }
        }

        CapturedParts = parts;

        return new HttpResponseMessage(_statusCode)
        {
            Content = new StringContent(_responseBody, Encoding.UTF8, "application/json"),
        };
    }
}
|
|
}
|