feat: integrate invoice-master-poc-v2 inference API
Rewrite OcrService to match the actual inference API response format (a nested status/result structure with mixed PascalCase and snake_case field names). Register IOcrService in DI with a typed HttpClient and Polly v8 resilience (retry, timeout, and circuit breaker via AddStandardResilienceHandler).

Key changes:
- Fix the response model to match the real API (InferenceApiResponse)
- Map the correct field names (InvoiceNumber, InvoiceDueDate, OCR, Amount, etc.)
- Add extract_line_items=true for VAT summary extraction
- Copy the stream before sending to avoid disposal conflicts with retries
- Add JsonException handling for malformed responses
- Remove sensitive data from error logs
- Add 35 unit tests covering field mapping, VAT parsing, error handling, decimal/date formats, and content type detection
This commit is contained in:
@@ -11,6 +11,9 @@ public class OcrResult
|
||||
public string? ErrorMessage { get; set; }
|
||||
public InvoiceData? Data { get; set; }
|
||||
public decimal Confidence { get; set; }
|
||||
public Dictionary<string, decimal> FieldConfidences { get; set; } = new();
|
||||
public double ProcessingTimeMs { get; set; }
|
||||
public string? DocumentType { get; set; }
|
||||
}
|
||||
|
||||
public class InvoiceData
|
||||
@@ -26,5 +29,7 @@ public class InvoiceData
|
||||
public string? OcrNumber { get; set; }
|
||||
public string? Bankgiro { get; set; }
|
||||
public string? Plusgiro { get; set; }
|
||||
public string? CustomerNumber { get; set; }
|
||||
public string? PaymentLine { get; set; }
|
||||
public string Currency { get; set; } = "SEK";
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ using FiscalFlow.Infrastructure.Services;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Http.Resilience;
|
||||
|
||||
namespace FiscalFlow.Infrastructure.Extensions;
|
||||
|
||||
@@ -25,6 +26,27 @@ public static class DependencyInjection
|
||||
services.AddScoped<IUnitOfWork, UnitOfWork>();
|
||||
services.AddSingleton<IBlobStorageService, AzureBlobStorageService>();
|
||||
|
||||
// Typed HttpClient for the OCR inference API, wrapped in the standard resilience pipeline
// (retry, per-attempt timeout, total timeout, circuit breaker).
services
    .AddHttpClient<IOcrService, OcrService>(client =>
    {
        // Fall back to the local default when the setting is missing or blank;
        // a blank value would otherwise produce an invalid relative Uri.
        var configuredUrl = configuration["Ocr:ApiUrl"];
        var baseUrl = string.IsNullOrWhiteSpace(configuredUrl)
            ? "http://localhost:8000/api/v1"
            : configuredUrl;

        // Trailing slash so relative request paths (e.g. "infer") append to the base path.
        client.BaseAddress = new Uri(baseUrl.TrimEnd('/') + "/");

        // NOTE(review): HttpClient.Timeout (60s) is shorter than TotalRequestTimeout (90s) below,
        // so the client timeout is presumably the effective overall cap — confirm this is intended.
        client.Timeout = TimeSpan.FromSeconds(60);

        var apiKey = configuration["Ocr:ApiKey"];
        if (!string.IsNullOrEmpty(apiKey))
        {
            client.DefaultRequestHeaders.Add("X-API-Key", apiKey);
        }
    })
    .AddStandardResilienceHandler(options =>
    {
        options.Retry.MaxRetryAttempts = 3;
        options.Retry.Delay = TimeSpan.FromSeconds(2);
        options.AttemptTimeout.Timeout = TimeSpan.FromSeconds(30);
        options.TotalRequestTimeout.Timeout = TimeSpan.FromSeconds(90);

        // Option validation requires the circuit breaker sampling window to be at least
        // twice the attempt timeout; the 30s default would fail validation with a 30s attempt timeout.
        options.CircuitBreaker.SamplingDuration = TimeSpan.FromSeconds(60);
    });
|
||||
|
||||
return services;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,16 +7,15 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="10.0.0" />
|
||||
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http.Resilience" Version="10.3.0" />
|
||||
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="10.0.0" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Tools" Version="10.0.0">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="Microsoft.AspNetCore.Identity.EntityFrameworkCore" Version="10.0.0" />
|
||||
<PackageReference Include="Azure.Storage.Blobs" Version="12.23.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0" />
|
||||
<PackageReference Include="Polly" Version="8.5.0" />
|
||||
<PackageReference Include="Polly.Extensions.Http" Version="3.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.3" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -1,154 +1,286 @@
|
||||
using FiscalFlow.Core.Interfaces;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Globalization;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Net.Http.Json;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using FiscalFlow.Core.Interfaces;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace FiscalFlow.Infrastructure.Services;
|
||||
|
||||
/// <summary>
/// Client for the OCR inference API: uploads a document to the <c>infer</c> endpoint and maps the
/// JSON response onto <see cref="OcrResult"/> / <see cref="InvoiceData"/>.
/// </summary>
public class OcrService : IOcrService
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    private readonly HttpClient _httpClient;
    private readonly ILogger<OcrService> _logger;

    /// <summary>Creates the service around an injected <see cref="HttpClient"/>.</summary>
    /// <param name="httpClient">Client used for the request; the request path "infer" is relative to its base address.</param>
    /// <param name="logger">Logger for API and parsing failures.</param>
    public OcrService(HttpClient httpClient, ILogger<OcrService> logger)
    {
        _httpClient = httpClient;
        _logger = logger;
    }

    /// <summary>
    /// Posts the file as multipart form data (with <c>extract_line_items=true</c>) and converts the response.
    /// </summary>
    /// <param name="fileStream">Document content; it is copied into memory before sending.</param>
    /// <param name="fileName">File name; its extension selects the content type of the upload.</param>
    /// <param name="cancellationToken">Caller cancellation; a cancellation requested by the caller is not swallowed.</param>
    /// <returns>
    /// A failed <see cref="OcrResult"/> for non-success HTTP status, malformed JSON, a missing result,
    /// a status other than "success"/"partial", a timeout, or a connection error; otherwise the mapped result.
    /// </returns>
    public async Task<OcrResult> ExtractAsync(Stream fileStream, string fileName, CancellationToken cancellationToken = default)
    {
        try
        {
            // Send an in-memory copy so the caller's stream is not owned (and disposed) by the request content.
            using var streamCopy = new MemoryStream();
            await fileStream.CopyToAsync(streamCopy, cancellationToken);
            streamCopy.Position = 0;

            using var content = new MultipartFormDataContent();
            var streamContent = new StreamContent(streamCopy);
            streamContent.Headers.ContentType = new MediaTypeHeaderValue(GetContentType(fileName));
            content.Add(streamContent, "file", fileName);
            content.Add(new StringContent("true"), "extract_line_items");

            // Dispose the response so its content and connection resources are released promptly.
            using var response = await _httpClient.PostAsync("infer", content, cancellationToken);
            var responseBody = await response.Content.ReadAsStringAsync(cancellationToken);

            if (!response.IsSuccessStatusCode)
            {
                // Only the status and body length are logged, not the body itself.
                _logger.LogError("OCR API error: {StatusCode}, response length: {Length} bytes", response.StatusCode, responseBody.Length);
                return Failure($"OCR API returned {response.StatusCode}");
            }

            InferenceApiResponse? apiResponse;
            try
            {
                apiResponse = JsonSerializer.Deserialize<InferenceApiResponse>(responseBody, JsonOptions);
            }
            catch (JsonException ex)
            {
                _logger.LogError(ex, "Failed to parse OCR API response");
                return Failure("Invalid response format from OCR API");
            }

            if (apiResponse?.Result == null)
            {
                return Failure(apiResponse?.Message ?? "Invalid OCR response");
            }

            if (!string.Equals(apiResponse.Status, "success", StringComparison.OrdinalIgnoreCase) &&
                !string.Equals(apiResponse.Status, "partial", StringComparison.OrdinalIgnoreCase))
            {
                return Failure(apiResponse.Message ?? $"OCR status: {apiResponse.Status}");
            }

            var result = apiResponse.Result;
            var fieldConfidences = result.Confidence ?? new Dictionary<string, decimal>();

            // Overall confidence is the mean of the per-field scores, or 0 when none were returned.
            var averageConfidence = fieldConfidences.Count > 0
                ? fieldConfidences.Values.Average()
                : 0m;

            return new OcrResult
            {
                Success = result.Success,
                Data = MapToInvoiceData(result.Fields, result.VatSummary),
                Confidence = Math.Round(averageConfidence, 4),
                FieldConfidences = fieldConfidences,
                ProcessingTimeMs = result.ProcessingTimeMs,
                DocumentType = result.DocumentType,
                ErrorMessage = result.Errors?.Count > 0 ? string.Join("; ", result.Errors) : null,
            };
        }
        catch (TaskCanceledException ex) when (!cancellationToken.IsCancellationRequested)
        {
            // The caller did not cancel, so the cancellation is treated as a timeout.
            _logger.LogError(ex, "OCR API request timed out");
            return Failure("OCR API request timed out");
        }
        catch (HttpRequestException ex)
        {
            _logger.LogError(ex, "Error connecting to OCR API");
            return Failure($"OCR API connection error: {ex.Message}");
        }

        // NOTE(review): exceptions of other types (for example ones raised by a resilience handler
        // attached to the HttpClient) are not caught here and propagate to the caller — confirm intended.
    }

    /// <summary>Builds a failed result carrying only an error message.</summary>
    private static OcrResult Failure(string message) => new()
    {
        Success = false,
        ErrorMessage = message,
    };

    /// <summary>
    /// Maps the API's field dictionary and optional VAT summary to <see cref="InvoiceData"/>.
    /// A missing field dictionary is treated as empty so the VAT summary is still applied.
    /// </summary>
    private static InvoiceData MapToInvoiceData(Dictionary<string, string?>? fields, VatSummaryResult? vatSummary)
    {
        fields ??= new Dictionary<string, string?>();

        // Keys mix PascalCase and snake_case; lookups are exact-match on the dictionary's keys.
        var data = new InvoiceData
        {
            SupplierName = GetField(fields, "SupplierName"),
            SupplierOrgNumber = GetField(fields, "supplier_org_number"),
            InvoiceNumber = GetField(fields, "InvoiceNumber"),
            InvoiceDate = ParseDate(GetField(fields, "InvoiceDate")),
            DueDate = ParseDate(GetField(fields, "InvoiceDueDate")),
            AmountTotal = ParseDecimal(GetField(fields, "Amount")),
            OcrNumber = GetField(fields, "OCR"),
            Bankgiro = GetField(fields, "Bankgiro"),
            Plusgiro = GetField(fields, "Plusgiro"),
            CustomerNumber = GetField(fields, "customer_number"),
            PaymentLine = GetField(fields, "payment_line"),
            Currency = GetField(fields, "Currency") ?? "SEK",
        };

        if (vatSummary != null)
        {
            data.AmountVat = ParseDecimal(vatSummary.TotalVat);

            // No usable total: fall back to the sum of the parseable breakdown amounts (null if none parse).
            if (data.AmountVat == null && vatSummary.Breakdowns?.Count > 0)
            {
                data.AmountVat = vatSummary.Breakdowns
                    .Select(b => ParseDecimal(b.VatAmount))
                    .Where(v => v.HasValue)
                    .Aggregate((decimal?)null, (sum, v) => (sum ?? 0) + v);
            }

            // The first breakdown's rate is reported as the invoice VAT rate.
            if (vatSummary.Breakdowns?.Count > 0)
            {
                var primaryRate = vatSummary.Breakdowns[0].Rate;
                data.VatRate = primaryRate.HasValue ? (int)Math.Round(primaryRate.Value) : null;
            }
        }

        return data;
    }

    /// <summary>Returns the value stored under <paramref name="key"/>, or null when the key is absent.</summary>
    private static string? GetField(Dictionary<string, string?> fields, string key)
    {
        return fields.TryGetValue(key, out var value) ? value : null;
    }

    /// <summary>Chooses the upload content type from the file extension (case-insensitive).</summary>
    private static string GetContentType(string fileName)
    {
        var extension = Path.GetExtension(fileName)?.ToLowerInvariant();
        return extension switch
        {
            ".pdf" => "application/pdf",
            ".png" => "image/png",
            ".jpg" or ".jpeg" => "image/jpeg",
            _ => "application/octet-stream",
        };
    }

    /// <summary>
    /// Parses a date using a fixed list of formats first, then invariant-culture general parsing.
    /// Returns null for blank or unparseable input.
    /// </summary>
    private static DateTime? ParseDate(string? value)
    {
        if (string.IsNullOrWhiteSpace(value)) return null;

        string[] formats = ["yyyy-MM-dd", "yyyy/MM/dd", "dd/MM/yyyy", "dd.MM.yyyy"];
        if (DateTime.TryParseExact(value, formats, CultureInfo.InvariantCulture, DateTimeStyles.None, out var date))
        {
            return date;
        }

        return DateTime.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.None, out date) ? date : null;
    }

    /// <summary>
    /// Parses an amount written with either '.' or ',' as the decimal separator and optional
    /// space / no-break-space grouping. Returns null for blank or unparseable input.
    /// </summary>
    private static decimal? ParseDecimal(string? value)
    {
        if (string.IsNullOrWhiteSpace(value)) return null;

        var cleaned = value.Replace(" ", "").Replace("\u00a0", "").Replace("\u202f", "");

        var lastComma = cleaned.LastIndexOf(',');
        var lastDot = cleaned.LastIndexOf('.');

        if (lastComma >= 0 && lastDot >= 0)
        {
            // Both separators present: the one that appears last is the decimal separator,
            // the other is grouping ("1,234.56" and "1.234,56" both mean 1234.56).
            cleaned = lastComma > lastDot
                ? cleaned.Replace(".", "").Replace(',', '.')
                : cleaned.Replace(",", "");
        }
        else if (lastComma >= 0)
        {
            // Comma only: treat it as the decimal separator.
            cleaned = cleaned.Replace(',', '.');
        }

        return decimal.TryParse(cleaned, NumberStyles.Number, CultureInfo.InvariantCulture, out var result)
            ? result
            : null;
    }
}
|
||||
|
||||
public class OcrApiResponse
|
||||
/// <summary>
/// Outer envelope of the inference API response: a status string, an optional message,
/// and the nested extraction result.
/// </summary>
internal sealed class InferenceApiResponse
{
    /// <summary>Status string reported by the API.</summary>
    public string? Status { get; set; }

    /// <summary>Optional message accompanying the status.</summary>
    public string? Message { get; set; }

    /// <summary>Nested extraction payload; may be absent.</summary>
    public InferenceApiResult? Result { get; set; }
}
|
||||
|
||||
/// <summary>
/// Nested result object of the inference API response. snake_case JSON names are bound
/// explicitly through <see cref="JsonPropertyNameAttribute"/>.
/// </summary>
internal sealed class InferenceApiResult
{
    [JsonPropertyName("document_id")]
    public string? DocumentId { get; set; }

    /// <summary>Success flag reported inside the result object.</summary>
    public bool Success { get; set; }

    [JsonPropertyName("document_type")]
    public string? DocumentType { get; set; }

    /// <summary>Extracted field values keyed by field name; values may be null.</summary>
    public Dictionary<string, string?>? Fields { get; set; }

    /// <summary>Confidence score per field name.</summary>
    public Dictionary<string, decimal>? Confidence { get; set; }

    public List<DetectionItem>? Detections { get; set; }

    [JsonPropertyName("processing_time_ms")]
    public double ProcessingTimeMs { get; set; }

    /// <summary>Error strings reported for this document, if any.</summary>
    public List<string>? Errors { get; set; }

    [JsonPropertyName("vat_summary")]
    public VatSummaryResult? VatSummary { get; set; }
}
|
||||
|
||||
/// <summary>
/// One entry of the result's <c>detections</c> list: a field name, a confidence value,
/// and a list of numbers in <c>Bbox</c>.
/// </summary>
internal sealed class DetectionItem
{
    /// <summary>Name of the detected field.</summary>
    public string? Field { get; set; }

    /// <summary>Confidence value for this detection.</summary>
    public decimal Confidence { get; set; }

    /// <summary>Numeric values of the bounding box as sent by the API.</summary>
    public List<double>? Bbox { get; set; }
}
|
||||
|
||||
/// <summary>
/// The <c>vat_summary</c> object of the result: per-rate breakdowns plus totals.
/// Totals are kept as the raw strings sent by the API.
/// </summary>
internal sealed class VatSummaryResult
{
    /// <summary>Individual VAT breakdown entries.</summary>
    public List<VatBreakdownResult>? Breakdowns { get; set; }

    [JsonPropertyName("total_excl_vat")]
    public string? TotalExclVat { get; set; }

    [JsonPropertyName("total_vat")]
    public string? TotalVat { get; set; }

    [JsonPropertyName("total_incl_vat")]
    public string? TotalInclVat { get; set; }

    /// <summary>Confidence value reported for the summary.</summary>
    public decimal Confidence { get; set; }
}
|
||||
|
||||
public class OcrField
|
||||
/// <summary>
/// One entry of a VAT summary's breakdown list. Amounts are kept as the raw strings sent by the API.
/// </summary>
internal sealed class VatBreakdownResult
{
    /// <summary>VAT rate for this entry, when provided.</summary>
    public decimal? Rate { get; set; }

    [JsonPropertyName("base_amount")]
    public string? BaseAmount { get; set; }

    [JsonPropertyName("vat_amount")]
    public string? VatAmount { get; set; }

    /// <summary>Source label sent by the API for this entry.</summary>
    public string? Source { get; set; }
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration" Version="10.0.3" />
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
|
||||
<PackageReference Include="xunit" Version="2.9.2" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="3.0.0">
|
||||
@@ -22,6 +23,7 @@
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\src\FiscalFlow.Application\FiscalFlow.Application.csproj" />
|
||||
<ProjectReference Include="..\..\src\FiscalFlow.Core\FiscalFlow.Core.csproj" />
|
||||
<ProjectReference Include="..\..\src\FiscalFlow.Infrastructure\FiscalFlow.Infrastructure.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
806
backend/tests/FiscalFlow.UnitTests/Services/OcrServiceTests.cs
Normal file
806
backend/tests/FiscalFlow.UnitTests/Services/OcrServiceTests.cs
Normal file
@@ -0,0 +1,806 @@
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using FiscalFlow.Core.Interfaces;
|
||||
using FiscalFlow.Infrastructure.Services;
|
||||
using FluentAssertions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Moq;
|
||||
using Xunit;
|
||||
|
||||
namespace FiscalFlow.UnitTests.Services;
|
||||
|
||||
public sealed class OcrServiceTests
|
||||
{
|
||||
private readonly Mock<ILogger<OcrService>> _loggerMock;
|
||||
|
||||
// A fresh logger mock is created for each test class instance.
public OcrServiceTests() => _loggerMock = new Mock<ILogger<OcrService>>();
|
||||
|
||||
[Fact]
public async Task ExtractAsync_SuccessfulExtraction_ReturnsCompleteOcrResult()
{
    // Given: a full "success" payload with fields, confidences and a VAT summary
    var extractedFields = new Dictionary<string, string?>
    {
        ["InvoiceNumber"] = "INV-001",
        ["InvoiceDate"] = "2024-01-15",
        ["InvoiceDueDate"] = "2024-02-15",
        ["OCR"] = "123456789",
        ["Bankgiro"] = "1234-5678",
        ["Plusgiro"] = null,
        ["Amount"] = "12500.00",
        ["SupplierName"] = "Test AB",
        ["supplier_org_number"] = "556677-8899",
        ["customer_number"] = "C-001",
        ["payment_line"] = "# 123456789 # 12500 00 #",
        ["Currency"] = "SEK",
    };
    var scores = new Dictionary<string, decimal>
    {
        ["InvoiceNumber"] = 0.95m,
        ["InvoiceDate"] = 0.88m,
        ["Amount"] = 0.92m,
    };
    var payload = new
    {
        status = "success",
        message = "Processed document abc123",
        result = new
        {
            document_id = "abc123",
            success = true,
            document_type = "invoice",
            fields = extractedFields,
            confidence = scores,
            detections = new List<object>(),
            processing_time_ms = 1234.5,
            errors = new List<string>(),
            vat_summary = new
            {
                breakdowns = new[]
                {
                    new
                    {
                        rate = 25.0m,
                        vat_amount = "2500.00",
                        base_amount = "10000.00",
                        source = "regex",
                    },
                },
                total_excl_vat = "10000.00",
                total_vat = "2500.00",
                total_incl_vat = "12500.00",
                confidence = 0.85m,
            },
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: envelope-level values
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeTrue();
    outcome.Confidence.Should().Be(0.9167m);
    outcome.ProcessingTimeMs.Should().Be(1234.5);
    outcome.DocumentType.Should().Be("invoice");
    outcome.FieldConfidences.Should().ContainKey("InvoiceNumber").WhoseValue.Should().Be(0.95m);
    outcome.FieldConfidences.Should().ContainKey("InvoiceDate").WhoseValue.Should().Be(0.88m);
    outcome.FieldConfidences.Should().ContainKey("Amount").WhoseValue.Should().Be(0.92m);

    // Then: mapped invoice values
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.InvoiceNumber.Should().Be("INV-001");
    invoice.InvoiceDate.Should().Be(new DateTime(2024, 1, 15));
    invoice.DueDate.Should().Be(new DateTime(2024, 2, 15));
    invoice.OcrNumber.Should().Be("123456789");
    invoice.Bankgiro.Should().Be("1234-5678");
    invoice.Plusgiro.Should().BeNull();
    invoice.AmountTotal.Should().Be(12500.00m);
    invoice.SupplierName.Should().Be("Test AB");
    invoice.SupplierOrgNumber.Should().Be("556677-8899");
    invoice.CustomerNumber.Should().Be("C-001");
    invoice.PaymentLine.Should().Be("# 123456789 # 12500 00 #");
    invoice.Currency.Should().Be("SEK");
    invoice.AmountVat.Should().Be(2500.00m);
    invoice.VatRate.Should().Be(25);
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_FieldMapping_MapsAllFieldsCorrectly()
{
    // Given: a payload whose field dictionary uses the API's mixed key casing
    var extractedFields = new Dictionary<string, string?>
    {
        ["InvoiceNumber"] = "TEST-123",
        ["InvoiceDate"] = "2024-03-20",
        ["InvoiceDueDate"] = "2024-04-20",
        ["OCR"] = "987654321",
        ["Bankgiro"] = "9999-8888",
        ["Plusgiro"] = "123456-7",
        ["Amount"] = "5000.00",
        ["supplier_org_number"] = "111222-3333",
        ["customer_number"] = "CUST-999",
        ["payment_line"] = "Payment reference line",
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = extractedFields,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 500.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: every key lands on its InvoiceData property
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.InvoiceNumber.Should().Be("TEST-123");
    invoice.InvoiceDate.Should().Be(new DateTime(2024, 3, 20));
    invoice.DueDate.Should().Be(new DateTime(2024, 4, 20));
    invoice.OcrNumber.Should().Be("987654321");
    invoice.Bankgiro.Should().Be("9999-8888");
    invoice.Plusgiro.Should().Be("123456-7");
    invoice.AmountTotal.Should().Be(5000.00m);
    invoice.SupplierOrgNumber.Should().Be("111222-3333");
    invoice.CustomerNumber.Should().Be("CUST-999");
    invoice.PaymentLine.Should().Be("Payment reference line");
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_VatSummaryParsing_PopulatesVatFieldsFromTotalVat()
{
    // Given: a VAT summary that carries total_vat and a single breakdown
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            vat_summary = new
            {
                total_vat = "1250.50",
                breakdowns = new[]
                {
                    new
                    {
                        rate = 25.0m,
                        vat_amount = "1250.50",
                        base_amount = "5000.00",
                        source = "table",
                    },
                },
            },
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: the VAT amount comes from total_vat and the rate from the breakdown
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.AmountVat.Should().Be(1250.50m);
    invoice.VatRate.Should().Be(25);
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_VatSummaryWithoutTotalVat_SumsFromBreakdowns()
{
    // Given: a VAT summary with two breakdowns and no total_vat
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            vat_summary = new
            {
                breakdowns = new[]
                {
                    new
                    {
                        rate = 25.0m,
                        vat_amount = "1000.00",
                        base_amount = "4000.00",
                        source = "table",
                    },
                    new
                    {
                        rate = 12.0m,
                        vat_amount = "240.00",
                        base_amount = "2000.00",
                        source = "table",
                    },
                },
            },
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: the amounts are summed and the first breakdown's rate is used
    var invoice = outcome.Data;
    invoice.Should().NotBeNull();
    invoice!.AmountVat.Should().Be(1240.00m);
    invoice.VatRate.Should().Be(25);
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_ConfidenceCalculation_AveragesFieldConfidencesAndRoundsToFourDecimals()
{
    // Given: three per-field scores with more than four decimals each
    var scores = new Dictionary<string, decimal>
    {
        ["Field1"] = 0.123456789m,
        ["Field2"] = 0.987654321m,
        ["Field3"] = 0.555555555m,
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = scores,
            processing_time_ms = 100.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: the mean of the three scores, rounded to four decimals
    outcome.Confidence.Should().Be(0.5556m);
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_NoConfidenceScores_ReturnsZeroConfidence()
{
    // Given: a successful payload with an empty confidence dictionary
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then
    outcome.Confidence.Should().Be(0m);
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_HttpError500_ReturnsFailureWithErrorMessage()
{
    // Given: the API answers with HTTP 500
    var client = CreateHttpClient(HttpStatusCode.InternalServerError, "Internal server error");
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: a failed result naming the status code
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("OCR API returned InternalServerError");
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_NullResponse_ReturnsFailure()
{
    // Given: an HTTP 200 response built from the "{}" body argument
    var client = CreateHttpClient(HttpStatusCode.OK, "{}");
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: a failed result with some error message
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().NotBeNullOrEmpty();
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_NullResult_ReturnsFailureWithMessage()
{
    // Given: an envelope whose result is null
    var payload = new
    {
        status = "error",
        message = "Processing failed",
        result = (object?)null,
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: the envelope message is surfaced as the error
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("Processing failed");
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_ErrorStatus_ReturnsFailureWithMessage()
{
    // Given: a populated result object but an "error" status on the envelope
    var payload = new
    {
        status = "error",
        message = "Document format not supported",
        result = new
        {
            document_id = "test",
            success = false,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 10.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: the envelope message is surfaced as the error
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("Document format not supported");
}
|
||||
|
||||
[Fact]
public async Task ExtractAsync_PartialStatus_TreatedAsSuccess()
{
    // Given: a "partial" status with one extracted field
    var extractedFields = new Dictionary<string, string?>
    {
        ["InvoiceNumber"] = "PARTIAL-001",
    };
    var scores = new Dictionary<string, decimal>
    {
        ["InvoiceNumber"] = 0.75m,
    };
    var payload = new
    {
        status = "partial",
        message = "Some fields not detected",
        result = new
        {
            document_id = "test",
            success = true,
            fields = extractedFields,
            confidence = scores,
            processing_time_ms = 200.0,
        },
    };

    var client = CreateHttpClient(HttpStatusCode.OK, payload);
    var sut = new OcrService(client, _loggerMock.Object);
    using var input = new MemoryStream();

    // When
    var outcome = await sut.ExtractAsync(input, "test.pdf", CancellationToken.None);

    // Then: the result is successful and carries the extracted field
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeTrue();
    outcome.Data.Should().NotBeNull();
    outcome.Data!.InvoiceNumber.Should().Be("PARTIAL-001");
}
|
||||
|
||||
/// <summary>
/// When the transport throws <see cref="TaskCanceledException"/> although the caller's token
/// was never cancelled, the service must report a timeout rather than let the exception escape.
/// </summary>
[Fact]
public async Task ExtractAsync_TaskCanceledException_NotUserCancellation_ReturnsTimeoutError()
{
    // Arrange: the handler throws on every send; the token passed below is CancellationToken.None.
    var client = new HttpClient(new TimeoutHttpMessageHandler())
    {
        BaseAddress = new Uri("http://localhost"),
    };
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Be("OCR API request timed out");
}
|
||||
|
||||
/// <summary>
/// A transport-level <see cref="HttpRequestException"/> must be converted into a failed result
/// whose message contains the connection-error text.
/// </summary>
[Fact]
public async Task ExtractAsync_HttpRequestException_ReturnsConnectionError()
{
    // Arrange: every send through this handler throws HttpRequestException.
    var client = new HttpClient(new ConnectionErrorHttpMessageHandler())
    {
        BaseAddress = new Uri("http://localhost"),
    };
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeFalse();
    outcome.ErrorMessage.Should().Contain("OCR API connection error");
}
|
||||
|
||||
/// <summary>
/// The <c>Amount</c> field arrives as text in several notations (dot or comma decimal
/// separator, space or comma thousands separator, no decimals); each must map to the same
/// <c>AmountTotal</c> value.
/// </summary>
/// <param name="input">Raw amount text as returned by the inference API.</param>
/// <param name="expected">Numeric value the text is expected to map to.</param>
[Theory]
[InlineData("1234.56", 1234.56)]
[InlineData("1 234,56", 1234.56)]
[InlineData("1,234.56", 1234.56)]
[InlineData("1234,56", 1234.56)]
[InlineData("12345", 12345)]
[InlineData("0.99", 0.99)]
public async Task ExtractAsync_DecimalParsing_HandlesVariousFormats(string input, decimal expected)
{
    // Arrange: a successful reply whose only extracted field is the amount under test.
    var extracted = new Dictionary<string, string?>
    {
        ["Amount"] = input,
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = extracted,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };
    var sut = new OcrService(CreateHttpClient(HttpStatusCode.OK, payload), _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    outcome.Data!.AmountTotal.Should().Be(expected);
}
|
||||
|
||||
/// <summary>
/// The <c>InvoiceDate</c> field may arrive as ISO (<c>yyyy-MM-dd</c>), slash- or dot-separated
/// day-first, or slash-separated year-first text; each must map to the same calendar date.
/// </summary>
/// <param name="input">Raw date text as returned by the inference API.</param>
/// <param name="year">Expected year component.</param>
/// <param name="month">Expected month component.</param>
/// <param name="day">Expected day component.</param>
[Theory]
[InlineData("2024-01-15", 2024, 1, 15)]
[InlineData("15/01/2024", 2024, 1, 15)]
[InlineData("15.01.2024", 2024, 1, 15)]
[InlineData("2024/01/15", 2024, 1, 15)]
public async Task ExtractAsync_DateParsing_HandlesVariousFormats(string input, int year, int month, int day)
{
    // Arrange: a successful reply whose only extracted field is the date under test.
    var extracted = new Dictionary<string, string?>
    {
        ["InvoiceDate"] = input,
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = extracted,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };
    var sut = new OcrService(CreateHttpClient(HttpStatusCode.OK, payload), _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    var expectedDate = new DateTime(year, month, day);
    outcome.Data!.InvoiceDate.Should().Be(expectedDate);
}
|
||||
|
||||
/// <summary>
/// Extraction must succeed for PDF, PNG and JPEG file names in either case, and also for a
/// name with an unrecognised extension. Only the overall success flag is checked here.
/// </summary>
/// <param name="fileName">File name handed to the service alongside the (empty) stream.</param>
[Theory]
[InlineData("test.pdf")]
[InlineData("invoice.PDF")]
[InlineData("scan.png")]
[InlineData("photo.PNG")]
[InlineData("image.jpg")]
[InlineData("photo.jpeg")]
[InlineData("document.JPG")]
[InlineData("unknown.txt")]
public async Task ExtractAsync_DifferentFileTypes_ProcessesSuccessfully(string fileName)
{
    // Arrange: a minimal successful reply with no extracted fields.
    var inference = new
    {
        document_id = "test",
        success = true,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 100.0,
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = inference,
    };
    var sut = new OcrService(CreateHttpClient(HttpStatusCode.OK, payload), _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, fileName, CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.Success.Should().BeTrue();
}
|
||||
|
||||
/// <summary>
/// Entries in the reply's <c>errors</c> list must be exposed through <c>ErrorMessage</c>,
/// joined with <c>"; "</c>, even when the reply itself reports success.
/// </summary>
[Fact]
public async Task ExtractAsync_ErrorsInResult_IncludedInErrorMessage()
{
    // Arrange
    var reportedErrors = new List<string> { "Low quality image", "Missing page 2" };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = new Dictionary<string, string?>(),
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
            errors = reportedErrors,
        },
    };
    var sut = new OcrService(CreateHttpClient(HttpStatusCode.OK, payload), _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Should().NotBeNull();
    outcome.ErrorMessage.Should().Be("Low quality image; Missing page 2");
}
|
||||
|
||||
/// <summary>
/// When the reply contains no <c>Currency</c> field, the returned data must report "SEK".
/// </summary>
[Fact]
public async Task ExtractAsync_DefaultCurrency_SetToSEK()
{
    // Arrange: successful reply with an empty field map, so no currency is supplied.
    var inference = new
    {
        document_id = "test",
        success = true,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 100.0,
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = inference,
    };
    var sut = new OcrService(CreateHttpClient(HttpStatusCode.OK, payload), _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    outcome.Data!.Currency.Should().Be("SEK");
}
|
||||
|
||||
/// <summary>
/// A <c>Currency</c> field present in the reply must replace the default currency on the
/// returned data.
/// </summary>
[Fact]
public async Task ExtractAsync_CustomCurrency_OverridesDefault()
{
    // Arrange
    var extracted = new Dictionary<string, string?>
    {
        ["Currency"] = "EUR",
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = new
        {
            document_id = "test",
            success = true,
            fields = extracted,
            confidence = new Dictionary<string, decimal>(),
            processing_time_ms = 100.0,
        },
    };
    var sut = new OcrService(CreateHttpClient(HttpStatusCode.OK, payload), _loggerMock.Object);
    using var document = new MemoryStream();

    // Act
    var outcome = await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    outcome.Data.Should().NotBeNull();
    outcome.Data!.Currency.Should().Be("EUR");
}
|
||||
|
||||
/// <summary>
/// Checks the outgoing request: it must target a URI ending in <c>infer</c> and carry a
/// <see cref="MultipartFormDataContent"/> body.
/// </summary>
/// <remarks>
/// NOTE(review): despite the test name, the individual form parts (file, line-item flag) are
/// not inspected here; only the URI suffix and the content type are asserted.
/// </remarks>
[Fact]
public async Task ExtractAsync_MultipartFormData_IncludesFileAndExtractLineItemsParameter()
{
    // Arrange: a recording handler so the request can be examined after the call.
    var inference = new
    {
        document_id = "test",
        success = true,
        fields = new Dictionary<string, string?>(),
        confidence = new Dictionary<string, decimal>(),
        processing_time_ms = 100.0,
    };
    var payload = new
    {
        status = "success",
        message = "OK",
        result = inference,
    };
    var recorder = new CaptureHttpMessageHandler(HttpStatusCode.OK, payload);
    var client = new HttpClient(recorder)
    {
        BaseAddress = new Uri("http://localhost"),
    };
    var sut = new OcrService(client, _loggerMock.Object);
    using var document = new MemoryStream(Encoding.UTF8.GetBytes("test file content"));

    // Act
    await sut.ExtractAsync(document, "test.pdf", CancellationToken.None);

    // Assert
    recorder.CapturedRequest.Should().NotBeNull();
    recorder.CapturedRequest!.RequestUri.Should().NotBeNull();
    recorder.CapturedRequest.RequestUri!.ToString().Should().EndWith("infer");
    recorder.CapturedRequest.Content.Should().NotBeNull();
    recorder.CapturedRequest.Content.Should().BeOfType<MultipartFormDataContent>();
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="HttpClient"/> whose every request is answered with
/// <paramref name="statusCode"/> and the JSON serialization of <paramref name="response"/>.
/// </summary>
/// <param name="statusCode">Status code of the canned reply.</param>
/// <param name="response">Object serialized with <see cref="JsonSerializer"/> to form the reply body.</param>
/// <returns>A client based at <c>http://localhost</c> backed by a canned-response handler.</returns>
private static HttpClient CreateHttpClient(HttpStatusCode statusCode, object response)
    // The string result of Serialize routes this call to the raw-body overload.
    => CreateHttpClient(statusCode, JsonSerializer.Serialize(response));
|
||||
|
||||
/// <summary>
/// Builds an <see cref="HttpClient"/> whose every request is answered with
/// <paramref name="statusCode"/> and the given raw body.
/// </summary>
/// <param name="statusCode">Status code of the canned reply.</param>
/// <param name="responseBody">Body text returned verbatim (sent with a JSON content type).</param>
/// <returns>A client based at <c>http://localhost</c> backed by a canned-response handler.</returns>
private static HttpClient CreateHttpClient(HttpStatusCode statusCode, string responseBody)
    => new HttpClient(new MockHttpMessageHandler(statusCode, responseBody))
    {
        BaseAddress = new Uri("http://localhost"),
    };
|
||||
|
||||
/// <summary>
/// Test double that answers every request with a fixed status code and a fixed body sent as
/// UTF-8 <c>application/json</c>. The request itself is ignored.
/// </summary>
private sealed class MockHttpMessageHandler : HttpMessageHandler
{
    private readonly HttpStatusCode _statusCode;
    private readonly string _responseBody;

    /// <summary>Stores the canned status code and body.</summary>
    public MockHttpMessageHandler(HttpStatusCode statusCode, string responseBody)
        => (_statusCode, _responseBody) = (statusCode, responseBody);

    /// <summary>Returns an already-completed task holding a fresh canned response.</summary>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var content = new StringContent(_responseBody, Encoding.UTF8, "application/json");
        return Task.FromResult(new HttpResponseMessage(_statusCode) { Content = content });
    }
}
|
||||
|
||||
/// <summary>
/// Test double that fails every request by throwing <see cref="TaskCanceledException"/>,
/// regardless of the state of the supplied cancellation token.
/// </summary>
private sealed class TimeoutHttpMessageHandler : HttpMessageHandler
{
    /// <summary>Always throws; no response is ever produced.</summary>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        => throw new TaskCanceledException("Request timed out");
}
|
||||
|
||||
/// <summary>
/// Test double that fails every request by throwing <see cref="HttpRequestException"/>.
/// </summary>
private sealed class ConnectionErrorHttpMessageHandler : HttpMessageHandler
{
    /// <summary>Always throws; no response is ever produced.</summary>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        => throw new HttpRequestException("Unable to connect to server");
}
|
||||
|
||||
/// <summary>
/// Test double that records the most recent request it received and answers with a fixed
/// status code plus the JSON serialization of a canned object.
/// </summary>
private sealed class CaptureHttpMessageHandler : HttpMessageHandler
{
    private readonly HttpStatusCode _statusCode;
    private readonly string _responseBody;

    /// <summary>The last request passed to the handler, or <c>null</c> before the first send.</summary>
    public HttpRequestMessage? CapturedRequest { get; private set; }

    /// <summary>Stores the status code and serializes <paramref name="response"/> once, up front.</summary>
    public CaptureHttpMessageHandler(HttpStatusCode statusCode, object response)
        => (_statusCode, _responseBody) = (statusCode, JsonSerializer.Serialize(response));

    /// <summary>Remembers the request, then returns an already-completed canned response.</summary>
    protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        CapturedRequest = request;

        var content = new StringContent(_responseBody, Encoding.UTF8, "application/json");
        return Task.FromResult(new HttpResponseMessage(_statusCode) { Content = content });
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user