Use XML geometry for critical PDF import

This commit is contained in:
2026-03-14 01:25:43 +01:00
parent f70d610c92
commit 719355da90
10 changed files with 335 additions and 201 deletions

View File

@@ -5,7 +5,8 @@ namespace RolemasterDb.ImportTool;
public sealed class CriticalImportCommandRunner
{
private readonly CriticalImportManifestLoader manifestLoader = new();
private readonly PdfTextExtractor pdfTextExtractor = new();
private readonly ImportArtifactWriter artifactWriter = new();
private readonly PdfXmlExtractor pdfXmlExtractor = new();
private readonly StandardCriticalTableParser standardParser = new();
public async Task<int> RunAsync(ResetOptions options)
@@ -26,8 +27,8 @@ public sealed class CriticalImportCommandRunner
{
var entry = GetManifestEntry(options.Table);
var artifactPaths = CreateArtifactPaths(entry.Slug);
await pdfTextExtractor.ExtractAsync(ResolveRepositoryPath(entry.PdfPath), artifactPaths.ExtractedTextPath);
Console.WriteLine($"Extracted {entry.Slug} to {artifactPaths.ExtractedTextPath}");
await pdfXmlExtractor.ExtractAsync(ResolveRepositoryPath(entry.PdfPath), artifactPaths.XmlPath);
Console.WriteLine($"Extracted {entry.Slug} to {artifactPaths.XmlPath}");
return 0;
}
@@ -36,16 +37,24 @@ public sealed class CriticalImportCommandRunner
var entry = GetManifestEntry(options.Table);
var artifactPaths = CreateArtifactPaths(entry.Slug);
if (!File.Exists(artifactPaths.ExtractedTextPath))
if (!File.Exists(artifactPaths.XmlPath))
{
Console.Error.WriteLine($"Missing extracted text artifact: {artifactPaths.ExtractedTextPath}");
Console.Error.WriteLine($"Missing XML artifact: {artifactPaths.XmlPath}");
return 1;
}
var extractedText = await File.ReadAllTextAsync(artifactPaths.ExtractedTextPath);
var parsedTable = Parse(entry, extractedText);
var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath);
var parseResult = Parse(entry, xmlContent);
await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None);
if (!parseResult.ValidationReport.IsValid)
{
throw new InvalidOperationException(
$"Validation failed for '{entry.Slug}'. See {artifactPaths.ValidationReportPath} for details.");
}
var loader = new CriticalImportLoader(ResolveDatabasePath(options.DatabasePath));
var result = await loader.LoadAsync(parsedTable);
var result = await loader.LoadAsync(parseResult.Table);
Console.WriteLine(
$"Loaded {result.TableSlug}: {result.ColumnCount} columns, {result.RollBandCount} roll bands, {result.ResultCount} results.");
@@ -82,14 +91,14 @@ public sealed class CriticalImportCommandRunner
?? throw new InvalidOperationException($"No enabled manifest entry was found for '{tableSlug}'.");
}
// NOTE(review): this span shows two signature lines, two throw lines, and two return lines
// back to back. That looks like the pre- and post-change lines of a diff rendered without
// +/- markers — confirm against the actual file before treating either variant as current.
/// <summary>
/// Hands a manifest entry and its extracted content to <c>standardParser</c>.
/// Only entries whose <c>Family</c> equals "standard" (ordinal, case-insensitive) are accepted.
/// </summary>
/// <param name="entry">Manifest entry; its <c>Family</c> is checked before any parsing happens.</param>
/// <returns>The value returned by <c>standardParser.Parse</c> for the entry and the supplied content.</returns>
/// <exception cref="InvalidOperationException">Thrown when <c>entry.Family</c> is not "standard".</exception>
private ParsedCriticalTable Parse(CriticalImportManifestEntry entry, string extractedText)
private StandardCriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
{
// Guard: reject every family other than "standard" before delegating to the parser.
if (!string.Equals(entry.Family, "standard", StringComparison.OrdinalIgnoreCase))
{
throw new InvalidOperationException($"Family '{entry.Family}' is not supported by phase 1.");
throw new InvalidOperationException($"Family '{entry.Family}' is not supported by phase 2.");
}
return standardParser.Parse(entry, extractedText);
return standardParser.Parse(entry, xmlContent);
}
private static ImportArtifactPaths CreateArtifactPaths(string slug) =>