Generate critical source image artifacts

This commit is contained in:
2026-03-17 22:28:17 +01:00
parent 4979cf87f7
commit 2936d7146f
6 changed files with 363 additions and 2 deletions

View File

@@ -0,0 +1,82 @@
using RolemasterDb.ImportTool.Parsing;
namespace RolemasterDb.ImportTool.Tests;
/// <summary>
/// Integration test that runs the source-image artifact generator against the "slash" table from the
/// repository manifest and checks that page and cell PNG files are produced and linked to the parse result.
/// </summary>
public sealed class CriticalImportArtifactGenerationIntegrationTests
{
    private static readonly PdfXmlExtractor Extractor = new();
    private static readonly StandardCriticalTableParser StandardParser = new();

    [Fact]
    public async Task Generated_artifacts_include_page_and_cell_source_images()
    {
        var (parseResult, artifactPaths) = await LoadPreparedSlashParseResultAsync();

        try
        {
            var result = FindResult(parseResult, "71-75", "A");
            var cellArtifact = parseResult.Cells.Single(item =>
                item.GroupKey is null &&
                item.RollBandLabel == "71-75" &&
                item.ColumnKey == "A");

            Assert.True(result.SourceBounds.PageNumber > 0);
            Assert.True(result.SourceBounds.Width > 0);
            Assert.True(result.SourceBounds.Height > 0);
            Assert.NotNull(result.SourceImagePath);
            Assert.NotNull(result.SourceImageCrop);
            Assert.Equal(result.SourceImagePath, cellArtifact.SourceImagePath);
            Assert.NotNull(cellArtifact.SourceImageCrop);
            Assert.True(File.Exists(artifactPaths.GetPageImagePath(result.SourceBounds.PageNumber)));
            Assert.True(File.Exists(artifactPaths.ResolveRelativePath(result.SourceImagePath!)));
        }
        finally
        {
            // Every run renders into its own GUID-named directory; remove it so repeated runs do not
            // pile up PNG files in the temp folder.
            TryDeleteDirectory(artifactPaths.ArtifactsRootPath);
        }
    }

    /// <summary>
    /// Parses the "slash" table (extracting its XML into the shared cache when it is not there yet) and
    /// generates its source-image artifacts into a fresh, uniquely named directory.
    /// </summary>
    /// <returns>The parse result together with the artifact paths the images were written to.</returns>
    private static async Task<(CriticalTableParseResult ParseResult, ImportArtifactPaths ArtifactPaths)> LoadPreparedSlashParseResultAsync()
    {
        var entry = LoadManifest().Tables.Single(item => item.Slug == "slash");
        var pdfPath = Path.Combine(GetRepositoryRoot(), entry.PdfPath);
        var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");

        if (!File.Exists(xmlPath))
        {
            await Extractor.ExtractAsync(pdfPath, xmlPath);
        }

        var parseResult = StandardParser.Parse(entry, await File.ReadAllTextAsync(xmlPath));
        var artifactRoot = Path.Combine(GetArtifactCacheRoot(), Guid.NewGuid().ToString("N"));
        var artifactPaths = ImportArtifactPaths.Create(artifactRoot, entry.Slug);

        // Reuse the shared extractor instead of allocating a second one for the generator.
        var generator = new CriticalSourceImageArtifactGenerator(Extractor);

        try
        {
            await generator.GenerateAsync(pdfPath, artifactPaths, parseResult);
        }
        catch
        {
            // The caller never receives the paths when generation fails, so clean up here.
            TryDeleteDirectory(artifactRoot);
            throw;
        }

        return (parseResult, artifactPaths);
    }

    /// <summary>Finds the single ungrouped result for the given roll band and column.</summary>
    private static ParsedCriticalResult FindResult(CriticalTableParseResult parseResult, string rollBandLabel, string columnKey) =>
        parseResult.Table.Results.Single(item =>
            item.GroupKey is null &&
            item.RollBandLabel == rollBandLabel &&
            item.ColumnKey == columnKey);

    /// <summary>Loads the critical import manifest from <c>sources/critical-import-manifest.json</c> in the repository.</summary>
    private static CriticalImportManifest LoadManifest() =>
        new CriticalImportManifestLoader().Load(Path.Combine(GetRepositoryRoot(), "sources", "critical-import-manifest.json"));

    /// <summary>Returns the temp-folder cache directory used by these tests, creating it when needed.</summary>
    private static string GetArtifactCacheRoot()
    {
        var cacheRoot = Path.Combine(Path.GetTempPath(), "RolemasterDb.ImportTool.MergeTests");
        Directory.CreateDirectory(cacheRoot);
        return cacheRoot;
    }

    /// <summary>
    /// Walks up from the test binaries until a directory containing <c>RolemasterDB.slnx</c> is found.
    /// </summary>
    /// <exception cref="InvalidOperationException">No ancestor directory contains the solution file.</exception>
    private static string GetRepositoryRoot()
    {
        var probe = new DirectoryInfo(AppContext.BaseDirectory);
        while (probe is not null)
        {
            if (File.Exists(Path.Combine(probe.FullName, "RolemasterDB.slnx")))
            {
                return probe.FullName;
            }

            probe = probe.Parent;
        }

        throw new InvalidOperationException("Could not find the repository root for integration tests.");
    }

    /// <summary>Deletes a directory tree if it exists, ignoring I/O and access failures.</summary>
    private static void TryDeleteDirectory(string path)
    {
        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best-effort cleanup: a locked or protected file must not mask the actual test outcome.
        }
    }
}

View File

@@ -7,10 +7,16 @@ public sealed class CriticalImportCommandRunner
private readonly CriticalImportManifestLoader manifestLoader = new(); private readonly CriticalImportManifestLoader manifestLoader = new();
private readonly ImportArtifactWriter artifactWriter = new(); private readonly ImportArtifactWriter artifactWriter = new();
private readonly PdfXmlExtractor pdfXmlExtractor = new(); private readonly PdfXmlExtractor pdfXmlExtractor = new();
private readonly CriticalSourceImageArtifactGenerator sourceImageArtifactGenerator;
private readonly StandardCriticalTableParser standardParser = new(); private readonly StandardCriticalTableParser standardParser = new();
private readonly VariantColumnCriticalTableParser variantColumnParser = new(); private readonly VariantColumnCriticalTableParser variantColumnParser = new();
private readonly GroupedVariantCriticalTableParser groupedVariantParser = new(); private readonly GroupedVariantCriticalTableParser groupedVariantParser = new();
/// <summary>
/// Creates the runner and builds the source-image artifact generator on top of the runner's own
/// <c>pdfXmlExtractor</c> instance.
/// </summary>
public CriticalImportCommandRunner() =>
    sourceImageArtifactGenerator = new CriticalSourceImageArtifactGenerator(pdfXmlExtractor);
public async Task<int> RunAsync(ResetOptions options) public async Task<int> RunAsync(ResetOptions options)
{ {
if (!string.Equals(options.Target, "criticals", StringComparison.OrdinalIgnoreCase)) if (!string.Equals(options.Target, "criticals", StringComparison.OrdinalIgnoreCase))
@@ -47,6 +53,11 @@ public sealed class CriticalImportCommandRunner
var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath); var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath);
var parseResult = Parse(entry, xmlContent); var parseResult = Parse(entry, xmlContent);
await sourceImageArtifactGenerator.GenerateAsync(
ResolveRepositoryPath(entry.PdfPath),
artifactPaths,
parseResult,
CancellationToken.None);
await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None); await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None);
if (!parseResult.ValidationReport.IsValid) if (!parseResult.ValidationReport.IsValid)

View File

@@ -0,0 +1,93 @@
using RolemasterDb.ImportTool.Parsing;
namespace RolemasterDb.ImportTool;
/// <summary>
/// Renders PNG images for a parsed critical table: one image per page that has geometry, and one padded
/// crop per parsed result. The image path and crop are written back onto each result and onto the cell
/// artifact with the same group/roll-band/column key, when one exists.
/// </summary>
public sealed class CriticalSourceImageArtifactGenerator(PdfXmlExtractor pdfXmlExtractor)
{
    // Padding added around a result's source bounds before cropping, in the same units as the bounds.
    private const int CropPaddingX = 12;
    private const int CropPaddingY = 8;

    /// <summary>
    /// Generates the page and cell images for <paramref name="parseResult"/> and records the cell image
    /// path and crop on the parse result objects.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF handed to the extractor's render methods.</param>
    /// <param name="artifactPaths">Target locations for the page and cell images.</param>
    /// <param name="parseResult">Parsed table whose results and cells are annotated in place.</param>
    /// <param name="cancellationToken">Token forwarded to every render call.</param>
    /// <exception cref="InvalidOperationException">
    /// A result refers to a page number that has no entry in the parse result's page geometries.
    /// </exception>
    public async Task GenerateAsync(
        string pdfPath,
        ImportArtifactPaths artifactPaths,
        CriticalTableParseResult parseResult,
        CancellationToken cancellationToken = default)
    {
        Directory.CreateDirectory(artifactPaths.PagesDirectoryPath);
        Directory.CreateDirectory(artifactPaths.CellsDirectoryPath);

        // Built before any rendering so duplicate page numbers fail up front.
        var geometryLookup = parseResult.PageGeometries.ToDictionary(geometry => geometry.PageNumber);

        await RenderPageImagesAsync(pdfPath, artifactPaths, parseResult, cancellationToken);

        var cellLookup = parseResult.Cells.ToDictionary(
            cell => CreateCellKey(cell.GroupKey, cell.RollBandLabel, cell.ColumnKey),
            StringComparer.Ordinal);

        foreach (var parsedResult in parseResult.Table.Results)
        {
            var pageNumber = parsedResult.SourceBounds.PageNumber;
            if (!geometryLookup.TryGetValue(pageNumber, out var geometry))
            {
                throw new InvalidOperationException(
                    $"Missing page geometry for page {parsedResult.SourceBounds.PageNumber} in table '{parseResult.Table.Slug}'.");
            }

            var cropRegion = CreateCrop(parsedResult.SourceBounds, geometry);
            var cellImageRelativePath = artifactPaths.GetRelativeCellImagePath(
                parsedResult.GroupKey,
                parsedResult.ColumnKey,
                parsedResult.RollBandLabel);
            var cellImageFullPath = artifactPaths.ResolveRelativePath(cellImageRelativePath);

            await pdfXmlExtractor.RenderCropPngAsync(
                pdfPath,
                cropRegion.PageNumber,
                cropRegion.CropLeft,
                cropRegion.CropTop,
                cropRegion.CropWidth,
                cropRegion.CropHeight,
                cellImageFullPath,
                cancellationToken);

            parsedResult.SourceImagePath = cellImageRelativePath;
            parsedResult.SourceImageCrop = cropRegion;

            // Mirror the same data onto the matching cell artifact; results without one are left alone.
            var lookupKey = CreateCellKey(parsedResult.GroupKey, parsedResult.RollBandLabel, parsedResult.ColumnKey);
            if (!cellLookup.TryGetValue(lookupKey, out var matchingCell))
            {
                continue;
            }

            matchingCell.SourceImagePath = cellImageRelativePath;
            matchingCell.SourceImageCrop = cropRegion;
        }
    }

    /// <summary>Renders every page that has geometry, in ascending page-number order.</summary>
    private async Task RenderPageImagesAsync(
        string pdfPath,
        ImportArtifactPaths artifactPaths,
        CriticalTableParseResult parseResult,
        CancellationToken cancellationToken)
    {
        var orderedPages = parseResult.PageGeometries.OrderBy(geometry => geometry.PageNumber);
        foreach (var page in orderedPages)
        {
            var pageImagePath = artifactPaths.GetPageImagePath(page.PageNumber);
            await pdfXmlExtractor.RenderPagePngAsync(pdfPath, page.PageNumber, pageImagePath, cancellationToken);
        }
    }

    /// <summary>
    /// Expands the source bounds by the crop padding, clamps the rectangle to the page, and guarantees a
    /// crop of at least one unit in each dimension.
    /// </summary>
    private static CriticalSourceImageCrop CreateCrop(
        ParsedCriticalSourceRect sourceBounds,
        ParsedPdfPageGeometry pageGeometry)
    {
        var paddedLeft = sourceBounds.Left - CropPaddingX;
        var paddedTop = sourceBounds.Top - CropPaddingY;
        var paddedRight = sourceBounds.Left + sourceBounds.Width + CropPaddingX;
        var paddedBottom = sourceBounds.Top + sourceBounds.Height + CropPaddingY;

        var clampedLeft = Math.Max(0, paddedLeft);
        var clampedTop = Math.Max(0, paddedTop);
        var clampedRight = Math.Min(pageGeometry.Width, paddedRight);
        var clampedBottom = Math.Min(pageGeometry.Height, paddedBottom);

        return new CriticalSourceImageCrop(
            pageNumber: sourceBounds.PageNumber,
            pageWidth: pageGeometry.Width,
            pageHeight: pageGeometry.Height,
            boundsLeft: sourceBounds.Left,
            boundsTop: sourceBounds.Top,
            boundsWidth: sourceBounds.Width,
            boundsHeight: sourceBounds.Height,
            cropLeft: clampedLeft,
            cropTop: clampedTop,
            cropWidth: Math.Max(1, clampedRight - clampedLeft),
            cropHeight: Math.Max(1, clampedBottom - clampedTop),
            renderDpi: PdfXmlExtractor.XmlAlignedRenderDpi);
    }

    /// <summary>Builds the pipe-separated lookup key; a null group key is treated as an empty string.</summary>
    private static string CreateCellKey(string? groupKey, string rollBandLabel, string columnKey)
    {
        var groupPart = groupKey ?? string.Empty;
        return $"{groupPart}|{rollBandLabel}|{columnKey}";
    }
}

View File

@@ -1,35 +1,100 @@
using System.Text;
namespace RolemasterDb.ImportTool; namespace RolemasterDb.ImportTool;
/// <summary>
/// File-system layout of the import artifacts for one table: a directory named after the table slug under
/// the artifacts root, holding the XML, JSON reports, and the <c>pages</c> and <c>cells</c> image folders.
/// </summary>
public sealed class ImportArtifactPaths
{
    private ImportArtifactPaths(string artifactsRootPath, string tableSlug)
    {
        ArtifactsRootPath = artifactsRootPath;
        TableSlug = tableSlug;
        DirectoryPath = Path.Combine(artifactsRootPath, tableSlug);
        XmlPath = Path.Combine(DirectoryPath, "source.xml");
        FragmentsJsonPath = Path.Combine(DirectoryPath, "fragments.json");
        ParsedCellsJsonPath = Path.Combine(DirectoryPath, "parsed-cells.json");
        ValidationReportPath = Path.Combine(DirectoryPath, "validation-report.json");
        PagesDirectoryPath = Path.Combine(DirectoryPath, "pages");
        CellsDirectoryPath = Path.Combine(DirectoryPath, "cells");
    }

    /// <summary>Root directory that relative artifact paths are resolved against.</summary>
    public string ArtifactsRootPath { get; }

    /// <summary>Slug of the table; also the name of the table's directory under the root.</summary>
    public string TableSlug { get; }

    /// <summary>Directory holding all artifacts of this table.</summary>
    public string DirectoryPath { get; }

    /// <summary>Path of <c>source.xml</c> inside the table directory.</summary>
    public string XmlPath { get; }

    /// <summary>Path of <c>fragments.json</c> inside the table directory.</summary>
    public string FragmentsJsonPath { get; }

    /// <summary>Path of <c>parsed-cells.json</c> inside the table directory.</summary>
    public string ParsedCellsJsonPath { get; }

    /// <summary>Path of <c>validation-report.json</c> inside the table directory.</summary>
    public string ValidationReportPath { get; }

    /// <summary>Directory that receives the rendered page images.</summary>
    public string PagesDirectoryPath { get; }

    /// <summary>Directory that receives the rendered cell images.</summary>
    public string CellsDirectoryPath { get; }

    /// <summary>Builds the path set for <paramref name="tableSlug"/> under <paramref name="artifactsRootPath"/>.</summary>
    public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug)
    {
        return new ImportArtifactPaths(artifactsRootPath, tableSlug);
    }

    /// <summary>Returns the page image path: <c>page-NNN.png</c> (zero-padded to three digits) in the pages directory.</summary>
    public string GetPageImagePath(int pageNumber)
    {
        var fileName = $"page-{pageNumber:000}.png";
        return Path.Combine(PagesDirectoryPath, fileName);
    }

    /// <summary>
    /// Returns the cell image path relative to the artifacts root, always using forward slashes:
    /// <c>{slug}/cells/{group}__{column}__{rollBand}.png</c>, with each part normalized for use in a
    /// file name and a null group written as <c>none</c>.
    /// </summary>
    public string GetRelativeCellImagePath(string? groupKey, string columnKey, string rollBandLabel)
    {
        var groupSegment = NormalizeFileSegment(groupKey ?? "none");
        var columnSegment = NormalizeFileSegment(columnKey);
        var rollBandSegment = NormalizeFileSegment(rollBandLabel);
        var fileName = $"{groupSegment}__{columnSegment}__{rollBandSegment}.png";

        var combined = Path.Combine(TableSlug, "cells", fileName);
        return combined.Replace('\\', '/');
    }

    /// <summary>Turns a forward-slash relative artifact path into a full path under the artifacts root.</summary>
    public string ResolveRelativePath(string relativePath)
    {
        var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
        return Path.GetFullPath(Path.Combine(ArtifactsRootPath, platformRelativePath));
    }

    /// <summary>
    /// Lower-cases and trims the value, keeps letters, digits, '-' and '_', spells '+' as "plus", and
    /// replaces every other character with '_'. Leading and trailing underscores are stripped; a value
    /// that ends up blank becomes "empty".
    /// </summary>
    private static string NormalizeFileSegment(string value)
    {
        var segment = new StringBuilder();

        foreach (var ch in value.Trim().ToLowerInvariant())
        {
            if (ch == '+')
            {
                segment.Append("plus");
            }
            else if (char.IsLetterOrDigit(ch) || ch is '-' or '_')
            {
                segment.Append(ch);
            }
            else
            {
                segment.Append('_');
            }
        }

        var trimmed = segment.ToString().Trim('_');
        if (string.IsNullOrWhiteSpace(trimmed))
        {
            return "empty";
        }

        return trimmed;
    }
}

View File

@@ -0,0 +1,29 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Immutable description of one cell image crop: the page it was taken from, the page dimensions, the
/// original source bounds, the crop rectangle that was actually rendered, and the render DPI.
/// </summary>
public sealed class CriticalSourceImageCrop
{
    /// <summary>Creates a crop description; every argument is stored unchanged in the matching property.</summary>
    public CriticalSourceImageCrop(
        int pageNumber,
        int pageWidth,
        int pageHeight,
        int boundsLeft,
        int boundsTop,
        int boundsWidth,
        int boundsHeight,
        int cropLeft,
        int cropTop,
        int cropWidth,
        int cropHeight,
        int renderDpi)
    {
        PageNumber = pageNumber;
        PageWidth = pageWidth;
        PageHeight = pageHeight;
        BoundsLeft = boundsLeft;
        BoundsTop = boundsTop;
        BoundsWidth = boundsWidth;
        BoundsHeight = boundsHeight;
        CropLeft = cropLeft;
        CropTop = cropTop;
        CropWidth = cropWidth;
        CropHeight = cropHeight;
        RenderDpi = renderDpi;
    }

    /// <summary>Page number the crop belongs to.</summary>
    public int PageNumber { get; }

    /// <summary>Width of the page.</summary>
    public int PageWidth { get; }

    /// <summary>Height of the page.</summary>
    public int PageHeight { get; }

    /// <summary>Left edge of the unpadded source bounds.</summary>
    public int BoundsLeft { get; }

    /// <summary>Top edge of the unpadded source bounds.</summary>
    public int BoundsTop { get; }

    /// <summary>Width of the unpadded source bounds.</summary>
    public int BoundsWidth { get; }

    /// <summary>Height of the unpadded source bounds.</summary>
    public int BoundsHeight { get; }

    /// <summary>Left edge of the crop rectangle.</summary>
    public int CropLeft { get; }

    /// <summary>Top edge of the crop rectangle.</summary>
    public int CropTop { get; }

    /// <summary>Width of the crop rectangle.</summary>
    public int CropWidth { get; }

    /// <summary>Height of the crop rectangle.</summary>
    public int CropHeight { get; }

    /// <summary>DPI the image was rendered at.</summary>
    public int RenderDpi { get; }
}

View File

@@ -4,6 +4,8 @@ namespace RolemasterDb.ImportTool;
public sealed class PdfXmlExtractor public sealed class PdfXmlExtractor
{ {
/// <summary>
/// Resolution, in DPI, passed to <c>pdftoppm</c> through <c>-r</c> for every page and crop render.
/// </summary>
/// <remarks>
/// NOTE(review): the name suggests 108 was chosen so rendered pixels line up with the coordinates in the
/// extracted XML (108 = 72 x 1.5, presumably pdftohtml's zoom factor) — confirm against the extraction step.
/// </remarks>
public const int XmlAlignedRenderDpi = 108;
public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default) public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
{ {
Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!); Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);
@@ -33,4 +35,83 @@ public sealed class PdfXmlExtractor
throw new InvalidOperationException($"pdftohtml failed for '{pdfPath}': {error}"); throw new InvalidOperationException($"pdftohtml failed for '{pdfPath}': {error}");
} }
} }
/// <summary>Renders one whole page of the PDF to a PNG file.</summary>
/// <param name="pdfPath">Path of the source PDF.</param>
/// <param name="pageNumber">Page to render; used as both first and last page of the render.</param>
/// <param name="outputPath">Path of the PNG file to create.</param>
/// <param name="cancellationToken">Token forwarded to the render.</param>
public Task RenderPagePngAsync(
    string pdfPath,
    int pageNumber,
    string outputPath,
    CancellationToken cancellationToken = default)
{
    // No crop rectangle: all four crop values stay null so the full page is rendered.
    return RenderPngAsync(
        pdfPath: pdfPath,
        pageNumber: pageNumber,
        outputPath: outputPath,
        left: null,
        top: null,
        width: null,
        height: null,
        cancellationToken: cancellationToken);
}
/// <summary>Renders a rectangular region of one PDF page to a PNG file.</summary>
/// <param name="pdfPath">Path of the source PDF.</param>
/// <param name="pageNumber">Page the region is taken from.</param>
/// <param name="left">Value passed as the crop's x offset.</param>
/// <param name="top">Value passed as the crop's y offset.</param>
/// <param name="width">Value passed as the crop width.</param>
/// <param name="height">Value passed as the crop height.</param>
/// <param name="outputPath">Path of the PNG file to create.</param>
/// <param name="cancellationToken">Token forwarded to the render.</param>
public Task RenderCropPngAsync(
    string pdfPath,
    int pageNumber,
    int left,
    int top,
    int width,
    int height,
    string outputPath,
    CancellationToken cancellationToken = default)
{
    return RenderPngAsync(
        pdfPath: pdfPath,
        pageNumber: pageNumber,
        outputPath: outputPath,
        left: left,
        top: top,
        width: width,
        height: height,
        cancellationToken: cancellationToken);
}
/// <summary>
/// Runs <c>pdftoppm</c> to render a single page, or a crop of it when all four crop values are supplied,
/// to <paramref name="outputPath"/> at <see cref="XmlAlignedRenderDpi"/>.
/// </summary>
/// <param name="pdfPath">Path of the source PDF.</param>
/// <param name="pageNumber">Page to render; passed as both <c>-f</c> and <c>-l</c>.</param>
/// <param name="outputPath">PNG file to create; its directory is created when missing.</param>
/// <param name="left">Crop x offset (<c>-x</c>), or null for a full-page render.</param>
/// <param name="top">Crop y offset (<c>-y</c>), or null for a full-page render.</param>
/// <param name="width">Crop width (<c>-W</c>), or null for a full-page render.</param>
/// <param name="height">Crop height (<c>-H</c>), or null for a full-page render.</param>
/// <param name="cancellationToken">Cancels the wait and terminates the child process.</param>
/// <exception cref="InvalidOperationException">
/// <c>pdftoppm</c> exited with a non-zero code, or exited successfully without creating the output file.
/// </exception>
private static async Task RenderPngAsync(
    string pdfPath,
    int pageNumber,
    string outputPath,
    int? left,
    int? top,
    int? width,
    int? height,
    CancellationToken cancellationToken)
{
    // Command-line arguments are machine-readable, so format numbers culture-independently.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var outputDirectory = Path.GetDirectoryName(outputPath)!;
    Directory.CreateDirectory(outputDirectory);

    var startInfo = new ProcessStartInfo
    {
        FileName = "pdftoppm",
        RedirectStandardError = true,
        RedirectStandardOutput = true,
        UseShellExecute = false,
        CreateNoWindow = true
    };

    startInfo.ArgumentList.Add("-png");
    startInfo.ArgumentList.Add("-r");
    startInfo.ArgumentList.Add(XmlAlignedRenderDpi.ToString(invariant));
    startInfo.ArgumentList.Add("-f");
    startInfo.ArgumentList.Add(pageNumber.ToString(invariant));
    startInfo.ArgumentList.Add("-l");
    startInfo.ArgumentList.Add(pageNumber.ToString(invariant));
    startInfo.ArgumentList.Add("-singlefile");

    if (left.HasValue && top.HasValue && width.HasValue && height.HasValue)
    {
        startInfo.ArgumentList.Add("-x");
        startInfo.ArgumentList.Add(left.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-y");
        startInfo.ArgumentList.Add(top.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-W");
        startInfo.ArgumentList.Add(width.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-H");
        startInfo.ArgumentList.Add(height.Value.ToString(invariant));
    }

    startInfo.ArgumentList.Add(pdfPath);

    // With -singlefile, pdftoppm appends ".png" to this root itself, so the extension is stripped here.
    startInfo.ArgumentList.Add(Path.Combine(outputDirectory, Path.GetFileNameWithoutExtension(outputPath)));

    using var process = new Process { StartInfo = startInfo };
    process.Start();

    // Drain both redirected pipes while the process runs. Waiting for exit first can deadlock: once a
    // pipe buffer fills up, the child blocks on its write and never exits.
    var standardOutputTask = process.StandardOutput.ReadToEndAsync(cancellationToken);
    var standardErrorTask = process.StandardError.ReadToEndAsync(cancellationToken);

    try
    {
        await process.WaitForExitAsync(cancellationToken);
    }
    catch (OperationCanceledException)
    {
        // Do not leave an orphaned pdftoppm behind that keeps writing to the output path.
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (InvalidOperationException)
        {
            // The process exited between the check and the kill; nothing left to terminate.
        }

        throw;
    }

    await standardOutputTask;
    var error = await standardErrorTask;

    if (process.ExitCode != 0)
    {
        throw new InvalidOperationException($"pdftoppm failed for '{pdfPath}': {error}");
    }

    if (!File.Exists(outputPath))
    {
        throw new InvalidOperationException($"pdftoppm completed but did not create '{outputPath}'.");
    }
}
} }