From 2936d7146fd7c038a6333d8bae183c0d4418d630 Mon Sep 17 00:00:00 2001
From: Frank Tovar
Date: Tue, 17 Mar 2026 22:28:17 +0100
Subject: [PATCH] Generate critical source image artifacts

---
 ...mportArtifactGenerationIntegrationTests.cs |  95 +++++++++++++++
 .../CriticalImportCommandRunner.cs            |  11 ++
 .../CriticalSourceImageArtifactGenerator.cs   | 111 +++++++++++++++++
 .../ImportArtifactPaths.cs                    |  77 +++++++++++-
 .../Parsing/CriticalSourceImageCrop.cs        |  33 +++++
 .../PdfXmlExtractor.cs                        | 113 ++++++++++++++++++
 6 files changed, 438 insertions(+), 2 deletions(-)
 create mode 100644 src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs
 create mode 100644 src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs
 create mode 100644 src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs

diff --git a/src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs b/src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs
new file mode 100644
index 0000000..3fb09e1
--- /dev/null
+++ b/src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs
@@ -0,0 +1,95 @@
+using RolemasterDb.ImportTool.Parsing;
+
+namespace RolemasterDb.ImportTool.Tests;
+
+/// <summary>
+/// End-to-end check that source-image generation renders page and cell PNGs for the "slash" table
+/// and records their locations on the parse result.
+/// </summary>
+public sealed class CriticalImportArtifactGenerationIntegrationTests
+{
+    private static readonly PdfXmlExtractor Extractor = new();
+    private static readonly StandardCriticalTableParser StandardParser = new();
+
+    [Fact]
+    public async Task Generated_artifacts_include_page_and_cell_source_images()
+    {
+        var (parseResult, artifactPaths) = await LoadPreparedSlashParseResultAsync();
+
+        try
+        {
+            var result = FindResult(parseResult, "71-75", "A");
+            var cellArtifact = parseResult.Cells.Single(item =>
+                item.GroupKey is null &&
+                item.RollBandLabel == "71-75" &&
+                item.ColumnKey == "A");
+
+            Assert.True(result.SourceBounds.PageNumber > 0);
+            Assert.True(result.SourceBounds.Width > 0);
+            Assert.True(result.SourceBounds.Height > 0);
+            Assert.NotNull(result.SourceImagePath);
+            Assert.NotNull(result.SourceImageCrop);
+            Assert.Equal(result.SourceImagePath, cellArtifact.SourceImagePath);
+            Assert.NotNull(cellArtifact.SourceImageCrop);
+            Assert.True(File.Exists(artifactPaths.GetPageImagePath(result.SourceBounds.PageNumber)));
+            Assert.True(File.Exists(artifactPaths.ResolveRelativePath(result.SourceImagePath!)));
+        }
+        finally
+        {
+            // Every run renders into a fresh GUID-named root; remove it so temp storage does not grow unbounded.
+            Directory.Delete(artifactPaths.ArtifactsRootPath, recursive: true);
+        }
+    }
+
+    private static async Task<(CriticalTableParseResult ParseResult, ImportArtifactPaths ArtifactPaths)> LoadPreparedSlashParseResultAsync()
+    {
+        var entry = LoadManifest().Tables.Single(item => item.Slug == "slash");
+        var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");
+
+        if (!File.Exists(xmlPath))
+        {
+            await Extractor.ExtractAsync(Path.Combine(GetRepositoryRoot(), entry.PdfPath), xmlPath);
+        }
+
+        var parseResult = StandardParser.Parse(entry, await File.ReadAllTextAsync(xmlPath));
+        var artifactRoot = Path.Combine(GetArtifactCacheRoot(), Guid.NewGuid().ToString("N"));
+        var artifactPaths = ImportArtifactPaths.Create(artifactRoot, entry.Slug);
+        var generator = new CriticalSourceImageArtifactGenerator(Extractor);
+
+        await generator.GenerateAsync(Path.Combine(GetRepositoryRoot(), entry.PdfPath), artifactPaths, parseResult);
+        return (parseResult, artifactPaths);
+    }
+
+    private static ParsedCriticalResult FindResult(CriticalTableParseResult parseResult, string rollBandLabel, string columnKey) =>
+        parseResult.Table.Results.Single(item =>
+            item.GroupKey is null &&
+            item.RollBandLabel == rollBandLabel &&
+            item.ColumnKey == columnKey);
+
+    private static CriticalImportManifest LoadManifest() =>
+        new CriticalImportManifestLoader().Load(Path.Combine(GetRepositoryRoot(), "sources", "critical-import-manifest.json"));
+
+    private static string GetArtifactCacheRoot()
+    {
+        var cacheRoot = Path.Combine(Path.GetTempPath(), "RolemasterDb.ImportTool.MergeTests");
+        Directory.CreateDirectory(cacheRoot);
+        return cacheRoot;
+    }
+
+    private static string GetRepositoryRoot()
+    {
+        var probe = new DirectoryInfo(AppContext.BaseDirectory);
+
+        while (probe is not null)
+        {
+            if (File.Exists(Path.Combine(probe.FullName, "RolemasterDB.slnx")))
+            {
+                return probe.FullName;
+            }
+
+            probe = probe.Parent;
+        }
+
+        throw new InvalidOperationException("Could not find the repository root for integration tests.");
+    }
+}
diff --git a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs
index 8810a17..5bdae92 100644
--- a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs
+++ b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs
@@ -7,10 +7,16 @@ public sealed class CriticalImportCommandRunner
     private readonly CriticalImportManifestLoader manifestLoader = new();
     private readonly ImportArtifactWriter artifactWriter = new();
     private readonly PdfXmlExtractor pdfXmlExtractor = new();
+    private readonly CriticalSourceImageArtifactGenerator sourceImageArtifactGenerator;
     private readonly StandardCriticalTableParser standardParser = new();
     private readonly VariantColumnCriticalTableParser variantColumnParser = new();
     private readonly GroupedVariantCriticalTableParser groupedVariantParser = new();
 
+    public CriticalImportCommandRunner()
+    {
+        sourceImageArtifactGenerator = new CriticalSourceImageArtifactGenerator(pdfXmlExtractor);
+    }
+
     public async Task RunAsync(ResetOptions options)
     {
         if (!string.Equals(options.Target, "criticals", StringComparison.OrdinalIgnoreCase))
@@ -47,6 +53,11 @@ public sealed class CriticalImportCommandRunner
 
         var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath);
         var parseResult = Parse(entry, xmlContent);
+        await sourceImageArtifactGenerator.GenerateAsync(
+            ResolveRepositoryPath(entry.PdfPath),
+            artifactPaths,
+            parseResult,
+            CancellationToken.None);
         await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None);
 
         if (!parseResult.ValidationReport.IsValid)
diff --git a/src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs b/src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs
new file mode 100644
index 0000000..6320258
--- /dev/null
+++ b/src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs
@@ -0,0 +1,111 @@
+using RolemasterDb.ImportTool.Parsing;
+
+namespace RolemasterDb.ImportTool;
+
+/// <summary>
+/// Renders the PNG artifacts for a parsed critical table: one image per PDF page plus one padded
+/// crop per parsed result, and records each crop on the result and its matching cell artifact.
+/// </summary>
+public sealed class CriticalSourceImageArtifactGenerator(PdfXmlExtractor pdfXmlExtractor)
+{
+    // Margin added around a result's source bounds before cropping (clamped to the page).
+    private const int CropPaddingX = 12;
+    private const int CropPaddingY = 8;
+
+    /// <summary>
+    /// Renders every page in <paramref name="parseResult"/> and a crop for every table result.
+    /// </summary>
+    /// <param name="pdfPath">Path of the source PDF handed to the renderer.</param>
+    /// <param name="artifactPaths">Artifact layout that decides where the images are written.</param>
+    /// <param name="parseResult">Parse result whose results and cells receive the image path and crop.</param>
+    /// <param name="cancellationToken">Token forwarded to each render call.</param>
+    /// <exception cref="InvalidOperationException">
+    /// A result refers to a page that has no entry in the parse result's page geometries.
+    /// </exception>
+    public async Task GenerateAsync(
+        string pdfPath,
+        ImportArtifactPaths artifactPaths,
+        CriticalTableParseResult parseResult,
+        CancellationToken cancellationToken = default)
+    {
+        Directory.CreateDirectory(artifactPaths.PagesDirectoryPath);
+        Directory.CreateDirectory(artifactPaths.CellsDirectoryPath);
+
+        var pageGeometriesByNumber = parseResult.PageGeometries.ToDictionary(item => item.PageNumber);
+        foreach (var pageGeometry in parseResult.PageGeometries.OrderBy(item => item.PageNumber))
+        {
+            await pdfXmlExtractor.RenderPagePngAsync(
+                pdfPath,
+                pageGeometry.PageNumber,
+                artifactPaths.GetPageImagePath(pageGeometry.PageNumber),
+                cancellationToken);
+        }
+
+        var cellsByKey = parseResult.Cells.ToDictionary(
+            item => CreateCellKey(item.GroupKey, item.RollBandLabel, item.ColumnKey),
+            StringComparer.Ordinal);
+
+        foreach (var result in parseResult.Table.Results)
+        {
+            if (!pageGeometriesByNumber.TryGetValue(result.SourceBounds.PageNumber, out var pageGeometry))
+            {
+                throw new InvalidOperationException(
+                    $"Missing page geometry for page {result.SourceBounds.PageNumber} in table '{parseResult.Table.Slug}'.");
+            }
+
+            var crop = CreateCrop(result.SourceBounds, pageGeometry);
+            var relativePath = artifactPaths.GetRelativeCellImagePath(result.GroupKey, result.ColumnKey, result.RollBandLabel);
+            var fullPath = artifactPaths.ResolveRelativePath(relativePath);
+
+            await pdfXmlExtractor.RenderCropPngAsync(
+                pdfPath,
+                crop.PageNumber,
+                crop.CropLeft,
+                crop.CropTop,
+                crop.CropWidth,
+                crop.CropHeight,
+                fullPath,
+                cancellationToken);
+
+            result.SourceImagePath = relativePath;
+            result.SourceImageCrop = crop;
+
+            // Mirror the image onto the cell artifact that shares this result's key, when one exists.
+            var cellKey = CreateCellKey(result.GroupKey, result.RollBandLabel, result.ColumnKey);
+            if (cellsByKey.TryGetValue(cellKey, out var cellArtifact))
+            {
+                cellArtifact.SourceImagePath = relativePath;
+                cellArtifact.SourceImageCrop = crop;
+            }
+        }
+    }
+
+    // Pads the source bounds, clamps them to the page, and guarantees a crop of at least 1x1.
+    private static CriticalSourceImageCrop CreateCrop(
+        ParsedCriticalSourceRect sourceBounds,
+        ParsedPdfPageGeometry pageGeometry)
+    {
+        var cropLeft = Math.Max(0, sourceBounds.Left - CropPaddingX);
+        var cropTop = Math.Max(0, sourceBounds.Top - CropPaddingY);
+        var cropRight = Math.Min(pageGeometry.Width, sourceBounds.Left + sourceBounds.Width + CropPaddingX);
+        var cropBottom = Math.Min(pageGeometry.Height, sourceBounds.Top + sourceBounds.Height + CropPaddingY);
+
+        return new CriticalSourceImageCrop(
+            sourceBounds.PageNumber,
+            pageGeometry.Width,
+            pageGeometry.Height,
+            sourceBounds.Left,
+            sourceBounds.Top,
+            sourceBounds.Width,
+            sourceBounds.Height,
+            cropLeft,
+            cropTop,
+            Math.Max(1, cropRight - cropLeft),
+            Math.Max(1, cropBottom - cropTop),
+            PdfXmlExtractor.XmlAlignedRenderDpi);
+    }
+
+    // Composite lookup key; a missing group key is treated as the empty string.
+    private static string CreateCellKey(string? groupKey, string rollBandLabel, string columnKey) =>
+        $"{groupKey ?? string.Empty}|{rollBandLabel}|{columnKey}";
+}
diff --git a/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs b/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs
index 8b45346..cf6ee6b 100644
--- a/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs
+++ b/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs
@@ -1,35 +1,108 @@
+using System.Text;
+
 namespace RolemasterDb.ImportTool;
 
 public sealed class ImportArtifactPaths
 {
     private ImportArtifactPaths(
+        string artifactsRootPath,
+        string tableSlug,
         string directoryPath,
         string xmlPath,
         string fragmentsJsonPath,
         string parsedCellsJsonPath,
-        string validationReportPath)
+        string validationReportPath,
+        string pagesDirectoryPath,
+        string cellsDirectoryPath)
     {
+        ArtifactsRootPath = artifactsRootPath;
+        TableSlug = tableSlug;
         DirectoryPath = directoryPath;
         XmlPath = xmlPath;
         FragmentsJsonPath = fragmentsJsonPath;
         ParsedCellsJsonPath = parsedCellsJsonPath;
         ValidationReportPath = validationReportPath;
+        PagesDirectoryPath = pagesDirectoryPath;
+        CellsDirectoryPath = cellsDirectoryPath;
     }
 
+    public string ArtifactsRootPath { get; }
+    public string TableSlug { get; }
     public string DirectoryPath { get; }
     public string XmlPath { get; }
     public string FragmentsJsonPath { get; }
     public string ParsedCellsJsonPath { get; }
     public string ValidationReportPath { get; }
+    public string PagesDirectoryPath { get; }
+    public string CellsDirectoryPath { get; }
 
     public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug)
     {
         var directoryPath = Path.Combine(artifactsRootPath, tableSlug);
+        var pagesDirectoryPath = Path.Combine(directoryPath, "pages");
+        var cellsDirectoryPath = Path.Combine(directoryPath, "cells");
+
         return new ImportArtifactPaths(
+            artifactsRootPath,
+            tableSlug,
             directoryPath,
             Path.Combine(directoryPath, "source.xml"),
             Path.Combine(directoryPath, "fragments.json"),
             Path.Combine(directoryPath, "parsed-cells.json"),
-            Path.Combine(directoryPath, "validation-report.json"));
+            Path.Combine(directoryPath, "validation-report.json"),
+            pagesDirectoryPath,
+            cellsDirectoryPath);
+    }
+
+    /// <summary>Gets the PNG path for a rendered page inside <see cref="PagesDirectoryPath"/>.</summary>
+    public string GetPageImagePath(int pageNumber) =>
+        Path.Combine(PagesDirectoryPath, $"page-{pageNumber:000}.png");
+
+    /// <summary>
+    /// Gets the cell image path relative to <see cref="ArtifactsRootPath"/>, with any '\' replaced by '/'.
+    /// A null group key is written as "none".
+    /// </summary>
+    public string GetRelativeCellImagePath(string? groupKey, string columnKey, string rollBandLabel) =>
+        Path.Combine(
+            TableSlug,
+            "cells",
+            $"{NormalizeFileSegment(groupKey ?? "none")}__{NormalizeFileSegment(columnKey)}__{NormalizeFileSegment(rollBandLabel)}.png")
+            .Replace('\\', '/');
+
+    /// <summary>Maps a '/'-separated path relative to <see cref="ArtifactsRootPath"/> to a full path.</summary>
+    public string ResolveRelativePath(string relativePath) =>
+        Path.GetFullPath(Path.Combine(ArtifactsRootPath, relativePath.Replace('/', Path.DirectorySeparatorChar)));
+
+    // Lower-cases the value and keeps letters, digits, '-' and '_'; '+' becomes "plus" and any other
+    // character becomes '_'. Leading/trailing '_' are trimmed and an empty result becomes "empty".
+    private static string NormalizeFileSegment(string value)
+    {
+        var builder = new StringBuilder();
+
+        foreach (var character in value.Trim().ToLowerInvariant())
+        {
+            if (char.IsLetterOrDigit(character))
+            {
+                builder.Append(character);
+                continue;
+            }
+
+            if (character is '-' or '_')
+            {
+                builder.Append(character);
+                continue;
+            }
+
+            if (character == '+')
+            {
+                builder.Append("plus");
+                continue;
+            }
+
+            builder.Append('_');
+        }
+
+        var normalized = builder.ToString().Trim('_');
+        return string.IsNullOrWhiteSpace(normalized) ? "empty" : normalized;
     }
 }
diff --git a/src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs b/src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs
new file mode 100644
index 0000000..169dec9
--- /dev/null
+++ b/src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs
@@ -0,0 +1,33 @@
+namespace RolemasterDb.ImportTool.Parsing;
+
+/// <summary>
+/// Describes the crop rendered for one parsed result: the page it came from, the page size, the
+/// original source bounds, the padded crop rectangle, and the DPI used for rendering.
+/// </summary>
+public sealed class CriticalSourceImageCrop(
+    int pageNumber,
+    int pageWidth,
+    int pageHeight,
+    int boundsLeft,
+    int boundsTop,
+    int boundsWidth,
+    int boundsHeight,
+    int cropLeft,
+    int cropTop,
+    int cropWidth,
+    int cropHeight,
+    int renderDpi)
+{
+    public int PageNumber { get; } = pageNumber;
+    public int PageWidth { get; } = pageWidth;
+    public int PageHeight { get; } = pageHeight;
+    public int BoundsLeft { get; } = boundsLeft;
+    public int BoundsTop { get; } = boundsTop;
+    public int BoundsWidth { get; } = boundsWidth;
+    public int BoundsHeight { get; } = boundsHeight;
+    public int CropLeft { get; } = cropLeft;
+    public int CropTop { get; } = cropTop;
+    public int CropWidth { get; } = cropWidth;
+    public int CropHeight { get; } = cropHeight;
+    public int RenderDpi { get; } = renderDpi;
+}
diff --git a/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs b/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
index 0bd0198..b6d23d8 100644
--- a/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
+++ b/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
@@ -4,6 +4,9 @@ namespace RolemasterDb.ImportTool;
 
 public sealed class PdfXmlExtractor
 {
+    /// <summary>Resolution (DPI) passed to pdftoppm for every page and crop render.</summary>
+    public const int XmlAlignedRenderDpi = 108;
+
     public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
     {
         Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);
@@ -33,4 +36,114 @@ public sealed class PdfXmlExtractor
             throw new InvalidOperationException($"pdftohtml failed for '{pdfPath}': {error}");
         }
     }
+
+    /// <summary>Renders one whole PDF page to <paramref name="outputPath"/> as a PNG.</summary>
+    public Task RenderPagePngAsync(
+        string pdfPath,
+        int pageNumber,
+        string outputPath,
+        CancellationToken cancellationToken = default) =>
+        RenderPngAsync(pdfPath, pageNumber, outputPath, null, null, null, null, cancellationToken);
+
+    /// <summary>
+    /// Renders a rectangle of one PDF page to <paramref name="outputPath"/> as a PNG. The rectangle
+    /// is passed to pdftoppm unchanged as its -x, -y, -W and -H arguments.
+    /// </summary>
+    public Task RenderCropPngAsync(
+        string pdfPath,
+        int pageNumber,
+        int left,
+        int top,
+        int width,
+        int height,
+        string outputPath,
+        CancellationToken cancellationToken = default) =>
+        RenderPngAsync(pdfPath, pageNumber, outputPath, left, top, width, height, cancellationToken);
+
+    // Runs pdftoppm for a single page; the crop arguments are added only when all four are supplied.
+    private static async Task RenderPngAsync(
+        string pdfPath,
+        int pageNumber,
+        string outputPath,
+        int? left,
+        int? top,
+        int? width,
+        int? height,
+        CancellationToken cancellationToken)
+    {
+        Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);
+
+        var startInfo = new ProcessStartInfo
+        {
+            FileName = "pdftoppm",
+            RedirectStandardError = true,
+            RedirectStandardOutput = true,
+            UseShellExecute = false,
+            CreateNoWindow = true
+        };
+
+        // Command-line numbers must not depend on the current culture's number format.
+        var invariant = System.Globalization.CultureInfo.InvariantCulture;
+
+        startInfo.ArgumentList.Add("-png");
+        startInfo.ArgumentList.Add("-r");
+        startInfo.ArgumentList.Add(XmlAlignedRenderDpi.ToString(invariant));
+        startInfo.ArgumentList.Add("-f");
+        startInfo.ArgumentList.Add(pageNumber.ToString(invariant));
+        startInfo.ArgumentList.Add("-l");
+        startInfo.ArgumentList.Add(pageNumber.ToString(invariant));
+        startInfo.ArgumentList.Add("-singlefile");
+
+        if (left.HasValue && top.HasValue && width.HasValue && height.HasValue)
+        {
+            startInfo.ArgumentList.Add("-x");
+            startInfo.ArgumentList.Add(left.Value.ToString(invariant));
+            startInfo.ArgumentList.Add("-y");
+            startInfo.ArgumentList.Add(top.Value.ToString(invariant));
+            startInfo.ArgumentList.Add("-W");
+            startInfo.ArgumentList.Add(width.Value.ToString(invariant));
+            startInfo.ArgumentList.Add("-H");
+            startInfo.ArgumentList.Add(height.Value.ToString(invariant));
+        }
+
+        startInfo.ArgumentList.Add(pdfPath);
+        // pdftoppm appends ".png" itself, so it is given the output path without the extension.
+        startInfo.ArgumentList.Add(Path.Combine(Path.GetDirectoryName(outputPath)!, Path.GetFileNameWithoutExtension(outputPath)));
+
+        using var process = new Process { StartInfo = startInfo };
+        process.Start();
+
+        // Drain both redirected pipes while pdftoppm runs. Reading only after the wait can deadlock:
+        // the child blocks once a pipe buffer fills and therefore never exits.
+        var standardOutputTask = process.StandardOutput.ReadToEndAsync(cancellationToken);
+        var standardErrorTask = process.StandardError.ReadToEndAsync(cancellationToken);
+
+        try
+        {
+            await process.WaitForExitAsync(cancellationToken);
+        }
+        catch (OperationCanceledException)
+        {
+            // Do not leave the renderer running after the caller has given up on it.
+            if (!process.HasExited)
+            {
+                process.Kill(entireProcessTree: true);
+            }
+
+            throw;
+        }
+
+        await standardOutputTask;
+        var error = await standardErrorTask;
+
+        if (process.ExitCode != 0)
+        {
+            throw new InvalidOperationException($"pdftoppm failed for '{pdfPath}': {error}");
+        }
+
+        if (!File.Exists(outputPath))
+        {
+            throw new InvalidOperationException($"pdftoppm completed but did not create '{outputPath}'.");
+        }
+    }
 }