Generate critical source image artifacts

This commit is contained in:
2026-03-17 22:28:17 +01:00
parent 4979cf87f7
commit 2936d7146f
6 changed files with 363 additions and 2 deletions

View File

@@ -0,0 +1,82 @@
using RolemasterDb.ImportTool.Parsing;
namespace RolemasterDb.ImportTool.Tests;
/// <summary>
/// Integration test that parses the "slash" table from the import manifest, runs
/// <see cref="CriticalSourceImageArtifactGenerator"/> against it, and checks that page and cell
/// source images are produced and linked from the parse result.
/// </summary>
public sealed class CriticalImportArtifactGenerationIntegrationTests
{
    private static readonly PdfXmlExtractor Extractor = new();
    private static readonly StandardCriticalTableParser StandardParser = new();

    [Fact]
    public async Task Generated_artifacts_include_page_and_cell_source_images()
    {
        var (parseResult, artifactPaths) = await LoadPreparedSlashParseResultAsync();

        try
        {
            var result = FindResult(parseResult, "71-75", "A");
            var cellArtifact = parseResult.Cells.Single(item =>
                item.GroupKey is null &&
                item.RollBandLabel == "71-75" &&
                item.ColumnKey == "A");

            Assert.True(result.SourceBounds.PageNumber > 0);
            Assert.True(result.SourceBounds.Width > 0);
            Assert.True(result.SourceBounds.Height > 0);
            Assert.NotNull(result.SourceImagePath);
            Assert.NotNull(result.SourceImageCrop);
            Assert.Equal(result.SourceImagePath, cellArtifact.SourceImagePath);
            Assert.NotNull(cellArtifact.SourceImageCrop);
            Assert.True(File.Exists(artifactPaths.GetPageImagePath(result.SourceBounds.PageNumber)));
            Assert.True(File.Exists(artifactPaths.ResolveRelativePath(result.SourceImagePath!)));
        }
        finally
        {
            // Every run renders into a fresh GUID-named root; remove it so repeated runs do not
            // accumulate PNG files in the temp directory.
            DeleteDirectoryIfPresent(artifactPaths.ArtifactsRootPath);
        }
    }

    /// <summary>
    /// Parses the "slash" table (extracting its XML into the shared cache when missing) and
    /// generates source image artifacts into a new, uniquely named artifact root.
    /// </summary>
    private static async Task<(CriticalTableParseResult ParseResult, ImportArtifactPaths ArtifactPaths)> LoadPreparedSlashParseResultAsync()
    {
        var entry = LoadManifest().Tables.Single(item => item.Slug == "slash");
        var pdfPath = Path.Combine(GetRepositoryRoot(), entry.PdfPath);
        var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");

        // The extracted XML is cached across runs; only the rendered images are per-run.
        if (!File.Exists(xmlPath))
        {
            await Extractor.ExtractAsync(pdfPath, xmlPath);
        }

        var parseResult = StandardParser.Parse(entry, await File.ReadAllTextAsync(xmlPath));
        var artifactRoot = Path.Combine(GetArtifactCacheRoot(), Guid.NewGuid().ToString("N"));
        var artifactPaths = ImportArtifactPaths.Create(artifactRoot, entry.Slug);

        // Reuse the shared extractor instead of constructing a second instance.
        var generator = new CriticalSourceImageArtifactGenerator(Extractor);

        try
        {
            await generator.GenerateAsync(pdfPath, artifactPaths, parseResult);
        }
        catch
        {
            // The caller never receives the paths when generation fails, so clean up here.
            DeleteDirectoryIfPresent(artifactRoot);
            throw;
        }

        return (parseResult, artifactPaths);
    }

    /// <summary>Finds the single ungrouped result with the given roll band label and column key.</summary>
    private static ParsedCriticalResult FindResult(CriticalTableParseResult parseResult, string rollBandLabel, string columnKey) =>
        parseResult.Table.Results.Single(item =>
            item.GroupKey is null &&
            item.RollBandLabel == rollBandLabel &&
            item.ColumnKey == columnKey);

    /// <summary>Loads <c>sources/critical-import-manifest.json</c> from the repository root.</summary>
    private static CriticalImportManifest LoadManifest() =>
        new CriticalImportManifestLoader().Load(Path.Combine(GetRepositoryRoot(), "sources", "critical-import-manifest.json"));

    /// <summary>Returns the temp-directory cache root used by these tests, creating it if needed.</summary>
    private static string GetArtifactCacheRoot()
    {
        var cacheRoot = Path.Combine(Path.GetTempPath(), "RolemasterDb.ImportTool.MergeTests");
        Directory.CreateDirectory(cacheRoot);
        return cacheRoot;
    }

    /// <summary>
    /// Walks up from the test binaries' base directory until a directory containing
    /// <c>RolemasterDB.slnx</c> is found.
    /// </summary>
    /// <exception cref="InvalidOperationException">No ancestor directory contains the solution file.</exception>
    private static string GetRepositoryRoot()
    {
        var probe = new DirectoryInfo(AppContext.BaseDirectory);
        while (probe is not null)
        {
            if (File.Exists(Path.Combine(probe.FullName, "RolemasterDB.slnx")))
            {
                return probe.FullName;
            }

            probe = probe.Parent;
        }

        throw new InvalidOperationException("Could not find the repository root for integration tests.");
    }

    /// <summary>
    /// Best-effort recursive delete; cleanup failures are ignored so they cannot mask the
    /// actual test outcome.
    /// </summary>
    private static void DeleteDirectoryIfPresent(string directoryPath)
    {
        try
        {
            if (Directory.Exists(directoryPath))
            {
                Directory.Delete(directoryPath, recursive: true);
            }
        }
        catch (IOException)
        {
            // Deliberately ignored: leftover temp files are harmless.
        }
        catch (UnauthorizedAccessException)
        {
            // Deliberately ignored: leftover temp files are harmless.
        }
    }
}

View File

@@ -7,10 +7,16 @@ public sealed class CriticalImportCommandRunner
private readonly CriticalImportManifestLoader manifestLoader = new();
private readonly ImportArtifactWriter artifactWriter = new();
private readonly PdfXmlExtractor pdfXmlExtractor = new();
private readonly CriticalSourceImageArtifactGenerator sourceImageArtifactGenerator;
private readonly StandardCriticalTableParser standardParser = new();
private readonly VariantColumnCriticalTableParser variantColumnParser = new();
private readonly GroupedVariantCriticalTableParser groupedVariantParser = new();
/// <summary>
/// Creates the runner and wires the source image generator to this runner's own
/// <c>pdfXmlExtractor</c> field.
/// </summary>
public CriticalImportCommandRunner() =>
    sourceImageArtifactGenerator = new(pdfXmlExtractor);
public async Task<int> RunAsync(ResetOptions options)
{
if (!string.Equals(options.Target, "criticals", StringComparison.OrdinalIgnoreCase))
@@ -47,6 +53,11 @@ public sealed class CriticalImportCommandRunner
var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath);
var parseResult = Parse(entry, xmlContent);
await sourceImageArtifactGenerator.GenerateAsync(
ResolveRepositoryPath(entry.PdfPath),
artifactPaths,
parseResult,
CancellationToken.None);
await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None);
if (!parseResult.ValidationReport.IsValid)

View File

@@ -0,0 +1,93 @@
using RolemasterDb.ImportTool.Parsing;
namespace RolemasterDb.ImportTool;
/// <summary>
/// Renders full-page PNGs and per-result cell crops for a parsed critical table, then records
/// the relative image path and crop on each result and on its matching cell artifact.
/// </summary>
public sealed class CriticalSourceImageArtifactGenerator(PdfXmlExtractor pdfXmlExtractor)
{
    // Extra margin added around a result's source bounds before cropping, clamped to the page.
    private const int CropPaddingX = 12;
    private const int CropPaddingY = 8;

    /// <summary>
    /// Generates one page image per page geometry and one cropped image per table result.
    /// </summary>
    /// <param name="pdfPath">PDF handed to the extractor for rendering.</param>
    /// <param name="artifactPaths">Supplies the pages/cells directories and image paths.</param>
    /// <param name="parseResult">Parse result whose results and cells are updated in place.</param>
    /// <param name="cancellationToken">Forwarded to every render call.</param>
    /// <exception cref="InvalidOperationException">
    /// A result refers to a page number that has no page geometry.
    /// </exception>
    public async Task GenerateAsync(
        string pdfPath,
        ImportArtifactPaths artifactPaths,
        CriticalTableParseResult parseResult,
        CancellationToken cancellationToken = default)
    {
        Directory.CreateDirectory(artifactPaths.PagesDirectoryPath);
        Directory.CreateDirectory(artifactPaths.CellsDirectoryPath);

        // Built before any rendering so a duplicate page number fails fast.
        var geometryLookup = parseResult.PageGeometries.ToDictionary(geometry => geometry.PageNumber);

        await RenderPagesAsync(pdfPath, artifactPaths, parseResult, cancellationToken);

        var cellLookup = parseResult.Cells.ToDictionary(
            cell => CreateCellKey(cell.GroupKey, cell.RollBandLabel, cell.ColumnKey),
            StringComparer.Ordinal);

        foreach (var result in parseResult.Table.Results)
        {
            var pageNumber = result.SourceBounds.PageNumber;
            if (!geometryLookup.TryGetValue(pageNumber, out var geometry))
            {
                throw new InvalidOperationException(
                    $"Missing page geometry for page {pageNumber} in table '{parseResult.Table.Slug}'.");
            }

            var crop = CreateCrop(result.SourceBounds, geometry);
            var relativeImagePath = artifactPaths.GetRelativeCellImagePath(
                result.GroupKey,
                result.ColumnKey,
                result.RollBandLabel);

            await pdfXmlExtractor.RenderCropPngAsync(
                pdfPath,
                crop.PageNumber,
                crop.CropLeft,
                crop.CropTop,
                crop.CropWidth,
                crop.CropHeight,
                artifactPaths.ResolveRelativePath(relativeImagePath),
                cancellationToken);

            result.SourceImagePath = relativeImagePath;
            result.SourceImageCrop = crop;

            // Results without a matching cell artifact are left as-is.
            var lookupKey = CreateCellKey(result.GroupKey, result.RollBandLabel, result.ColumnKey);
            if (!cellLookup.TryGetValue(lookupKey, out var matchingCell))
            {
                continue;
            }

            matchingCell.SourceImagePath = relativeImagePath;
            matchingCell.SourceImageCrop = crop;
        }
    }

    /// <summary>Renders every page listed in the parse result, in ascending page order.</summary>
    private async Task RenderPagesAsync(
        string pdfPath,
        ImportArtifactPaths artifactPaths,
        CriticalTableParseResult parseResult,
        CancellationToken cancellationToken)
    {
        foreach (var geometry in parseResult.PageGeometries.OrderBy(geometry => geometry.PageNumber))
        {
            var pageImagePath = artifactPaths.GetPageImagePath(geometry.PageNumber);
            await pdfXmlExtractor.RenderPagePngAsync(
                pdfPath,
                geometry.PageNumber,
                pageImagePath,
                cancellationToken);
        }
    }

    /// <summary>
    /// Pads the source bounds by the crop padding, clamps the rectangle to the page, and
    /// guarantees a crop of at least 1x1.
    /// </summary>
    private static CriticalSourceImageCrop CreateCrop(
        ParsedCriticalSourceRect sourceBounds,
        ParsedPdfPageGeometry pageGeometry)
    {
        var paddedLeft = Math.Max(0, sourceBounds.Left - CropPaddingX);
        var paddedTop = Math.Max(0, sourceBounds.Top - CropPaddingY);
        var paddedRight = Math.Min(pageGeometry.Width, sourceBounds.Left + sourceBounds.Width + CropPaddingX);
        var paddedBottom = Math.Min(pageGeometry.Height, sourceBounds.Top + sourceBounds.Height + CropPaddingY);

        return new CriticalSourceImageCrop(
            pageNumber: sourceBounds.PageNumber,
            pageWidth: pageGeometry.Width,
            pageHeight: pageGeometry.Height,
            boundsLeft: sourceBounds.Left,
            boundsTop: sourceBounds.Top,
            boundsWidth: sourceBounds.Width,
            boundsHeight: sourceBounds.Height,
            cropLeft: paddedLeft,
            cropTop: paddedTop,
            cropWidth: Math.Max(1, paddedRight - paddedLeft),
            cropHeight: Math.Max(1, paddedBottom - paddedTop),
            renderDpi: PdfXmlExtractor.XmlAlignedRenderDpi);
    }

    /// <summary>Builds the pipe-delimited lookup key; a null group key becomes an empty segment.</summary>
    private static string CreateCellKey(string? groupKey, string rollBandLabel, string columnKey)
    {
        return string.Join("|", groupKey ?? string.Empty, rollBandLabel, columnKey);
    }
}

View File

@@ -1,35 +1,100 @@
using System.Text;
namespace RolemasterDb.ImportTool;
/// <summary>
/// Describes where the import artifacts of one table are stored beneath an artifacts root
/// directory: the table directory, its JSON/XML files, and the page and cell image directories.
/// </summary>
public sealed class ImportArtifactPaths
{
    private ImportArtifactPaths(
        string artifactsRootPath,
        string tableSlug,
        string directoryPath,
        string xmlPath,
        string fragmentsJsonPath,
        string parsedCellsJsonPath,
        string validationReportPath,
        string pagesDirectoryPath,
        string cellsDirectoryPath)
    {
        ArtifactsRootPath = artifactsRootPath;
        TableSlug = tableSlug;
        DirectoryPath = directoryPath;
        XmlPath = xmlPath;
        FragmentsJsonPath = fragmentsJsonPath;
        ParsedCellsJsonPath = parsedCellsJsonPath;
        ValidationReportPath = validationReportPath;
        PagesDirectoryPath = pagesDirectoryPath;
        CellsDirectoryPath = cellsDirectoryPath;
    }

    /// <summary>Root directory that relative artifact paths are resolved against.</summary>
    public string ArtifactsRootPath { get; }

    /// <summary>Slug of the table; also the name of the table directory under the root.</summary>
    public string TableSlug { get; }

    /// <summary>Directory holding all artifacts of this table.</summary>
    public string DirectoryPath { get; }

    /// <summary>Path of <c>source.xml</c> inside the table directory.</summary>
    public string XmlPath { get; }

    /// <summary>Path of <c>fragments.json</c> inside the table directory.</summary>
    public string FragmentsJsonPath { get; }

    /// <summary>Path of <c>parsed-cells.json</c> inside the table directory.</summary>
    public string ParsedCellsJsonPath { get; }

    /// <summary>Path of <c>validation-report.json</c> inside the table directory.</summary>
    public string ValidationReportPath { get; }

    /// <summary>The <c>pages</c> subdirectory of the table directory.</summary>
    public string PagesDirectoryPath { get; }

    /// <summary>The <c>cells</c> subdirectory of the table directory.</summary>
    public string CellsDirectoryPath { get; }

    /// <summary>
    /// Builds the path set for <paramref name="tableSlug"/> under
    /// <paramref name="artifactsRootPath"/>. No directories are created.
    /// </summary>
    public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug)
    {
        var directoryPath = Path.Combine(artifactsRootPath, tableSlug);
        var pagesDirectoryPath = Path.Combine(directoryPath, "pages");
        var cellsDirectoryPath = Path.Combine(directoryPath, "cells");

        return new ImportArtifactPaths(
            artifactsRootPath,
            tableSlug,
            directoryPath,
            Path.Combine(directoryPath, "source.xml"),
            Path.Combine(directoryPath, "fragments.json"),
            Path.Combine(directoryPath, "parsed-cells.json"),
            Path.Combine(directoryPath, "validation-report.json"),
            pagesDirectoryPath,
            cellsDirectoryPath);
    }

    /// <summary>Returns the page image path, e.g. <c>page-007.png</c> in the pages directory.</summary>
    public string GetPageImagePath(int pageNumber) =>
        Path.Combine(PagesDirectoryPath, $"page-{pageNumber:000}.png");

    /// <summary>
    /// Returns the cell image path relative to <see cref="ArtifactsRootPath"/>, always using
    /// forward slashes. A null group key is written as <c>none</c>.
    /// </summary>
    public string GetRelativeCellImagePath(string? groupKey, string columnKey, string rollBandLabel) =>
        Path.Combine(
                TableSlug,
                "cells",
                $"{NormalizeFileSegment(groupKey ?? "none")}__{NormalizeFileSegment(columnKey)}__{NormalizeFileSegment(rollBandLabel)}.png")
            .Replace('\\', '/');

    /// <summary>
    /// Converts a forward-slash relative path into a full path under <see cref="ArtifactsRootPath"/>.
    /// </summary>
    public string ResolveRelativePath(string relativePath) =>
        Path.GetFullPath(Path.Combine(ArtifactsRootPath, relativePath.Replace('/', Path.DirectorySeparatorChar)));

    /// <summary>
    /// Lower-cases and trims <paramref name="value"/>, keeps letters, digits, '-' and '_',
    /// spells '+' as "plus", and replaces every other character with '_'. Leading and trailing
    /// underscores are removed; an empty result becomes "empty".
    /// </summary>
    private static string NormalizeFileSegment(string value)
    {
        var builder = new StringBuilder();
        foreach (var character in value.Trim().ToLowerInvariant())
        {
            if (char.IsLetterOrDigit(character) || character is '-' or '_')
            {
                builder.Append(character);
            }
            else if (character == '+')
            {
                builder.Append("plus");
            }
            else
            {
                builder.Append('_');
            }
        }

        var normalized = builder.ToString().Trim('_');
        return string.IsNullOrWhiteSpace(normalized) ? "empty" : normalized;
    }
}

View File

@@ -0,0 +1,29 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Immutable description of a cropped source image: the page it came from, the page size,
/// the original bounds, the crop rectangle, and the DPI value it was rendered with.
/// </summary>
public sealed class CriticalSourceImageCrop
{
    /// <summary>Stores every value unchanged in the property of the same name.</summary>
    public CriticalSourceImageCrop(
        int pageNumber,
        int pageWidth,
        int pageHeight,
        int boundsLeft,
        int boundsTop,
        int boundsWidth,
        int boundsHeight,
        int cropLeft,
        int cropTop,
        int cropWidth,
        int cropHeight,
        int renderDpi)
    {
        PageNumber = pageNumber;
        PageWidth = pageWidth;
        PageHeight = pageHeight;
        BoundsLeft = boundsLeft;
        BoundsTop = boundsTop;
        BoundsWidth = boundsWidth;
        BoundsHeight = boundsHeight;
        CropLeft = cropLeft;
        CropTop = cropTop;
        CropWidth = cropWidth;
        CropHeight = cropHeight;
        RenderDpi = renderDpi;
    }

    // Page the crop was taken from, and that page's dimensions.
    public int PageNumber { get; }
    public int PageWidth { get; }
    public int PageHeight { get; }

    // Bounds as supplied by the caller, before any padding.
    public int BoundsLeft { get; }
    public int BoundsTop { get; }
    public int BoundsWidth { get; }
    public int BoundsHeight { get; }

    // Rectangle that is actually cropped.
    public int CropLeft { get; }
    public int CropTop { get; }
    public int CropWidth { get; }
    public int CropHeight { get; }

    // DPI value recorded for the render.
    public int RenderDpi { get; }
}

View File

@@ -4,6 +4,8 @@ namespace RolemasterDb.ImportTool;
public sealed class PdfXmlExtractor
{
public const int XmlAlignedRenderDpi = 108;
public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
{
Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);
@@ -33,4 +35,83 @@ public sealed class PdfXmlExtractor
throw new InvalidOperationException($"pdftohtml failed for '{pdfPath}': {error}");
}
}
/// <summary>
/// Renders a whole page of <paramref name="pdfPath"/> to <paramref name="outputPath"/> as PNG
/// by delegating to the shared renderer without a crop rectangle.
/// </summary>
public Task RenderPagePngAsync(
    string pdfPath,
    int pageNumber,
    string outputPath,
    CancellationToken cancellationToken = default)
{
    // All four crop values are null, so no crop options are passed.
    return RenderPngAsync(
        pdfPath,
        pageNumber,
        outputPath,
        left: null,
        top: null,
        width: null,
        height: null,
        cancellationToken);
}
/// <summary>
/// Renders the given rectangle of a page of <paramref name="pdfPath"/> to
/// <paramref name="outputPath"/> as PNG by delegating to the shared renderer.
/// </summary>
public Task RenderCropPngAsync(
    string pdfPath,
    int pageNumber,
    int left,
    int top,
    int width,
    int height,
    string outputPath,
    CancellationToken cancellationToken = default)
{
    // Note the shared renderer takes the output path before the crop rectangle.
    return RenderPngAsync(
        pdfPath,
        pageNumber,
        outputPath,
        left: left,
        top: top,
        width: width,
        height: height,
        cancellationToken);
}
/// <summary>
/// Runs <c>pdftoppm</c> to render a single page of a PDF, or a rectangle of that page, to a PNG file.
/// </summary>
/// <param name="pdfPath">PDF passed to <c>pdftoppm</c>.</param>
/// <param name="pageNumber">Page passed as both first (<c>-f</c>) and last (<c>-l</c>) page.</param>
/// <param name="outputPath">
/// Expected PNG path. Its directory is created, and the path without extension is handed to
/// <c>pdftoppm</c> as the output root.
/// </param>
/// <param name="left">Value for <c>-x</c>; only used when all four crop values are set.</param>
/// <param name="top">Value for <c>-y</c>; only used when all four crop values are set.</param>
/// <param name="width">Value for <c>-W</c>; only used when all four crop values are set.</param>
/// <param name="height">Value for <c>-H</c>; only used when all four crop values are set.</param>
/// <param name="cancellationToken">Cancels the wait; the child process is then terminated.</param>
/// <exception cref="InvalidOperationException">
/// <c>pdftoppm</c> exited with a non-zero code, or exited successfully without creating
/// <paramref name="outputPath"/>.
/// </exception>
private static async Task RenderPngAsync(
    string pdfPath,
    int pageNumber,
    string outputPath,
    int? left,
    int? top,
    int? width,
    int? height,
    CancellationToken cancellationToken)
{
    Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);

    // Command-line arguments are machine-readable, so format numbers culture-independently.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var pageArgument = pageNumber.ToString(invariant);

    var startInfo = new ProcessStartInfo
    {
        FileName = "pdftoppm",
        RedirectStandardError = true,
        RedirectStandardOutput = true,
        UseShellExecute = false,
        CreateNoWindow = true
    };

    startInfo.ArgumentList.Add("-png");
    startInfo.ArgumentList.Add("-r");
    startInfo.ArgumentList.Add(XmlAlignedRenderDpi.ToString(invariant));
    startInfo.ArgumentList.Add("-f");
    startInfo.ArgumentList.Add(pageArgument);
    startInfo.ArgumentList.Add("-l");
    startInfo.ArgumentList.Add(pageArgument);
    startInfo.ArgumentList.Add("-singlefile");

    if (left.HasValue && top.HasValue && width.HasValue && height.HasValue)
    {
        startInfo.ArgumentList.Add("-x");
        startInfo.ArgumentList.Add(left.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-y");
        startInfo.ArgumentList.Add(top.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-W");
        startInfo.ArgumentList.Add(width.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-H");
        startInfo.ArgumentList.Add(height.Value.ToString(invariant));
    }

    startInfo.ArgumentList.Add(pdfPath);

    // pdftoppm receives the path without ".png"; the existence check below expects the
    // tool to have produced exactly outputPath.
    startInfo.ArgumentList.Add(Path.Combine(Path.GetDirectoryName(outputPath)!, Path.GetFileNameWithoutExtension(outputPath)));

    using var process = new Process { StartInfo = startInfo };
    process.Start();

    // Both streams are redirected, so they must be drained while the process runs.
    // Reading only after exit can deadlock once the child fills a pipe buffer.
    var standardOutputTask = process.StandardOutput.ReadToEndAsync(cancellationToken);
    var standardErrorTask = process.StandardError.ReadToEndAsync(cancellationToken);

    string error;
    try
    {
        await process.WaitForExitAsync(cancellationToken);
        await standardOutputTask;
        error = await standardErrorTask;
    }
    catch (OperationCanceledException)
    {
        // Do not leave an orphaned pdftoppm running after the caller gave up.
        KillIfRunning(process);
        throw;
    }

    if (process.ExitCode != 0)
    {
        throw new InvalidOperationException($"pdftoppm failed for '{pdfPath}': {error}");
    }

    if (!File.Exists(outputPath))
    {
        throw new InvalidOperationException($"pdftoppm completed but did not create '{outputPath}'.");
    }
}

/// <summary>Terminates the process tree if it is still running; failures to kill are ignored.</summary>
private static void KillIfRunning(Process process)
{
    try
    {
        if (!process.HasExited)
        {
            process.Kill(entireProcessTree: true);
        }
    }
    catch (InvalidOperationException)
    {
        // The process exited between the check and the kill.
    }
    catch (System.ComponentModel.Win32Exception)
    {
        // The process could not be terminated; the cancellation is still propagated.
    }
}
}