Generate critical source image artifacts
This commit is contained in:
@@ -7,10 +7,16 @@ public sealed class CriticalImportCommandRunner
|
||||
// Collaborators owned by the runner. Each is created once per runner instance.
private readonly CriticalImportManifestLoader manifestLoader = new CriticalImportManifestLoader();
private readonly ImportArtifactWriter artifactWriter = new ImportArtifactWriter();
private readonly PdfXmlExtractor pdfXmlExtractor = new PdfXmlExtractor();

// Assigned in the constructor because it is built from the pdfXmlExtractor field above.
private readonly CriticalSourceImageArtifactGenerator sourceImageArtifactGenerator;

// One parser per supported critical table layout.
private readonly StandardCriticalTableParser standardParser = new StandardCriticalTableParser();
private readonly VariantColumnCriticalTableParser variantColumnParser = new VariantColumnCriticalTableParser();
private readonly GroupedVariantCriticalTableParser groupedVariantParser = new GroupedVariantCriticalTableParser();
|
||||
|
||||
/// <summary>
/// Creates the runner and builds the source image generator on top of the runner's own
/// <c>pdfXmlExtractor</c> instance.
/// </summary>
/// <remarks>
/// The generator is created here instead of in a field initializer because C# field
/// initializers cannot reference other instance fields.
/// </remarks>
public CriticalImportCommandRunner() =>
    sourceImageArtifactGenerator = new CriticalSourceImageArtifactGenerator(pdfXmlExtractor);
|
||||
|
||||
public async Task<int> RunAsync(ResetOptions options)
|
||||
{
|
||||
if (!string.Equals(options.Target, "criticals", StringComparison.OrdinalIgnoreCase))
|
||||
@@ -47,6 +53,11 @@ public sealed class CriticalImportCommandRunner
|
||||
|
||||
var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath);
|
||||
var parseResult = Parse(entry, xmlContent);
|
||||
await sourceImageArtifactGenerator.GenerateAsync(
|
||||
ResolveRepositoryPath(entry.PdfPath),
|
||||
artifactPaths,
|
||||
parseResult,
|
||||
CancellationToken.None);
|
||||
await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None);
|
||||
|
||||
if (!parseResult.ValidationReport.IsValid)
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
using RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
namespace RolemasterDb.ImportTool;
|
||||
|
||||
/// <summary>
/// Renders PNG images for a parsed critical table: one image per page that has geometry, and one
/// cropped image per table result. The relative image path and crop description are written back
/// onto each result and onto the cell artifact that shares its group / roll band / column key.
/// </summary>
public sealed class CriticalSourceImageArtifactGenerator(PdfXmlExtractor pdfXmlExtractor)
{
    // Extra margin added around a result's source bounds before cropping.
    private const int CropPaddingX = 12;
    private const int CropPaddingY = 8;

    /// <summary>
    /// Generates all page and cell images for <paramref name="parseResult"/>.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF the images are rendered from.</param>
    /// <param name="artifactPaths">Artifact locations for the table being imported.</param>
    /// <param name="parseResult">Parse result whose results and cells receive image metadata.</param>
    /// <param name="cancellationToken">Token forwarded to every render call.</param>
    /// <exception cref="InvalidOperationException">
    /// A result refers to a page number that has no entry in the parse result's page geometries.
    /// </exception>
    public async Task GenerateAsync(
        string pdfPath,
        ImportArtifactPaths artifactPaths,
        CriticalTableParseResult parseResult,
        CancellationToken cancellationToken = default)
    {
        Directory.CreateDirectory(artifactPaths.PagesDirectoryPath);
        Directory.CreateDirectory(artifactPaths.CellsDirectoryPath);

        // Built before any rendering so duplicate page numbers fail fast.
        var geometryLookup = parseResult.PageGeometries.ToDictionary(geometry => geometry.PageNumber);

        await RenderPageImagesAsync(pdfPath, artifactPaths, parseResult, cancellationToken);

        var cellLookup = parseResult.Cells.ToDictionary(
            cell => CreateCellKey(cell.GroupKey, cell.RollBandLabel, cell.ColumnKey),
            StringComparer.Ordinal);

        foreach (var tableResult in parseResult.Table.Results)
        {
            var sourcePage = tableResult.SourceBounds.PageNumber;
            if (!geometryLookup.TryGetValue(sourcePage, out var geometry))
            {
                throw new InvalidOperationException(
                    $"Missing page geometry for page {sourcePage} in table '{parseResult.Table.Slug}'.");
            }

            var crop = CreateCrop(tableResult.SourceBounds, geometry);
            var relativeImagePath = artifactPaths.GetRelativeCellImagePath(
                tableResult.GroupKey,
                tableResult.ColumnKey,
                tableResult.RollBandLabel);
            var absoluteImagePath = artifactPaths.ResolveRelativePath(relativeImagePath);

            await pdfXmlExtractor.RenderCropPngAsync(
                pdfPath,
                crop.PageNumber,
                crop.CropLeft,
                crop.CropTop,
                crop.CropWidth,
                crop.CropHeight,
                absoluteImagePath,
                cancellationToken);

            tableResult.SourceImagePath = relativeImagePath;
            tableResult.SourceImageCrop = crop;

            // Mirror the metadata onto the matching cell artifact, when there is one.
            var lookupKey = CreateCellKey(tableResult.GroupKey, tableResult.RollBandLabel, tableResult.ColumnKey);
            if (!cellLookup.TryGetValue(lookupKey, out var matchingCell))
            {
                continue;
            }

            matchingCell.SourceImagePath = relativeImagePath;
            matchingCell.SourceImageCrop = crop;
        }
    }

    // Renders one full-page PNG per page geometry, in ascending page-number order.
    private async Task RenderPageImagesAsync(
        string pdfPath,
        ImportArtifactPaths artifactPaths,
        CriticalTableParseResult parseResult,
        CancellationToken cancellationToken)
    {
        var orderedPageNumbers = parseResult.PageGeometries
            .OrderBy(geometry => geometry.PageNumber)
            .Select(geometry => geometry.PageNumber);

        foreach (var pageNumber in orderedPageNumbers)
        {
            var pageImagePath = artifactPaths.GetPageImagePath(pageNumber);
            await pdfXmlExtractor.RenderPagePngAsync(pdfPath, pageNumber, pageImagePath, cancellationToken);
        }
    }

    // Pads the source bounds by the crop padding, clamps the rectangle to the page, and keeps the
    // crop at least one unit wide and tall.
    private static CriticalSourceImageCrop CreateCrop(
        ParsedCriticalSourceRect sourceBounds,
        ParsedPdfPageGeometry pageGeometry)
    {
        var paddedLeft = Math.Max(0, sourceBounds.Left - CropPaddingX);
        var paddedTop = Math.Max(0, sourceBounds.Top - CropPaddingY);
        var paddedRight = Math.Min(pageGeometry.Width, sourceBounds.Left + sourceBounds.Width + CropPaddingX);
        var paddedBottom = Math.Min(pageGeometry.Height, sourceBounds.Top + sourceBounds.Height + CropPaddingY);

        var croppedWidth = Math.Max(1, paddedRight - paddedLeft);
        var croppedHeight = Math.Max(1, paddedBottom - paddedTop);

        return new CriticalSourceImageCrop(
            sourceBounds.PageNumber,
            pageGeometry.Width,
            pageGeometry.Height,
            sourceBounds.Left,
            sourceBounds.Top,
            sourceBounds.Width,
            sourceBounds.Height,
            paddedLeft,
            paddedTop,
            croppedWidth,
            croppedHeight,
            PdfXmlExtractor.XmlAlignedRenderDpi);
    }

    // Builds the lookup key shared by results and cells; a missing group key counts as empty.
    private static string CreateCellKey(string? groupKey, string rollBandLabel, string columnKey)
    {
        return string.Join("|", groupKey ?? string.Empty, rollBandLabel, columnKey);
    }
}
|
||||
@@ -1,35 +1,100 @@
|
||||
using System.Text;
|
||||
|
||||
namespace RolemasterDb.ImportTool;
|
||||
|
||||
/// <summary>
/// Describes where the import artifacts of a single table are stored beneath an artifacts root:
/// the extracted XML, the JSON outputs, and the <c>pages</c> and <c>cells</c> image directories.
/// Instances are created through <see cref="Create"/>.
/// </summary>
public sealed class ImportArtifactPaths
{
    private ImportArtifactPaths(
        string artifactsRootPath,
        string tableSlug,
        string directoryPath,
        string xmlPath,
        string fragmentsJsonPath,
        string parsedCellsJsonPath,
        string validationReportPath,
        string pagesDirectoryPath,
        string cellsDirectoryPath)
    {
        ArtifactsRootPath = artifactsRootPath;
        TableSlug = tableSlug;
        DirectoryPath = directoryPath;
        XmlPath = xmlPath;
        FragmentsJsonPath = fragmentsJsonPath;
        ParsedCellsJsonPath = parsedCellsJsonPath;
        ValidationReportPath = validationReportPath;
        PagesDirectoryPath = pagesDirectoryPath;
        CellsDirectoryPath = cellsDirectoryPath;
    }

    /// <summary>Root directory that contains one sub-directory per table slug.</summary>
    public string ArtifactsRootPath { get; }

    /// <summary>Slug of the table; also the name of the table's artifact directory.</summary>
    public string TableSlug { get; }

    /// <summary>The table's artifact directory (<c>root/slug</c>).</summary>
    public string DirectoryPath { get; }

    /// <summary>Path of <c>source.xml</c> inside the table directory.</summary>
    public string XmlPath { get; }

    /// <summary>Path of <c>fragments.json</c> inside the table directory.</summary>
    public string FragmentsJsonPath { get; }

    /// <summary>Path of <c>parsed-cells.json</c> inside the table directory.</summary>
    public string ParsedCellsJsonPath { get; }

    /// <summary>Path of <c>validation-report.json</c> inside the table directory.</summary>
    public string ValidationReportPath { get; }

    /// <summary>The <c>pages</c> directory inside the table directory.</summary>
    public string PagesDirectoryPath { get; }

    /// <summary>The <c>cells</c> directory inside the table directory.</summary>
    public string CellsDirectoryPath { get; }

    /// <summary>
    /// Builds the artifact paths for <paramref name="tableSlug"/> beneath
    /// <paramref name="artifactsRootPath"/>. Nothing is created on disk.
    /// </summary>
    public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug)
    {
        var directoryPath = Path.Combine(artifactsRootPath, tableSlug);
        var pagesDirectoryPath = Path.Combine(directoryPath, "pages");
        var cellsDirectoryPath = Path.Combine(directoryPath, "cells");

        return new ImportArtifactPaths(
            artifactsRootPath,
            tableSlug,
            directoryPath,
            Path.Combine(directoryPath, "source.xml"),
            Path.Combine(directoryPath, "fragments.json"),
            Path.Combine(directoryPath, "parsed-cells.json"),
            Path.Combine(directoryPath, "validation-report.json"),
            pagesDirectoryPath,
            cellsDirectoryPath);
    }

    /// <summary>
    /// Returns the path of the PNG for <paramref name="pageNumber"/> inside the pages directory.
    /// The page number is zero-padded to at least three digits (<c>page-007.png</c>).
    /// </summary>
    public string GetPageImagePath(int pageNumber) =>
        Path.Combine(PagesDirectoryPath, $"page-{pageNumber:000}.png");

    /// <summary>
    /// Returns the cell image path relative to <see cref="ArtifactsRootPath"/>, always using
    /// <c>/</c> as separator: <c>slug/cells/group__column__rollband.png</c>. Each segment is
    /// normalized for use in a file name; a <c>null</c> group key becomes <c>none</c>.
    /// </summary>
    public string GetRelativeCellImagePath(string? groupKey, string columnKey, string rollBandLabel) =>
        Path.Combine(
                TableSlug,
                "cells",
                $"{NormalizeFileSegment(groupKey ?? "none")}__{NormalizeFileSegment(columnKey)}__{NormalizeFileSegment(rollBandLabel)}.png")
            .Replace('\\', '/');

    /// <summary>
    /// Converts a <c>/</c>-separated path relative to <see cref="ArtifactsRootPath"/> into a full
    /// file system path.
    /// </summary>
    public string ResolveRelativePath(string relativePath) =>
        Path.GetFullPath(Path.Combine(ArtifactsRootPath, relativePath.Replace('/', Path.DirectorySeparatorChar)));

    // Lower-cases the value and keeps letters, digits, '-' and '_'. '+' is spelled out as "plus"
    // and every other character becomes '_'. Leading and trailing underscores are trimmed, and an
    // empty result is replaced by "empty" so a segment is never blank.
    private static string NormalizeFileSegment(string value)
    {
        var builder = new StringBuilder();

        foreach (var character in value.Trim().ToLowerInvariant())
        {
            if (char.IsLetterOrDigit(character) || character is '-' or '_')
            {
                builder.Append(character);
            }
            else if (character == '+')
            {
                builder.Append("plus");
            }
            else
            {
                builder.Append('_');
            }
        }

        var normalized = builder.ToString().Trim('_');
        return string.IsNullOrWhiteSpace(normalized) ? "empty" : normalized;
    }
}
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
/// <summary>
/// Immutable description of a cropped source image: the page it belongs to, the page size, the
/// original bounds, the crop rectangle, and the render DPI. All values are stored exactly as
/// passed to the constructor.
/// </summary>
public sealed class CriticalSourceImageCrop
{
    /// <summary>Creates a crop description from its twelve integer components.</summary>
    public CriticalSourceImageCrop(
        int pageNumber,
        int pageWidth,
        int pageHeight,
        int boundsLeft,
        int boundsTop,
        int boundsWidth,
        int boundsHeight,
        int cropLeft,
        int cropTop,
        int cropWidth,
        int cropHeight,
        int renderDpi)
    {
        PageNumber = pageNumber;
        PageWidth = pageWidth;
        PageHeight = pageHeight;
        BoundsLeft = boundsLeft;
        BoundsTop = boundsTop;
        BoundsWidth = boundsWidth;
        BoundsHeight = boundsHeight;
        CropLeft = cropLeft;
        CropTop = cropTop;
        CropWidth = cropWidth;
        CropHeight = cropHeight;
        RenderDpi = renderDpi;
    }

    /// <summary>Number of the page the crop belongs to.</summary>
    public int PageNumber { get; }

    /// <summary>Width of the whole page.</summary>
    public int PageWidth { get; }

    /// <summary>Height of the whole page.</summary>
    public int PageHeight { get; }

    /// <summary>Left edge of the original (unpadded) bounds.</summary>
    public int BoundsLeft { get; }

    /// <summary>Top edge of the original (unpadded) bounds.</summary>
    public int BoundsTop { get; }

    /// <summary>Width of the original (unpadded) bounds.</summary>
    public int BoundsWidth { get; }

    /// <summary>Height of the original (unpadded) bounds.</summary>
    public int BoundsHeight { get; }

    /// <summary>Left edge of the crop rectangle.</summary>
    public int CropLeft { get; }

    /// <summary>Top edge of the crop rectangle.</summary>
    public int CropTop { get; }

    /// <summary>Width of the crop rectangle.</summary>
    public int CropWidth { get; }

    /// <summary>Height of the crop rectangle.</summary>
    public int CropHeight { get; }

    /// <summary>Render resolution (DPI) recorded for this crop.</summary>
    public int RenderDpi { get; }
}
|
||||
@@ -4,6 +4,8 @@ namespace RolemasterDb.ImportTool;
|
||||
|
||||
public sealed class PdfXmlExtractor
|
||||
{
|
||||
public const int XmlAlignedRenderDpi = 108;
|
||||
|
||||
public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
|
||||
{
|
||||
Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);
|
||||
@@ -33,4 +35,83 @@ public sealed class PdfXmlExtractor
|
||||
throw new InvalidOperationException($"pdftohtml failed for '{pdfPath}': {error}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Renders a single, uncropped PDF page to a PNG file.
/// </summary>
/// <param name="pdfPath">Path of the PDF to render.</param>
/// <param name="pageNumber">Page to render; passed to the renderer as both first and last page.</param>
/// <param name="outputPath">Path of the PNG file to produce.</param>
/// <param name="cancellationToken">Token forwarded to the render call.</param>
public Task RenderPagePngAsync(
    string pdfPath,
    int pageNumber,
    string outputPath,
    CancellationToken cancellationToken = default)
{
    // No crop rectangle: all four crop values stay unset, so the whole page is rendered.
    return RenderPngAsync(
        pdfPath,
        pageNumber,
        outputPath,
        left: null,
        top: null,
        width: null,
        height: null,
        cancellationToken: cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Renders a rectangular region of a single PDF page to a PNG file.
/// </summary>
/// <param name="pdfPath">Path of the PDF to render.</param>
/// <param name="pageNumber">Page that contains the region.</param>
/// <param name="left">Left edge of the region, passed to the renderer as <c>-x</c>.</param>
/// <param name="top">Top edge of the region, passed to the renderer as <c>-y</c>.</param>
/// <param name="width">Width of the region, passed to the renderer as <c>-W</c>.</param>
/// <param name="height">Height of the region, passed to the renderer as <c>-H</c>.</param>
/// <param name="outputPath">Path of the PNG file to produce.</param>
/// <param name="cancellationToken">Token forwarded to the render call.</param>
public Task RenderCropPngAsync(
    string pdfPath,
    int pageNumber,
    int left,
    int top,
    int width,
    int height,
    string outputPath,
    CancellationToken cancellationToken = default)
{
    return RenderPngAsync(
        pdfPath,
        pageNumber,
        outputPath,
        left: left,
        top: top,
        width: width,
        height: height,
        cancellationToken: cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Runs <c>pdftoppm</c> to render one page of <paramref name="pdfPath"/> (optionally cropped) to
/// the PNG file <paramref name="outputPath"/>.
/// </summary>
/// <param name="pdfPath">Path of the PDF to render.</param>
/// <param name="pageNumber">Page to render; used as both <c>-f</c> and <c>-l</c>.</param>
/// <param name="outputPath">
/// Path of the PNG to produce. <c>pdftoppm</c> receives this path without its extension as the
/// output root and is run with <c>-singlefile</c>.
/// </param>
/// <param name="left">Crop left edge (<c>-x</c>); the crop is applied only when all four crop values are set.</param>
/// <param name="top">Crop top edge (<c>-y</c>).</param>
/// <param name="width">Crop width (<c>-W</c>).</param>
/// <param name="height">Crop height (<c>-H</c>).</param>
/// <param name="cancellationToken">Cancels the wait; the child process is then killed.</param>
/// <exception cref="InvalidOperationException">
/// <c>pdftoppm</c> exited with a non-zero code, or exited successfully without creating
/// <paramref name="outputPath"/>.
/// </exception>
private static async Task RenderPngAsync(
    string pdfPath,
    int pageNumber,
    string outputPath,
    int? left,
    int? top,
    int? width,
    int? height,
    CancellationToken cancellationToken)
{
    // A bare file name has no directory component; CreateDirectory("") would throw.
    var outputDirectory = Path.GetDirectoryName(outputPath);
    if (!string.IsNullOrEmpty(outputDirectory))
    {
        Directory.CreateDirectory(outputDirectory);
    }

    // Command-line numbers must not depend on the machine's culture settings.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var startInfo = new ProcessStartInfo
    {
        FileName = "pdftoppm",
        RedirectStandardError = true,
        RedirectStandardOutput = true,
        UseShellExecute = false,
        CreateNoWindow = true
    };

    startInfo.ArgumentList.Add("-png");
    startInfo.ArgumentList.Add("-r");
    startInfo.ArgumentList.Add(XmlAlignedRenderDpi.ToString(invariant));
    startInfo.ArgumentList.Add("-f");
    startInfo.ArgumentList.Add(pageNumber.ToString(invariant));
    startInfo.ArgumentList.Add("-l");
    startInfo.ArgumentList.Add(pageNumber.ToString(invariant));
    startInfo.ArgumentList.Add("-singlefile");

    if (left.HasValue && top.HasValue && width.HasValue && height.HasValue)
    {
        startInfo.ArgumentList.Add("-x");
        startInfo.ArgumentList.Add(left.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-y");
        startInfo.ArgumentList.Add(top.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-W");
        startInfo.ArgumentList.Add(width.Value.ToString(invariant));
        startInfo.ArgumentList.Add("-H");
        startInfo.ArgumentList.Add(height.Value.ToString(invariant));
    }

    startInfo.ArgumentList.Add(pdfPath);
    startInfo.ArgumentList.Add(Path.Combine(outputDirectory ?? string.Empty, Path.GetFileNameWithoutExtension(outputPath)));

    using var process = new Process { StartInfo = startInfo };
    process.Start();

    // Drain both redirected pipes while the process runs. Reading only after exit can deadlock:
    // once a pipe buffer fills, the child blocks on write and never exits.
    var standardErrorTask = process.StandardError.ReadToEndAsync(cancellationToken);
    var standardOutputTask = process.StandardOutput.ReadToEndAsync(cancellationToken);

    string error;
    try
    {
        await process.WaitForExitAsync(cancellationToken);
        error = await standardErrorTask;
        await standardOutputTask;
    }
    catch (OperationCanceledException)
    {
        // Do not leave an orphaned renderer behind when the caller cancels.
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception killFailure) when (killFailure is InvalidOperationException or System.ComponentModel.Win32Exception)
        {
            // Best effort: the process exited on its own or could not be terminated.
        }

        throw;
    }

    if (process.ExitCode != 0)
    {
        throw new InvalidOperationException($"pdftoppm failed for '{pdfPath}': {error}");
    }

    if (!File.Exists(outputPath))
    {
        throw new InvalidOperationException($"pdftoppm completed but did not create '{outputPath}'.");
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user