Add high-res critical image refresh import

This commit is contained in:
2026-03-18 00:44:58 +01:00
parent 30fd257ea5
commit 8cbcf66695
10 changed files with 183 additions and 18 deletions

View File

@@ -22,6 +22,10 @@ public sealed class CriticalImportArtifactGenerationIntegrationTests
Assert.True(result.SourceBounds.Height > 0);
Assert.NotNull(result.SourceImagePath);
Assert.NotNull(result.SourceImageCrop);
Assert.Equal(PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop!.ScaleFactor);
Assert.Equal(PdfXmlExtractor.ScaledRenderDpi, result.SourceImageCrop.RenderDpi);
Assert.Equal(result.SourceBounds.Width * PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop.BoundsWidth);
Assert.Equal(result.SourceBounds.Height * PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop.BoundsHeight);
Assert.Equal(result.SourceImagePath, cellArtifact.SourceImagePath);
Assert.NotNull(cellArtifact.SourceImageCrop);
Assert.True(File.Exists(artifactPaths.GetPageImagePath(result.SourceBounds.PageNumber)));

View File

@@ -191,6 +191,48 @@ public sealed class CriticalImportMergeIntegrationTests
}
}
/// <summary>
/// Verifies that <c>RefreshImageArtifactsAsync</c> repopulates the source page number,
/// source image path and crop JSON on a result row while leaving the curated flag,
/// raw text, description, affix text and parse status exactly as they were edited.
/// </summary>
[Fact]
public async Task Reimport_images_only_refreshes_provenance_without_touching_curated_content()
{
    const string rollBandLabel = "36-45";
    const string columnKey = "B";

    // Arrange: import the parsed table into a fresh database.
    var (parsed, _) = await LoadPreparedSlashParseResultAsync();
    var databasePath = CreateEmptyDatabasePath();
    var loader = new CriticalImportLoader(databasePath);
    await loader.LoadAsync(parsed.Table);

    // Arrange: mark one result as curated and wipe its image provenance.
    await using (var seedContext = CreateDbContext(databasePath))
    {
        var curated = await LoadResultAsync(seedContext, rollBandLabel, columnKey);

        curated.IsCurated = true;
        curated.RawCellText = "Curated raw text";
        curated.DescriptionText = "Curated description";
        curated.RawAffixText = "+12H";
        curated.ParseStatus = "manually_curated";

        curated.SourcePageNumber = null;
        curated.SourceImagePath = null;
        curated.SourceImageCropJson = null;

        await seedContext.SaveChangesAsync();
    }

    // Act: refresh only the image artifacts.
    await loader.RefreshImageArtifactsAsync(parsed.Table);

    // Assert: curated content is intact and provenance has been restored.
    await using (var verifyContext = CreateDbContext(databasePath))
    {
        var refreshed = await LoadResultAsync(verifyContext, rollBandLabel, columnKey);

        Assert.True(refreshed.IsCurated);
        Assert.Equal("Curated raw text", refreshed.RawCellText);
        Assert.Equal("Curated description", refreshed.DescriptionText);
        Assert.Equal("+12H", refreshed.RawAffixText);
        Assert.Equal("manually_curated", refreshed.ParseStatus);

        Assert.NotNull(refreshed.SourcePageNumber);
        Assert.False(string.IsNullOrWhiteSpace(refreshed.SourceImagePath));
        Assert.False(string.IsNullOrWhiteSpace(refreshed.SourceImageCropJson));
    }
}
private static ParsedCriticalTable CreateTrimmedTable(
ParsedCriticalTable table,
params (string RollBandLabel, string ColumnKey)[] excludedResults)

View File

@@ -100,6 +100,40 @@ public sealed class CriticalImportCommandRunner
});
}
/// <summary>
/// Handles the <c>reimport-images</c> verb: re-parses the table's XML, regenerates the
/// source image artifacts, rewrites the import artifacts, and then refreshes the image
/// provenance of the matching results in the target database.
/// </summary>
/// <param name="options">Verb options carrying the table slug and an optional database path.</param>
/// <returns><c>0</c> once the refresh has completed.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the parse result's validation report is not valid. The artifacts have
/// already been written at that point; the database is not touched.
/// </exception>
public async Task<int> RunAsync(ReimportImagesOptions options)
{
    var manifestEntry = GetManifestEntry(options.Table);
    var paths = CreateArtifactPaths(manifestEntry.Slug);

    // Reuse an XML extraction that is already on disk; only extract from the PDF when it is missing.
    var xmlAlreadyExtracted = File.Exists(paths.XmlPath);
    if (!xmlAlreadyExtracted)
    {
        await pdfXmlExtractor.ExtractAsync(ResolveRepositoryPath(manifestEntry.PdfPath), paths.XmlPath);
    }

    var parsed = Parse(manifestEntry, await File.ReadAllTextAsync(paths.XmlPath));

    // Regenerate images first, then persist the artifacts, before checking validity.
    await sourceImageArtifactGenerator.GenerateAsync(
        ResolveRepositoryPath(manifestEntry.PdfPath),
        paths,
        parsed,
        CancellationToken.None);
    await artifactWriter.WriteAsync(paths, parsed, CancellationToken.None);

    if (!parsed.ValidationReport.IsValid)
    {
        throw new InvalidOperationException(
            $"Validation failed for '{manifestEntry.Slug}'. See {paths.ValidationReportPath} for details.");
    }

    var updatedResults = await new CriticalImportLoader(ResolveDatabasePath(options.DatabasePath))
        .RefreshImageArtifactsAsync(parsed.Table);

    Console.WriteLine(
        $"Refreshed image artifacts for {manifestEntry.Slug}: {updatedResults} results updated.");

    return 0;
}
private CriticalImportManifestEntry GetManifestEntry(string tableSlug)
{
var manifest = manifestLoader.Load(RepositoryPaths.Discover().ManifestPath);

View File

@@ -128,6 +128,50 @@ public sealed class CriticalImportLoader(string databasePath)
return new ImportCommandResult(entity.Slug, entity.Columns.Count, entity.RollBands.Count, entity.Results.Count);
}
/// <summary>
/// Re-applies importer provenance (via <c>ApplyImporterProvenance</c>) to every stored
/// result of the table that has a counterpart in <paramref name="table"/>, matched by
/// group key, column key and roll band label. The update runs inside one transaction.
/// </summary>
/// <param name="table">The freshly parsed table whose results carry the new provenance.</param>
/// <param name="cancellationToken">Token passed through to the database operations.</param>
/// <returns>The number of parsed results that were matched to an existing stored result.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when no critical table with the slug of <paramref name="table"/> exists in the database.
/// </exception>
public async Task<int> RefreshImageArtifactsAsync(ParsedCriticalTable table, CancellationToken cancellationToken = default)
{
    await using var dbContext = CreateDbContext();
    await dbContext.Database.EnsureCreatedAsync(cancellationToken);
    await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);

    await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);

    // Load the table with the navigation properties needed to rebuild each result's lookup key.
    var storedTable = await dbContext.CriticalTables
        .AsSplitQuery()
        .Include(t => t.Results)
        .ThenInclude(r => r.CriticalGroup)
        .Include(t => t.Results)
        .ThenInclude(r => r.CriticalColumn)
        .Include(t => t.Results)
        .ThenInclude(r => r.CriticalRollBand)
        .SingleOrDefaultAsync(t => t.Slug == table.Slug, cancellationToken)
        ?? throw new InvalidOperationException($"Critical table '{table.Slug}' does not exist in the target database.");

    var storedByKey = storedTable.Results.ToDictionary(
        stored => CreateResultKey(stored.CriticalGroup?.GroupKey, stored.CriticalColumn.ColumnKey, stored.CriticalRollBand.Label),
        StringComparer.Ordinal);

    var refreshedCount = 0;
    foreach (var parsedResult in table.Results)
    {
        var lookupKey = CreateResultKey(parsedResult.GroupKey, parsedResult.ColumnKey, parsedResult.RollBandLabel);

        // Parsed results without a stored counterpart are skipped; nothing is inserted here.
        if (storedByKey.TryGetValue(lookupKey, out var storedResult))
        {
            ApplyImporterProvenance(storedResult, parsedResult);
            refreshedCount++;
        }
    }

    await dbContext.SaveChangesAsync(cancellationToken);
    await transaction.CommitAsync(cancellationToken);

    return refreshedCount;
}
private RolemasterDbContext CreateDbContext()
{
var options = new DbContextOptionsBuilder<RolemasterDbContext>()

View File

@@ -75,17 +75,18 @@ public sealed class CriticalSourceImageArtifactGenerator(PdfXmlExtractor pdfXmlE
return new CriticalSourceImageCrop(
sourceBounds.PageNumber,
pageGeometry.Width,
pageGeometry.Height,
sourceBounds.Left,
sourceBounds.Top,
sourceBounds.Width,
sourceBounds.Height,
cropLeft,
cropTop,
Math.Max(1, cropRight - cropLeft),
Math.Max(1, cropBottom - cropTop),
PdfXmlExtractor.XmlAlignedRenderDpi);
PdfXmlExtractor.ScaleCoordinate(pageGeometry.Width),
PdfXmlExtractor.ScaleCoordinate(pageGeometry.Height),
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Left),
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Top),
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Width),
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Height),
PdfXmlExtractor.ScaleCoordinate(cropLeft),
PdfXmlExtractor.ScaleCoordinate(cropTop),
PdfXmlExtractor.ScaleCoordinate(Math.Max(1, cropRight - cropLeft)),
PdfXmlExtractor.ScaleCoordinate(Math.Max(1, cropBottom - cropTop)),
PdfXmlExtractor.ScaledRenderDpi,
PdfXmlExtractor.RenderScaleFactor);
}
private static string CreateCellKey(string? groupKey, string rollBandLabel, string columnKey) =>

View File

@@ -12,7 +12,8 @@ public sealed class CriticalSourceImageCrop(
int cropTop,
int cropWidth,
int cropHeight,
int renderDpi)
int renderDpi,
int scaleFactor)
{
public int PageNumber { get; } = pageNumber;
public int PageWidth { get; } = pageWidth;
@@ -26,4 +27,5 @@ public sealed class CriticalSourceImageCrop(
public int CropWidth { get; } = cropWidth;
public int CropHeight { get; } = cropHeight;
public int RenderDpi { get; } = renderDpi;
public int ScaleFactor { get; } = scaleFactor;
}

View File

@@ -4,7 +4,11 @@ namespace RolemasterDb.ImportTool;
public sealed class PdfXmlExtractor
{
/// <summary>Integer multiplier used by <see cref="ScaleCoordinate"/> and <see cref="ScaledRenderDpi"/>.</summary>
public const int RenderScaleFactor = 4;

/// <summary>Base render DPI before <see cref="RenderScaleFactor"/> is applied.</summary>
public const int XmlAlignedRenderDpi = 108;

/// <summary><see cref="XmlAlignedRenderDpi"/> multiplied by <see cref="RenderScaleFactor"/>.</summary>
public const int ScaledRenderDpi = XmlAlignedRenderDpi * RenderScaleFactor;

/// <summary>
/// Multiplies <paramref name="value"/> by <see cref="RenderScaleFactor"/>.
/// </summary>
/// <param name="value">The coordinate or length to scale.</param>
/// <returns>The scaled value.</returns>
/// <exception cref="OverflowException">Thrown when the product does not fit in an <see cref="int"/>.</exception>
public static int ScaleCoordinate(int value)
{
    // Overflow must surface as an exception rather than wrap silently.
    checked
    {
        return value * RenderScaleFactor;
    }
}
public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
{
@@ -77,7 +81,7 @@ public sealed class PdfXmlExtractor
startInfo.ArgumentList.Add("-png");
startInfo.ArgumentList.Add("-r");
startInfo.ArgumentList.Add(XmlAlignedRenderDpi.ToString());
startInfo.ArgumentList.Add(ScaledRenderDpi.ToString());
startInfo.ArgumentList.Add("-f");
startInfo.ArgumentList.Add(pageNumber.ToString());
startInfo.ArgumentList.Add("-l");

View File

@@ -4,12 +4,13 @@ using RolemasterDb.ImportTool;
// Entry point: parse the command line into one of the verb option types and
// hand it to the matching CriticalImportCommandRunner.RunAsync overload.
var runner = new CriticalImportCommandRunner();
// NOTE(review): exitCode is declared twice below. The first declaration omits
// ReimportImagesOptions although MapResult has a handler for it, so it looks like
// the superseded version of the statement; confirm only the second one is kept.
var exitCode = await Parser.Default.ParseArguments<ResetOptions, ExtractOptions, LoadOptions, ImportOptions>(args)
var exitCode = await Parser.Default.ParseArguments<ResetOptions, ExtractOptions, LoadOptions, ImportOptions, ReimportImagesOptions>(args)
.MapResult(
// One handler per verb; each is wrapped in ExecuteAsync (defined elsewhere in this file).
(ResetOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
(ExtractOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
(LoadOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
(ImportOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
(ReimportImagesOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
// Fallback handler: yields exit code 1 (presumably the parse-error case; confirm against CommandLineParser).
_ => Task.FromResult(1));
// The resulting value becomes the process exit code.
return exitCode;

View File

@@ -0,0 +1,13 @@
using CommandLine;
namespace RolemasterDb.ImportTool;
/// <summary>
/// Command-line options for the <c>reimport-images</c> verb.
/// </summary>
[Verb(
    "reimport-images",
    HelpText = "Regenerate critical table page and cell images and refresh only image metadata in SQLite.")]
public sealed class ReimportImagesOptions
{
    /// <summary>
    /// Manifest slug of the critical table to refresh; required first positional argument.
    /// </summary>
    [Value(
        0,
        Required = true,
        MetaName = "table",
        HelpText = "The manifest slug of the critical table to refresh.")]
    public string Table { get; set; } = string.Empty;

    /// <summary>
    /// Optional SQLite database path, supplied with <c>-d</c> or <c>--db</c>.
    /// </summary>
    [Option(
        'd',
        "db",
        HelpText = "Optional SQLite database path.")]
    public string? DatabasePath { get; set; }
}