Add high-res critical image refresh import
This commit is contained in:
@@ -22,6 +22,10 @@ public sealed class CriticalImportArtifactGenerationIntegrationTests
|
||||
Assert.True(result.SourceBounds.Height > 0);
|
||||
Assert.NotNull(result.SourceImagePath);
|
||||
Assert.NotNull(result.SourceImageCrop);
|
||||
Assert.Equal(PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop!.ScaleFactor);
|
||||
Assert.Equal(PdfXmlExtractor.ScaledRenderDpi, result.SourceImageCrop.RenderDpi);
|
||||
Assert.Equal(result.SourceBounds.Width * PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop.BoundsWidth);
|
||||
Assert.Equal(result.SourceBounds.Height * PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop.BoundsHeight);
|
||||
Assert.Equal(result.SourceImagePath, cellArtifact.SourceImagePath);
|
||||
Assert.NotNull(cellArtifact.SourceImageCrop);
|
||||
Assert.True(File.Exists(artifactPaths.GetPageImagePath(result.SourceBounds.PageNumber)));
|
||||
|
||||
@@ -191,6 +191,48 @@ public sealed class CriticalImportMergeIntegrationTests
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Verifies that refreshing image artifacts restores the source page and image
/// provenance on a stored result while leaving its curated text fields untouched.
/// </summary>
[Fact]
public async Task Reimport_images_only_refreshes_provenance_without_touching_curated_content()
{
    const string rollBandLabel = "36-45";
    const string columnKey = "B";
    const string curatedRawText = "Curated raw text";
    const string curatedDescription = "Curated description";
    const string curatedAffix = "+12H";
    const string curatedStatus = "manually_curated";

    // Arrange: import the prepared table into a fresh database.
    var (prepared, _) = await LoadPreparedSlashParseResultAsync();
    var databasePath = CreateEmptyDatabasePath();
    var loader = new CriticalImportLoader(databasePath);

    await loader.LoadAsync(prepared.Table);

    // Arrange: mark one result as curated and wipe its image provenance.
    await using (var seedContext = CreateDbContext(databasePath))
    {
        var curated = await LoadResultAsync(seedContext, rollBandLabel, columnKey);

        curated.IsCurated = true;
        curated.RawCellText = curatedRawText;
        curated.DescriptionText = curatedDescription;
        curated.RawAffixText = curatedAffix;
        curated.ParseStatus = curatedStatus;
        curated.SourcePageNumber = null;
        curated.SourceImagePath = null;
        curated.SourceImageCropJson = null;

        await seedContext.SaveChangesAsync();
    }

    // Act
    await loader.RefreshImageArtifactsAsync(prepared.Table);

    // Assert: read back through a new context so only persisted state is observed.
    await using (var verifyContext = CreateDbContext(databasePath))
    {
        var refreshed = await LoadResultAsync(verifyContext, rollBandLabel, columnKey);

        // Curated content survives the refresh.
        Assert.True(refreshed.IsCurated);
        Assert.Equal(curatedRawText, refreshed.RawCellText);
        Assert.Equal(curatedDescription, refreshed.DescriptionText);
        Assert.Equal(curatedAffix, refreshed.RawAffixText);
        Assert.Equal(curatedStatus, refreshed.ParseStatus);

        // Image provenance has been repopulated.
        Assert.NotNull(refreshed.SourcePageNumber);
        Assert.False(string.IsNullOrWhiteSpace(refreshed.SourceImagePath));
        Assert.False(string.IsNullOrWhiteSpace(refreshed.SourceImageCropJson));
    }
}
|
||||
|
||||
private static ParsedCriticalTable CreateTrimmedTable(
|
||||
ParsedCriticalTable table,
|
||||
params (string RollBandLabel, string ColumnKey)[] excludedResults)
|
||||
|
||||
@@ -100,6 +100,40 @@ public sealed class CriticalImportCommandRunner
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the image re-import for one manifest table: makes sure the extracted XML
/// exists, parses it, regenerates the source image artifacts, writes the import
/// artifacts, and then refreshes the matching results in the database.
/// </summary>
/// <param name="options">The table slug to refresh and the optional database path.</param>
/// <returns><c>0</c> once the refresh has completed.</returns>
/// <exception cref="InvalidOperationException">
/// The parsed table's validation report is not valid.
/// </exception>
public async Task<int> RunAsync(ReimportImagesOptions options)
{
    var manifestEntry = GetManifestEntry(options.Table);
    var paths = CreateArtifactPaths(manifestEntry.Slug);

    // Only run the PDF-to-XML extraction when no XML artifact is present yet.
    var xmlIsMissing = !File.Exists(paths.XmlPath);
    if (xmlIsMissing)
    {
        await pdfXmlExtractor.ExtractAsync(ResolveRepositoryPath(manifestEntry.PdfPath), paths.XmlPath);
    }

    var parsed = Parse(manifestEntry, await File.ReadAllTextAsync(paths.XmlPath));

    await sourceImageArtifactGenerator.GenerateAsync(
        ResolveRepositoryPath(manifestEntry.PdfPath),
        paths,
        parsed,
        CancellationToken.None);

    // Artifacts are written before the validation check; the failure message
    // below points the user at the validation report path.
    await artifactWriter.WriteAsync(paths, parsed, CancellationToken.None);

    if (!parsed.ValidationReport.IsValid)
    {
        var failureMessage =
            $"Validation failed for '{manifestEntry.Slug}'. See {paths.ValidationReportPath} for details.";
        throw new InvalidOperationException(failureMessage);
    }

    var databasePath = ResolveDatabasePath(options.DatabasePath);
    var loader = new CriticalImportLoader(databasePath);
    var updatedResults = await loader.RefreshImageArtifactsAsync(parsed.Table);

    Console.WriteLine($"Refreshed image artifacts for {manifestEntry.Slug}: {updatedResults} results updated.");

    return 0;
}
|
||||
|
||||
private CriticalImportManifestEntry GetManifestEntry(string tableSlug)
|
||||
{
|
||||
var manifest = manifestLoader.Load(RepositoryPaths.Discover().ManifestPath);
|
||||
|
||||
@@ -128,6 +128,50 @@ public sealed class CriticalImportLoader(string databasePath)
|
||||
return new ImportCommandResult(entity.Slug, entity.Columns.Count, entity.RollBands.Count, entity.Results.Count);
|
||||
}
|
||||
|
||||
/// <summary>
/// Re-applies importer provenance from a freshly parsed table onto the results
/// already stored for the critical table with the same slug.
/// </summary>
/// <remarks>
/// Stored and parsed results are paired by the key built from group key, column key
/// and roll band label. Parsed results with no stored counterpart are skipped.
/// The fields actually written are decided by <c>ApplyImporterProvenance</c>.
/// </remarks>
/// <param name="table">The parsed table whose results supply the provenance.</param>
/// <param name="cancellationToken">Token passed to the database operations.</param>
/// <returns>The number of parsed results that were matched to a stored result.</returns>
/// <exception cref="InvalidOperationException">
/// No critical table with the parsed table's slug exists in the database.
/// </exception>
public async Task<int> RefreshImageArtifactsAsync(ParsedCriticalTable table, CancellationToken cancellationToken = default)
{
    await using var dbContext = CreateDbContext();
    await dbContext.Database.EnsureCreatedAsync(cancellationToken);
    await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
    await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);

    // Load the table with every result's group, column and roll band so the
    // lookup keys below can be built.
    var storedTable = await dbContext.CriticalTables
        .AsSplitQuery()
        .Include(candidate => candidate.Results)
        .ThenInclude(stored => stored.CriticalGroup)
        .Include(candidate => candidate.Results)
        .ThenInclude(stored => stored.CriticalColumn)
        .Include(candidate => candidate.Results)
        .ThenInclude(stored => stored.CriticalRollBand)
        .SingleOrDefaultAsync(candidate => candidate.Slug == table.Slug, cancellationToken)
        ?? throw new InvalidOperationException($"Critical table '{table.Slug}' does not exist in the target database.");

    var storedResultsByKey = storedTable.Results.ToDictionary(
        stored => CreateResultKey(
            stored.CriticalGroup?.GroupKey,
            stored.CriticalColumn.ColumnKey,
            stored.CriticalRollBand.Label),
        StringComparer.Ordinal);

    var refreshedCount = 0;
    foreach (var parsedResult in table.Results)
    {
        var lookupKey = CreateResultKey(parsedResult.GroupKey, parsedResult.ColumnKey, parsedResult.RollBandLabel);

        // Only results that already exist in the database are refreshed.
        if (storedResultsByKey.TryGetValue(lookupKey, out var storedResult))
        {
            ApplyImporterProvenance(storedResult, parsedResult);
            refreshedCount++;
        }
    }

    await dbContext.SaveChangesAsync(cancellationToken);
    await transaction.CommitAsync(cancellationToken);

    return refreshedCount;
}
|
||||
|
||||
private RolemasterDbContext CreateDbContext()
|
||||
{
|
||||
var options = new DbContextOptionsBuilder<RolemasterDbContext>()
|
||||
|
||||
@@ -75,17 +75,18 @@ public sealed class CriticalSourceImageArtifactGenerator(PdfXmlExtractor pdfXmlE
|
||||
|
||||
return new CriticalSourceImageCrop(
|
||||
sourceBounds.PageNumber,
|
||||
pageGeometry.Width,
|
||||
pageGeometry.Height,
|
||||
sourceBounds.Left,
|
||||
sourceBounds.Top,
|
||||
sourceBounds.Width,
|
||||
sourceBounds.Height,
|
||||
cropLeft,
|
||||
cropTop,
|
||||
Math.Max(1, cropRight - cropLeft),
|
||||
Math.Max(1, cropBottom - cropTop),
|
||||
PdfXmlExtractor.XmlAlignedRenderDpi);
|
||||
PdfXmlExtractor.ScaleCoordinate(pageGeometry.Width),
|
||||
PdfXmlExtractor.ScaleCoordinate(pageGeometry.Height),
|
||||
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Left),
|
||||
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Top),
|
||||
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Width),
|
||||
PdfXmlExtractor.ScaleCoordinate(sourceBounds.Height),
|
||||
PdfXmlExtractor.ScaleCoordinate(cropLeft),
|
||||
PdfXmlExtractor.ScaleCoordinate(cropTop),
|
||||
PdfXmlExtractor.ScaleCoordinate(Math.Max(1, cropRight - cropLeft)),
|
||||
PdfXmlExtractor.ScaleCoordinate(Math.Max(1, cropBottom - cropTop)),
|
||||
PdfXmlExtractor.ScaledRenderDpi,
|
||||
PdfXmlExtractor.RenderScaleFactor);
|
||||
}
|
||||
|
||||
private static string CreateCellKey(string? groupKey, string rollBandLabel, string columnKey) =>
|
||||
|
||||
@@ -12,7 +12,8 @@ public sealed class CriticalSourceImageCrop(
|
||||
int cropTop,
|
||||
int cropWidth,
|
||||
int cropHeight,
|
||||
int renderDpi)
|
||||
int renderDpi,
|
||||
int scaleFactor)
|
||||
{
|
||||
public int PageNumber { get; } = pageNumber;
|
||||
public int PageWidth { get; } = pageWidth;
|
||||
@@ -26,4 +27,5 @@ public sealed class CriticalSourceImageCrop(
|
||||
public int CropWidth { get; } = cropWidth;
|
||||
public int CropHeight { get; } = cropHeight;
|
||||
public int RenderDpi { get; } = renderDpi;
|
||||
public int ScaleFactor { get; } = scaleFactor;
|
||||
}
|
||||
|
||||
@@ -4,7 +4,11 @@ namespace RolemasterDb.ImportTool;
|
||||
|
||||
public sealed class PdfXmlExtractor
|
||||
{
|
||||
public const int RenderScaleFactor = 4;
|
||||
public const int XmlAlignedRenderDpi = 108;
|
||||
public const int ScaledRenderDpi = XmlAlignedRenderDpi * RenderScaleFactor;
|
||||
|
||||
public static int ScaleCoordinate(int value) => checked(value * RenderScaleFactor);
|
||||
|
||||
public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
|
||||
{
|
||||
@@ -77,7 +81,7 @@ public sealed class PdfXmlExtractor
|
||||
|
||||
startInfo.ArgumentList.Add("-png");
|
||||
startInfo.ArgumentList.Add("-r");
|
||||
startInfo.ArgumentList.Add(XmlAlignedRenderDpi.ToString());
|
||||
startInfo.ArgumentList.Add(ScaledRenderDpi.ToString());
|
||||
startInfo.ArgumentList.Add("-f");
|
||||
startInfo.ArgumentList.Add(pageNumber.ToString());
|
||||
startInfo.ArgumentList.Add("-l");
|
||||
|
||||
@@ -4,12 +4,13 @@ using RolemasterDb.ImportTool;
|
||||
|
||||
var runner = new CriticalImportCommandRunner();
|
||||
|
||||
var exitCode = await Parser.Default.ParseArguments<ResetOptions, ExtractOptions, LoadOptions, ImportOptions>(args)
|
||||
var exitCode = await Parser.Default.ParseArguments<ResetOptions, ExtractOptions, LoadOptions, ImportOptions, ReimportImagesOptions>(args)
|
||||
.MapResult(
|
||||
(ResetOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
|
||||
(ExtractOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
|
||||
(LoadOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
|
||||
(ImportOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
|
||||
(ReimportImagesOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
|
||||
_ => Task.FromResult(1));
|
||||
|
||||
return exitCode;
|
||||
|
||||
13
src/RolemasterDb.ImportTool/ReimportImagesOptions.cs
Normal file
13
src/RolemasterDb.ImportTool/ReimportImagesOptions.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
using CommandLine;
|
||||
|
||||
namespace RolemasterDb.ImportTool;
|
||||
|
||||
/// <summary>
/// Command-line options for the <c>reimport-images</c> verb.
/// </summary>
[Verb(
    "reimport-images",
    HelpText = "Regenerate critical table page and cell images and refresh only image metadata in SQLite.")]
public sealed class ReimportImagesOptions
{
    /// <summary>
    /// Manifest slug of the critical table to refresh; bound to the first positional argument.
    /// </summary>
    [Value(
        0,
        Required = true,
        MetaName = "table",
        HelpText = "The manifest slug of the critical table to refresh.")]
    public string Table { get; set; } = "";

    /// <summary>
    /// SQLite database path supplied with <c>-d</c> / <c>--db</c>; <c>null</c> when the option is omitted.
    /// </summary>
    [Option(
        'd',
        "db",
        HelpText = "Optional SQLite database path.")]
    public string? DatabasePath { get; set; }
}
|
||||
Reference in New Issue
Block a user