diff --git a/docs/critical_import_tool.md b/docs/critical_import_tool.md
index 85020ce..b570ed4 100644
--- a/docs/critical_import_tool.md
+++ b/docs/critical_import_tool.md
@@ -33,7 +33,7 @@ The current implementation supports:
- `variant_column` critical tables with non-severity columns
- `grouped_variant` critical tables with a group axis plus variant columns
- XML-based extraction using `pdftohtml -xml`
-- XML-aligned page rendering and per-cell PNG crops using `pdftoppm -png -r 108`
+- XML-aligned page rendering and per-cell PNG crops using `pdftoppm -png -r 432`
- geometry-based parsing across the currently enabled table set:
- `arcane-aether`
- `arcane-nether`
@@ -359,6 +359,22 @@ Example:
dotnet run --project .\src\RolemasterDb.ImportTool\RolemasterDb.ImportTool.csproj -- import slash
```
+### `reimport-images`
+
+Reuses `source.xml`, regenerates page PNGs and cell PNGs, rewrites the JSON artifacts, and refreshes only source-image metadata in SQLite.
+
+Use this when:
+
+- crop resolution or render settings changed
+- you want better source images without reloading result text
+- you want to keep curated and uncurated content untouched while refreshing artifacts
+
+Example:
+
+```powershell
+dotnet run --project .\src\RolemasterDb.ImportTool\RolemasterDb.ImportTool.csproj -- reimport-images slash
+```
+
## Manifest
The importer manifest is stored at:
@@ -433,7 +449,7 @@ Each parsed cell now includes:
### `pages/page-001.png`
-Rendered PDF page images at `108 DPI`, which matches the coordinate space emitted by `pdftohtml -xml`.
+Rendered PDF page images at `432 DPI`, using a central render scale factor of `4` over the XML coordinate space emitted by `pdftohtml -xml`.
Use this when:
@@ -607,10 +623,14 @@ The importer now uses two Poppler tools:
- `pdftohtml -xml -i -noframes`
- extracts geometry-aware XML text
-- `pdftoppm -png -r 108`
+- `pdftoppm -png -r 432`
- renders page PNGs and per-cell crop PNGs
-The `108 DPI` render setting is deliberate: for the current PDFs and Poppler output, it produces page images whose pixel dimensions match the XML `page width` and `page height`, so crop coordinates can be applied directly without an extra scale-conversion step.
+The importer keeps a central render scale factor of `4`. The XML still defines bounds in its original coordinate space, but rendered PNGs and stored crop metadata now use the scaled coordinate space and a `432 DPI` render setting. In practice:
+
+- XML coordinates are multiplied by `4` before crop extraction
+- page and crop metadata stored with each result reflect the scaled PNG coordinate space
+- crop alignment remains deterministic without changing the parsing pipeline
## Interaction With Web App Startup
diff --git a/src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs b/src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs
index 3fb09e1..5e79d8c 100644
--- a/src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs
+++ b/src/RolemasterDb.ImportTool.Tests/CriticalImportArtifactGenerationIntegrationTests.cs
@@ -22,6 +22,10 @@ public sealed class CriticalImportArtifactGenerationIntegrationTests
Assert.True(result.SourceBounds.Height > 0);
Assert.NotNull(result.SourceImagePath);
Assert.NotNull(result.SourceImageCrop);
+ Assert.Equal(PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop!.ScaleFactor);
+ Assert.Equal(PdfXmlExtractor.ScaledRenderDpi, result.SourceImageCrop.RenderDpi);
+ Assert.Equal(result.SourceBounds.Width * PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop.BoundsWidth);
+ Assert.Equal(result.SourceBounds.Height * PdfXmlExtractor.RenderScaleFactor, result.SourceImageCrop.BoundsHeight);
Assert.Equal(result.SourceImagePath, cellArtifact.SourceImagePath);
Assert.NotNull(cellArtifact.SourceImageCrop);
Assert.True(File.Exists(artifactPaths.GetPageImagePath(result.SourceBounds.PageNumber)));
diff --git a/src/RolemasterDb.ImportTool.Tests/CriticalImportMergeIntegrationTests.cs b/src/RolemasterDb.ImportTool.Tests/CriticalImportMergeIntegrationTests.cs
index 96490a4..5293b72 100644
--- a/src/RolemasterDb.ImportTool.Tests/CriticalImportMergeIntegrationTests.cs
+++ b/src/RolemasterDb.ImportTool.Tests/CriticalImportMergeIntegrationTests.cs
@@ -191,6 +191,48 @@ public sealed class CriticalImportMergeIntegrationTests
}
}
+    // An image-only refresh must restore source-image provenance while leaving the curated text fields as they were.
+    [Fact]
+    public async Task Reimport_images_only_refreshes_provenance_without_touching_curated_content()
+    {
+        const string curatedRawText = "Curated raw text";
+        const string curatedDescription = "Curated description";
+        const string curatedAffix = "+12H";
+        const string curatedStatus = "manually_curated";
+
+        var (prepared, _) = await LoadPreparedSlashParseResultAsync();
+        var sqlitePath = CreateEmptyDatabasePath();
+        var importLoader = new CriticalImportLoader(sqlitePath);
+        await importLoader.LoadAsync(prepared.Table);
+
+        await using (var seedContext = CreateDbContext(sqlitePath))
+        {
+            var seeded = await LoadResultAsync(seedContext, "36-45", "B");
+            seeded.IsCurated = true;
+            seeded.RawCellText = curatedRawText;
+            seeded.DescriptionText = curatedDescription;
+            seeded.RawAffixText = curatedAffix;
+            seeded.ParseStatus = curatedStatus;
+            seeded.SourcePageNumber = null;
+            seeded.SourceImagePath = null;
+            seeded.SourceImageCropJson = null;
+            await seedContext.SaveChangesAsync();
+        }
+
+        await importLoader.RefreshImageArtifactsAsync(prepared.Table);
+
+        await using var verifyContext = CreateDbContext(sqlitePath);
+        var refreshed = await LoadResultAsync(verifyContext, "36-45", "B");
+        Assert.True(refreshed.IsCurated);
+        Assert.Equal(curatedRawText, refreshed.RawCellText);
+        Assert.Equal(curatedDescription, refreshed.DescriptionText);
+        Assert.Equal(curatedAffix, refreshed.RawAffixText);
+        Assert.Equal(curatedStatus, refreshed.ParseStatus);
+        Assert.NotNull(refreshed.SourcePageNumber);
+        Assert.False(string.IsNullOrWhiteSpace(refreshed.SourceImagePath));
+        Assert.False(string.IsNullOrWhiteSpace(refreshed.SourceImageCropJson));
+    }
+
private static ParsedCriticalTable CreateTrimmedTable(
ParsedCriticalTable table,
params (string RollBandLabel, string ColumnKey)[] excludedResults)
diff --git a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs
index 5bdae92..4fc0f90 100644
--- a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs
+++ b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs
@@ -100,6 +100,40 @@ public sealed class CriticalImportCommandRunner
});
}
+    /// <summary>Regenerates image and JSON artifacts for one table and refreshes its image metadata in the database.</summary>
+    /// <returns>The exit code <c>0</c> once the refresh has completed.</returns>
+    /// <exception cref="InvalidOperationException">The parse result's validation report is not valid.</exception>
+    public async Task<int> RunAsync(ReimportImagesOptions options)
+    {
+        var entry = GetManifestEntry(options.Table);
+        var artifactPaths = CreateArtifactPaths(entry.Slug);
+
+        // Reuse the previously extracted XML; extract it only when the artifact is missing.
+        if (!File.Exists(artifactPaths.XmlPath))
+        {
+            await pdfXmlExtractor.ExtractAsync(ResolveRepositoryPath(entry.PdfPath), artifactPaths.XmlPath);
+        }
+
+        var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath);
+        var parseResult = Parse(entry, xmlContent);
+        await sourceImageArtifactGenerator.GenerateAsync(
+            ResolveRepositoryPath(entry.PdfPath), artifactPaths, parseResult, CancellationToken.None);
+        await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None);
+
+        // Validation is checked only after the artifacts have been written.
+        if (!parseResult.ValidationReport.IsValid)
+        {
+            throw new InvalidOperationException(
+                $"Validation failed for '{entry.Slug}'. See {artifactPaths.ValidationReportPath} for details.");
+        }
+
+        var loader = new CriticalImportLoader(ResolveDatabasePath(options.DatabasePath));
+        var refreshedCount = await loader.RefreshImageArtifactsAsync(parseResult.Table);
+
+        Console.WriteLine($"Refreshed image artifacts for {entry.Slug}: {refreshedCount} results updated.");
+        return 0;
+    }
+
private CriticalImportManifestEntry GetManifestEntry(string tableSlug)
{
var manifest = manifestLoader.Load(RepositoryPaths.Discover().ManifestPath);
diff --git a/src/RolemasterDb.ImportTool/CriticalImportLoader.cs b/src/RolemasterDb.ImportTool/CriticalImportLoader.cs
index 7ca83c5..6947fbb 100644
--- a/src/RolemasterDb.ImportTool/CriticalImportLoader.cs
+++ b/src/RolemasterDb.ImportTool/CriticalImportLoader.cs
@@ -128,6 +128,50 @@ public sealed class CriticalImportLoader(string databasePath)
return new ImportCommandResult(entity.Slug, entity.Columns.Count, entity.RollBands.Count, entity.Results.Count);
}
+    /// <summary>Reapplies importer provenance from <paramref name="table"/> to the matching results already stored for that table.</summary>
+    /// <param name="cancellationToken">Token passed to every database call made here.</param>
+    /// <returns>The number of parsed results that matched a stored result.</returns>
+    /// <exception cref="InvalidOperationException">No stored table has the slug of <paramref name="table"/>.</exception>
+    public async Task<int> RefreshImageArtifactsAsync(ParsedCriticalTable table, CancellationToken cancellationToken = default)
+    {
+        await using var dbContext = CreateDbContext();
+        await dbContext.Database.EnsureCreatedAsync(cancellationToken);
+        await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
+        await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
+
+        var entity = await dbContext.CriticalTables
+            .AsSplitQuery()
+            .Include(item => item.Results)
+            .ThenInclude(result => result.CriticalGroup)
+            .Include(item => item.Results)
+            .ThenInclude(result => result.CriticalColumn)
+            .Include(item => item.Results)
+            .ThenInclude(result => result.CriticalRollBand)
+            .SingleOrDefaultAsync(item => item.Slug == table.Slug, cancellationToken)
+            ?? throw new InvalidOperationException($"Critical table '{table.Slug}' does not exist in the target database.");
+
+        // Key stored results the same way parsed results are keyed below so the two can be matched.
+        var existingResultsByKey = entity.Results.ToDictionary(
+            item => CreateResultKey(item.CriticalGroup?.GroupKey, item.CriticalColumn.ColumnKey, item.CriticalRollBand.Label),
+            StringComparer.Ordinal);
+
+        // Parsed results without a stored counterpart are skipped and not counted.
+        var refreshedCount = 0;
+        foreach (var item in table.Results)
+        {
+            var resultKey = CreateResultKey(item.GroupKey, item.ColumnKey, item.RollBandLabel);
+            if (existingResultsByKey.TryGetValue(resultKey, out var existingResult))
+            {
+                ApplyImporterProvenance(existingResult, item);
+                refreshedCount++;
+            }
+        }
+
+        await dbContext.SaveChangesAsync(cancellationToken);
+        await transaction.CommitAsync(cancellationToken);
+        return refreshedCount;
+    }
+
private RolemasterDbContext CreateDbContext()
{
var options = new DbContextOptionsBuilder()
diff --git a/src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs b/src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs
index 6320258..b6caaad 100644
--- a/src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs
+++ b/src/RolemasterDb.ImportTool/CriticalSourceImageArtifactGenerator.cs
@@ -75,17 +75,18 @@ public sealed class CriticalSourceImageArtifactGenerator(PdfXmlExtractor pdfXmlE
return new CriticalSourceImageCrop(
sourceBounds.PageNumber,
- pageGeometry.Width,
- pageGeometry.Height,
- sourceBounds.Left,
- sourceBounds.Top,
- sourceBounds.Width,
- sourceBounds.Height,
- cropLeft,
- cropTop,
- Math.Max(1, cropRight - cropLeft),
- Math.Max(1, cropBottom - cropTop),
- PdfXmlExtractor.XmlAlignedRenderDpi);
+ PdfXmlExtractor.ScaleCoordinate(pageGeometry.Width),
+ PdfXmlExtractor.ScaleCoordinate(pageGeometry.Height),
+ PdfXmlExtractor.ScaleCoordinate(sourceBounds.Left),
+ PdfXmlExtractor.ScaleCoordinate(sourceBounds.Top),
+ PdfXmlExtractor.ScaleCoordinate(sourceBounds.Width),
+ PdfXmlExtractor.ScaleCoordinate(sourceBounds.Height),
+ PdfXmlExtractor.ScaleCoordinate(cropLeft),
+ PdfXmlExtractor.ScaleCoordinate(cropTop),
+ PdfXmlExtractor.ScaleCoordinate(Math.Max(1, cropRight - cropLeft)),
+ PdfXmlExtractor.ScaleCoordinate(Math.Max(1, cropBottom - cropTop)),
+ PdfXmlExtractor.ScaledRenderDpi,
+ PdfXmlExtractor.RenderScaleFactor);
}
private static string CreateCellKey(string? groupKey, string rollBandLabel, string columnKey) =>
diff --git a/src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs b/src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs
index 169dec9..1c7f13d 100644
--- a/src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs
+++ b/src/RolemasterDb.ImportTool/Parsing/CriticalSourceImageCrop.cs
@@ -12,7 +12,8 @@ public sealed class CriticalSourceImageCrop(
int cropTop,
int cropWidth,
int cropHeight,
- int renderDpi)
+ int renderDpi,
+ int scaleFactor)
{
public int PageNumber { get; } = pageNumber;
public int PageWidth { get; } = pageWidth;
@@ -26,4 +27,5 @@ public sealed class CriticalSourceImageCrop(
public int CropWidth { get; } = cropWidth;
public int CropHeight { get; } = cropHeight;
public int RenderDpi { get; } = renderDpi;
+ public int ScaleFactor { get; } = scaleFactor;
}
diff --git a/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs b/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
index b6d23d8..b81a330 100644
--- a/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
+++ b/src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
@@ -4,7 +4,11 @@ namespace RolemasterDb.ImportTool;
public sealed class PdfXmlExtractor
{
+ public const int RenderScaleFactor = 4;
public const int XmlAlignedRenderDpi = 108;
+ public const int ScaledRenderDpi = XmlAlignedRenderDpi * RenderScaleFactor;
+
+ public static int ScaleCoordinate(int value) => checked(value * RenderScaleFactor); // Multiplies by RenderScaleFactor; checked() makes int overflow throw instead of wrapping.
public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
{
@@ -77,7 +81,7 @@ public sealed class PdfXmlExtractor
startInfo.ArgumentList.Add("-png");
startInfo.ArgumentList.Add("-r");
- startInfo.ArgumentList.Add(XmlAlignedRenderDpi.ToString());
+ startInfo.ArgumentList.Add(ScaledRenderDpi.ToString());
startInfo.ArgumentList.Add("-f");
startInfo.ArgumentList.Add(pageNumber.ToString());
startInfo.ArgumentList.Add("-l");
diff --git a/src/RolemasterDb.ImportTool/Program.cs b/src/RolemasterDb.ImportTool/Program.cs
index 6d2a3ec..dd3b4e5 100644
--- a/src/RolemasterDb.ImportTool/Program.cs
+++ b/src/RolemasterDb.ImportTool/Program.cs
@@ -4,12 +4,13 @@ using RolemasterDb.ImportTool;
var runner = new CriticalImportCommandRunner();
-var exitCode = await Parser.Default.ParseArguments<ResetOptions, ExtractOptions, LoadOptions, ImportOptions>(args)
+var exitCode = await Parser.Default.ParseArguments<ResetOptions, ExtractOptions, LoadOptions, ImportOptions, ReimportImagesOptions>(args)
.MapResult(
(ResetOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
(ExtractOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
(LoadOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
(ImportOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
+ (ReimportImagesOptions options) => ExecuteAsync(() => runner.RunAsync(options)),
_ => Task.FromResult(1));
return exitCode;
diff --git a/src/RolemasterDb.ImportTool/ReimportImagesOptions.cs b/src/RolemasterDb.ImportTool/ReimportImagesOptions.cs
new file mode 100644
index 0000000..04e70f2
--- /dev/null
+++ b/src/RolemasterDb.ImportTool/ReimportImagesOptions.cs
@@ -0,0 +1,13 @@
+using CommandLine;
+
+namespace RolemasterDb.ImportTool;
+
+[Verb("reimport-images", HelpText = "Regenerate critical table page and cell images and refresh only image metadata in SQLite.")]
+public sealed class ReimportImagesOptions // Options for the "reimport-images" verb; handled by CriticalImportCommandRunner.RunAsync.
+{
+ [Value(0, MetaName = "table", Required = true, HelpText = "The manifest slug of the critical table to refresh.")]
+ public string Table { get; set; } = string.Empty; // Passed to GetManifestEntry to look up the table's manifest entry.
+
+ [Option('d', "db", HelpText = "Optional SQLite database path.")]
+ public string? DatabasePath { get; set; } // No initializer, so null unless set; handed to ResolveDatabasePath as-is.
+}