Track critical cell source bounds
This commit is contained in:
@@ -1,10 +1,15 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
internal sealed class ColumnarCellEntry(string? groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
|
||||
internal sealed class ColumnarCellEntry(
|
||||
string? groupKey,
|
||||
string rollBandLabel,
|
||||
int rowIndex,
|
||||
string columnKey,
|
||||
List<ColumnarCellLine> lines)
|
||||
{
|
||||
public string? GroupKey { get; } = groupKey;
|
||||
public string RollBandLabel { get; } = rollBandLabel;
|
||||
public int RowIndex { get; } = rowIndex;
|
||||
public string ColumnKey { get; } = columnKey;
|
||||
public List<string> Lines { get; } = lines;
|
||||
public List<ColumnarCellLine> Lines { get; } = lines;
|
||||
}
|
||||
|
||||
7
src/RolemasterDb.ImportTool/Parsing/ColumnarCellLine.cs
Normal file
7
src/RolemasterDb.ImportTool/Parsing/ColumnarCellLine.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
/// <summary>
/// Pairs the text of a single cell line with the XML text fragments supplied for that line.
/// </summary>
internal sealed class ColumnarCellLine
{
    /// <summary>
    /// Creates a line from its text and its fragments.
    /// </summary>
    /// <param name="text">Text of the line.</param>
    /// <param name="fragments">Fragments associated with the line; the list is stored as-is, not copied.</param>
    public ColumnarCellLine(string text, List<XmlTextFragment> fragments)
    {
        Text = text;
        Fragments = fragments;
    }

    /// <summary>Text of the line, exactly as passed to the constructor.</summary>
    public string Text { get; }

    /// <summary>Fragments associated with the line, exactly as passed to the constructor.</summary>
    public List<XmlTextFragment> Fragments { get; }
}
|
||||
@@ -2,11 +2,13 @@ namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
public sealed class CriticalTableParseResult(
|
||||
ParsedCriticalTable table,
|
||||
IReadOnlyList<ParsedPdfPageGeometry> pageGeometries,
|
||||
IReadOnlyList<XmlTextFragment> fragments,
|
||||
IReadOnlyList<ParsedCriticalCellArtifact> cells,
|
||||
ImportValidationReport validationReport)
|
||||
{
|
||||
public ParsedCriticalTable Table { get; } = table;
|
||||
public IReadOnlyList<ParsedPdfPageGeometry> PageGeometries { get; } = pageGeometries;
|
||||
public IReadOnlyList<XmlTextFragment> Fragments { get; } = fragments;
|
||||
public IReadOnlyList<ParsedCriticalCellArtifact> Cells { get; } = cells;
|
||||
public ImportValidationReport ValidationReport { get; } = validationReport;
|
||||
|
||||
@@ -53,6 +53,26 @@ internal static class CriticalTableParserSupport
|
||||
return RemoveRedundantContainedFragments(fragments);
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the number, width and height of every <c>page</c> element found in the supplied XML.
/// </summary>
/// <param name="xmlContent">XML document text containing <c>page</c> elements.</param>
/// <returns>One <see cref="ParsedPdfPageGeometry"/> per <c>page</c> element, in document order.</returns>
/// <exception cref="InvalidOperationException">
/// A <c>page</c> element has no <c>width</c> or no <c>height</c> attribute.
/// </exception>
/// <exception cref="FormatException">An attribute value is not a valid integer.</exception>
internal static List<ParsedPdfPageGeometry> LoadPageGeometries(string xmlContent)
{
    // The attribute values are file data, not UI text, so they are parsed with the
    // invariant culture instead of whatever culture the current thread happens to use.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    using var stringReader = new StringReader(xmlContent);
    using var xmlReader = XmlReader.Create(
        stringReader,
        new XmlReaderSettings
        {
            // A DOCTYPE declaration in the input is skipped rather than processed.
            DtdProcessing = DtdProcessing.Ignore
        });

    var document = XDocument.Load(xmlReader);

    return document.Descendants("page")
        .Select(page => new ParsedPdfPageGeometry(
            // A page without a "number" attribute is treated as page 1.
            int.Parse(page.Attribute("number")?.Value ?? "1", invariant),
            int.Parse(
                page.Attribute("width")?.Value ?? throw new InvalidOperationException("Missing page width attribute."),
                invariant),
            int.Parse(
                page.Attribute("height")?.Value ?? throw new InvalidOperationException("Missing page height attribute."),
                invariant)))
        .ToList();
}
|
||||
|
||||
internal static List<XmlTextFragment> FindRowLabelFragments(
|
||||
IReadOnlyList<XmlTextFragment> fragments,
|
||||
int leftCutoff,
|
||||
@@ -143,7 +163,7 @@ internal static class CriticalTableParserSupport
|
||||
return columns[^1].Key;
|
||||
}
|
||||
|
||||
internal static IReadOnlyList<string> BuildLines(IReadOnlyList<XmlTextFragment> fragments)
|
||||
internal static IReadOnlyList<ColumnarCellLine> BuildLines(IReadOnlyList<XmlTextFragment> fragments)
|
||||
{
|
||||
var lines = new List<List<XmlTextFragment>>();
|
||||
|
||||
@@ -159,8 +179,10 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
|
||||
return lines
|
||||
.Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))))
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item))
|
||||
.Select(line => new ColumnarCellLine(
|
||||
CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))),
|
||||
line.OrderBy(item => item.Left).ToList()))
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item.Text))
|
||||
.ToList();
|
||||
}
|
||||
|
||||
@@ -516,7 +538,7 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
|
||||
var leadingAffixCount = 0;
|
||||
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
||||
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount].Text, affixLegendSymbols))
|
||||
{
|
||||
leadingAffixCount++;
|
||||
}
|
||||
@@ -564,7 +586,9 @@ internal static class CriticalTableParserSupport
|
||||
|
||||
foreach (var cellEntry in cellEntries)
|
||||
{
|
||||
var content = SharedParsing.CriticalCellTextParser.Parse(cellEntry.Lines, sharedLegend);
|
||||
var lineTexts = cellEntry.Lines.Select(line => line.Text).ToList();
|
||||
var content = SharedParsing.CriticalCellTextParser.Parse(lineTexts, sharedLegend);
|
||||
var sourceBounds = BuildSourceBounds(cellEntry.Lines.SelectMany(line => line.Fragments).ToList());
|
||||
validationErrors.AddRange(content.ValidationErrors.Select(error =>
|
||||
$"Cell '{BuildCellIdentifier(cellEntry)}': {error}"));
|
||||
|
||||
@@ -575,13 +599,14 @@ internal static class CriticalTableParserSupport
|
||||
cellEntry.GroupKey,
|
||||
cellEntry.RollBandLabel,
|
||||
cellEntry.ColumnKey,
|
||||
cellEntry.Lines.ToList(),
|
||||
lineTexts,
|
||||
content.BaseLines,
|
||||
content.RawCellText,
|
||||
content.DescriptionText,
|
||||
content.RawAffixText,
|
||||
effects,
|
||||
branches));
|
||||
branches,
|
||||
sourceBounds));
|
||||
|
||||
parsedResults.Add(new ParsedCriticalResult(
|
||||
cellEntry.GroupKey,
|
||||
@@ -591,10 +616,37 @@ internal static class CriticalTableParserSupport
|
||||
content.DescriptionText,
|
||||
content.RawAffixText,
|
||||
effects,
|
||||
branches));
|
||||
branches,
|
||||
sourceBounds));
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Computes the smallest rectangle that encloses every supplied fragment.
/// </summary>
/// <param name="fragments">Fragments to enclose; must be non-empty and all on the same page.</param>
/// <returns>
/// A rectangle on the fragments' page whose width and height are each at least 1.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="fragments"/> is empty, or the fragments do not all share one page number.
/// </exception>
private static ParsedCriticalSourceRect BuildSourceBounds(IReadOnlyList<XmlTextFragment> fragments)
{
    if (fragments.Count == 0)
    {
        throw new InvalidOperationException("Cannot build source bounds for an empty fragment set.");
    }

    // Seed the running extremes from the first fragment, then widen them in one pass.
    var first = fragments[0];
    var pageNumber = first.PageNumber;
    var minLeft = first.Left;
    var minTop = first.Top;
    var maxRight = first.Left + first.Width;
    var maxBottom = first.Top + first.Height;

    for (var index = 1; index < fragments.Count; index++)
    {
        var current = fragments[index];

        if (current.PageNumber != pageNumber)
        {
            throw new InvalidOperationException("A parsed cell spans multiple PDF pages, which cannot be cropped reliably.");
        }

        minLeft = Math.Min(minLeft, current.Left);
        minTop = Math.Min(minTop, current.Top);
        maxRight = Math.Max(maxRight, current.Left + current.Width);
        maxBottom = Math.Max(maxBottom, current.Top + current.Height);
    }

    // Clamp to 1 so the rectangle never has a zero or negative extent.
    var width = Math.Max(1, maxRight - minLeft);
    var height = Math.Max(1, maxBottom - minTop);

    return new ParsedCriticalSourceRect(pageNumber, minLeft, minTop, width, height);
}
|
||||
|
||||
private static SharedParsing.AffixLegend ToSharedAffixLegend(AffixLegend affixLegend) =>
|
||||
new(
|
||||
affixLegend.SymbolEffects,
|
||||
|
||||
@@ -17,6 +17,7 @@ public sealed class GroupedVariantCriticalTableParser
|
||||
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||
{
|
||||
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
||||
var pageGeometries = CriticalTableParserSupport.LoadPageGeometries(xmlContent);
|
||||
var groupHeaders = FindGroupHeaders(fragments);
|
||||
var columnHeaders = FindColumnHeaders(fragments);
|
||||
var validationErrors = new List<string>();
|
||||
@@ -141,7 +142,7 @@ public sealed class GroupedVariantCriticalTableParser
|
||||
parsedRollBands,
|
||||
parsedResults);
|
||||
|
||||
return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
||||
return new CriticalTableParseResult(table, pageGeometries, fragments, parsedCells, validationReport);
|
||||
}
|
||||
|
||||
private static List<XmlTextFragment> FindGroupHeaders(IReadOnlyList<XmlTextFragment> fragments)
|
||||
|
||||
@@ -10,7 +10,8 @@ public sealed class ParsedCriticalCellArtifact(
|
||||
string descriptionText,
|
||||
string? rawAffixText,
|
||||
IReadOnlyList<ParsedCriticalEffect> effects,
|
||||
IReadOnlyList<ParsedCriticalBranch> branches)
|
||||
IReadOnlyList<ParsedCriticalBranch> branches,
|
||||
ParsedCriticalSourceRect sourceBounds)
|
||||
{
|
||||
public string? GroupKey { get; } = groupKey;
|
||||
public string RollBandLabel { get; } = rollBandLabel;
|
||||
@@ -22,4 +23,7 @@ public sealed class ParsedCriticalCellArtifact(
|
||||
public string? RawAffixText { get; } = rawAffixText;
|
||||
public IReadOnlyList<ParsedCriticalEffect> Effects { get; } = effects;
|
||||
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
||||
public ParsedCriticalSourceRect SourceBounds { get; } = sourceBounds;
|
||||
public string? SourceImagePath { get; set; }
|
||||
public CriticalSourceImageCrop? SourceImageCrop { get; set; }
|
||||
}
|
||||
|
||||
@@ -8,7 +8,8 @@ public sealed class ParsedCriticalResult(
|
||||
string descriptionText,
|
||||
string? rawAffixText,
|
||||
IReadOnlyList<ParsedCriticalEffect> effects,
|
||||
IReadOnlyList<ParsedCriticalBranch> branches)
|
||||
IReadOnlyList<ParsedCriticalBranch> branches,
|
||||
ParsedCriticalSourceRect sourceBounds)
|
||||
{
|
||||
public string? GroupKey { get; } = groupKey;
|
||||
public string ColumnKey { get; } = columnKey;
|
||||
@@ -18,4 +19,7 @@ public sealed class ParsedCriticalResult(
|
||||
public string? RawAffixText { get; } = rawAffixText;
|
||||
public IReadOnlyList<ParsedCriticalEffect> Effects { get; } = effects;
|
||||
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
||||
public ParsedCriticalSourceRect SourceBounds { get; } = sourceBounds;
|
||||
public string? SourceImagePath { get; set; }
|
||||
public CriticalSourceImageCrop? SourceImageCrop { get; set; }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
/// <summary>
/// Immutable rectangle on a numbered page, described by its left/top corner and its size.
/// </summary>
public sealed class ParsedCriticalSourceRect
{
    /// <summary>
    /// Creates a rectangle from its page number, corner and size.
    /// </summary>
    /// <param name="pageNumber">Number of the page the rectangle lies on.</param>
    /// <param name="left">Left edge of the rectangle.</param>
    /// <param name="top">Top edge of the rectangle.</param>
    /// <param name="width">Width of the rectangle.</param>
    /// <param name="height">Height of the rectangle.</param>
    public ParsedCriticalSourceRect(int pageNumber, int left, int top, int width, int height)
    {
        PageNumber = pageNumber;
        Left = left;
        Top = top;
        Width = width;
        Height = height;
    }

    /// <summary>Number of the page the rectangle lies on.</summary>
    public int PageNumber { get; }

    /// <summary>Left edge of the rectangle.</summary>
    public int Left { get; }

    /// <summary>Top edge of the rectangle.</summary>
    public int Top { get; }

    /// <summary>Width of the rectangle.</summary>
    public int Width { get; }

    /// <summary>Height of the rectangle.</summary>
    public int Height { get; }
}
|
||||
@@ -0,0 +1,8 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
/// <summary>
/// Immutable record of a page's number together with its width and height.
/// </summary>
public sealed class ParsedPdfPageGeometry
{
    /// <summary>
    /// Creates the geometry entry for one page.
    /// </summary>
    /// <param name="pageNumber">Number of the page.</param>
    /// <param name="width">Width of the page.</param>
    /// <param name="height">Height of the page.</param>
    public ParsedPdfPageGeometry(int pageNumber, int width, int height)
    {
        PageNumber = pageNumber;
        Width = width;
        Height = height;
    }

    /// <summary>Number of the page.</summary>
    public int PageNumber { get; }

    /// <summary>Width of the page.</summary>
    public int Width { get; }

    /// <summary>Height of the page.</summary>
    public int Height { get; }
}
|
||||
@@ -5,6 +5,7 @@ public sealed class StandardCriticalTableParser
|
||||
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||
{
|
||||
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
||||
var pageGeometries = CriticalTableParserSupport.LoadPageGeometries(xmlContent);
|
||||
var headerFragments = FindHeaderFragments(fragments);
|
||||
var validationErrors = new List<string>();
|
||||
var validationWarnings = new List<string>();
|
||||
@@ -121,7 +122,7 @@ public sealed class StandardCriticalTableParser
|
||||
parsedRollBands,
|
||||
parsedResults);
|
||||
|
||||
return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
||||
return new CriticalTableParseResult(table, pageGeometries, fragments, parsedCells, validationReport);
|
||||
}
|
||||
|
||||
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
|
||||
|
||||
@@ -14,6 +14,7 @@ public sealed class VariantColumnCriticalTableParser
|
||||
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||
{
|
||||
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
||||
var pageGeometries = CriticalTableParserSupport.LoadPageGeometries(xmlContent);
|
||||
var headerFragments = FindHeaderFragments(fragments);
|
||||
var validationErrors = new List<string>();
|
||||
var validationWarnings = new List<string>();
|
||||
@@ -137,7 +138,7 @@ public sealed class VariantColumnCriticalTableParser
|
||||
parsedRollBands,
|
||||
parsedResults);
|
||||
|
||||
return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
||||
return new CriticalTableParseResult(table, pageGeometries, fragments, parsedCells, validationReport);
|
||||
}
|
||||
|
||||
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
|
||||
|
||||
Reference in New Issue
Block a user