Track critical cell source bounds
This commit is contained in:
@@ -1,10 +1,15 @@
|
|||||||
namespace RolemasterDb.ImportTool.Parsing;
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
internal sealed class ColumnarCellEntry(string? groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
|
internal sealed class ColumnarCellEntry(
|
||||||
|
string? groupKey,
|
||||||
|
string rollBandLabel,
|
||||||
|
int rowIndex,
|
||||||
|
string columnKey,
|
||||||
|
List<ColumnarCellLine> lines)
|
||||||
{
|
{
|
||||||
public string? GroupKey { get; } = groupKey;
|
public string? GroupKey { get; } = groupKey;
|
||||||
public string RollBandLabel { get; } = rollBandLabel;
|
public string RollBandLabel { get; } = rollBandLabel;
|
||||||
public int RowIndex { get; } = rowIndex;
|
public int RowIndex { get; } = rowIndex;
|
||||||
public string ColumnKey { get; } = columnKey;
|
public string ColumnKey { get; } = columnKey;
|
||||||
public List<string> Lines { get; } = lines;
|
public List<ColumnarCellLine> Lines { get; } = lines;
|
||||||
}
|
}
|
||||||
|
|||||||
7
src/RolemasterDb.ImportTool/Parsing/ColumnarCellLine.cs
Normal file
7
src/RolemasterDb.ImportTool/Parsing/ColumnarCellLine.cs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// A single line of a columnar cell: the line's text paired with the XML text
/// fragments that the line was assembled from.
/// </summary>
internal sealed class ColumnarCellLine
{
    /// <summary>Creates a line from its text and the fragments backing it.</summary>
    /// <param name="text">Text of the line.</param>
    /// <param name="fragments">Source fragments belonging to the line.</param>
    public ColumnarCellLine(string text, List<XmlTextFragment> fragments)
    {
        Text = text;
        Fragments = fragments;
    }

    /// <summary>Text of the line.</summary>
    public string Text { get; }

    /// <summary>Source fragments the line was built from (the same list instance passed to the constructor).</summary>
    public List<XmlTextFragment> Fragments { get; }
}
|
||||||
@@ -2,11 +2,13 @@ namespace RolemasterDb.ImportTool.Parsing;
|
|||||||
|
|
||||||
public sealed class CriticalTableParseResult(
|
public sealed class CriticalTableParseResult(
|
||||||
ParsedCriticalTable table,
|
ParsedCriticalTable table,
|
||||||
|
IReadOnlyList<ParsedPdfPageGeometry> pageGeometries,
|
||||||
IReadOnlyList<XmlTextFragment> fragments,
|
IReadOnlyList<XmlTextFragment> fragments,
|
||||||
IReadOnlyList<ParsedCriticalCellArtifact> cells,
|
IReadOnlyList<ParsedCriticalCellArtifact> cells,
|
||||||
ImportValidationReport validationReport)
|
ImportValidationReport validationReport)
|
||||||
{
|
{
|
||||||
public ParsedCriticalTable Table { get; } = table;
|
public ParsedCriticalTable Table { get; } = table;
|
||||||
|
public IReadOnlyList<ParsedPdfPageGeometry> PageGeometries { get; } = pageGeometries;
|
||||||
public IReadOnlyList<XmlTextFragment> Fragments { get; } = fragments;
|
public IReadOnlyList<XmlTextFragment> Fragments { get; } = fragments;
|
||||||
public IReadOnlyList<ParsedCriticalCellArtifact> Cells { get; } = cells;
|
public IReadOnlyList<ParsedCriticalCellArtifact> Cells { get; } = cells;
|
||||||
public ImportValidationReport ValidationReport { get; } = validationReport;
|
public ImportValidationReport ValidationReport { get; } = validationReport;
|
||||||
|
|||||||
@@ -53,6 +53,26 @@ internal static class CriticalTableParserSupport
|
|||||||
return RemoveRedundantContainedFragments(fragments);
|
return RemoveRedundantContainedFragments(fragments);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the number, width and height of every <c>page</c> element found in the supplied XML.
/// </summary>
/// <param name="xmlContent">Text of the XML document to inspect.</param>
/// <returns>One <see cref="ParsedPdfPageGeometry"/> per <c>page</c> element, in document order.</returns>
/// <exception cref="InvalidOperationException">A <c>page</c> element has no <c>width</c> or <c>height</c> attribute.</exception>
/// <exception cref="FormatException">A <c>number</c>, <c>width</c> or <c>height</c> value is not a valid integer.</exception>
internal static List<ParsedPdfPageGeometry> LoadPageGeometries(string xmlContent)
{
    using var stringReader = new StringReader(xmlContent);
    using var xmlReader = XmlReader.Create(
        stringReader,
        new XmlReaderSettings
        {
            // A DOCTYPE in the input is skipped instead of being processed.
            DtdProcessing = DtdProcessing.Ignore
        });

    var document = XDocument.Load(xmlReader);

    return document.Descendants("page")
        .Select(page => new ParsedPdfPageGeometry(
            // A page without a "number" attribute is treated as page 1.
            ParseInvariantInt(page.Attribute("number")?.Value ?? "1"),
            ParseInvariantInt(page.Attribute("width")?.Value ?? throw new InvalidOperationException("Missing page width attribute.")),
            ParseInvariantInt(page.Attribute("height")?.Value ?? throw new InvalidOperationException("Missing page height attribute."))))
        .ToList();

    // The attributes are machine-written data, so they are parsed with the invariant
    // culture; the result must not depend on the culture of the thread running the import.
    static int ParseInvariantInt(string value) =>
        int.Parse(
            value,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);
}
|
||||||
|
|
||||||
internal static List<XmlTextFragment> FindRowLabelFragments(
|
internal static List<XmlTextFragment> FindRowLabelFragments(
|
||||||
IReadOnlyList<XmlTextFragment> fragments,
|
IReadOnlyList<XmlTextFragment> fragments,
|
||||||
int leftCutoff,
|
int leftCutoff,
|
||||||
@@ -143,7 +163,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return columns[^1].Key;
|
return columns[^1].Key;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static IReadOnlyList<string> BuildLines(IReadOnlyList<XmlTextFragment> fragments)
|
internal static IReadOnlyList<ColumnarCellLine> BuildLines(IReadOnlyList<XmlTextFragment> fragments)
|
||||||
{
|
{
|
||||||
var lines = new List<List<XmlTextFragment>>();
|
var lines = new List<List<XmlTextFragment>>();
|
||||||
|
|
||||||
@@ -159,8 +179,10 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
|
|
||||||
return lines
|
return lines
|
||||||
.Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))))
|
.Select(line => new ColumnarCellLine(
|
||||||
.Where(item => !string.IsNullOrWhiteSpace(item))
|
CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))),
|
||||||
|
line.OrderBy(item => item.Left).ToList()))
|
||||||
|
.Where(item => !string.IsNullOrWhiteSpace(item.Text))
|
||||||
.ToList();
|
.ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -516,7 +538,7 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
|
|
||||||
var leadingAffixCount = 0;
|
var leadingAffixCount = 0;
|
||||||
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount].Text, affixLegendSymbols))
|
||||||
{
|
{
|
||||||
leadingAffixCount++;
|
leadingAffixCount++;
|
||||||
}
|
}
|
||||||
@@ -564,7 +586,9 @@ internal static class CriticalTableParserSupport
|
|||||||
|
|
||||||
foreach (var cellEntry in cellEntries)
|
foreach (var cellEntry in cellEntries)
|
||||||
{
|
{
|
||||||
var content = SharedParsing.CriticalCellTextParser.Parse(cellEntry.Lines, sharedLegend);
|
var lineTexts = cellEntry.Lines.Select(line => line.Text).ToList();
|
||||||
|
var content = SharedParsing.CriticalCellTextParser.Parse(lineTexts, sharedLegend);
|
||||||
|
var sourceBounds = BuildSourceBounds(cellEntry.Lines.SelectMany(line => line.Fragments).ToList());
|
||||||
validationErrors.AddRange(content.ValidationErrors.Select(error =>
|
validationErrors.AddRange(content.ValidationErrors.Select(error =>
|
||||||
$"Cell '{BuildCellIdentifier(cellEntry)}': {error}"));
|
$"Cell '{BuildCellIdentifier(cellEntry)}': {error}"));
|
||||||
|
|
||||||
@@ -575,13 +599,14 @@ internal static class CriticalTableParserSupport
|
|||||||
cellEntry.GroupKey,
|
cellEntry.GroupKey,
|
||||||
cellEntry.RollBandLabel,
|
cellEntry.RollBandLabel,
|
||||||
cellEntry.ColumnKey,
|
cellEntry.ColumnKey,
|
||||||
cellEntry.Lines.ToList(),
|
lineTexts,
|
||||||
content.BaseLines,
|
content.BaseLines,
|
||||||
content.RawCellText,
|
content.RawCellText,
|
||||||
content.DescriptionText,
|
content.DescriptionText,
|
||||||
content.RawAffixText,
|
content.RawAffixText,
|
||||||
effects,
|
effects,
|
||||||
branches));
|
branches,
|
||||||
|
sourceBounds));
|
||||||
|
|
||||||
parsedResults.Add(new ParsedCriticalResult(
|
parsedResults.Add(new ParsedCriticalResult(
|
||||||
cellEntry.GroupKey,
|
cellEntry.GroupKey,
|
||||||
@@ -591,10 +616,37 @@ internal static class CriticalTableParserSupport
|
|||||||
content.DescriptionText,
|
content.DescriptionText,
|
||||||
content.RawAffixText,
|
content.RawAffixText,
|
||||||
effects,
|
effects,
|
||||||
branches));
|
branches,
|
||||||
|
sourceBounds));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Computes the smallest rectangle that encloses every supplied fragment.
/// </summary>
/// <param name="fragments">Fragments of one parsed cell; all must be on the same page.</param>
/// <returns>
/// The page number shared by the fragments together with the enclosing rectangle;
/// width and height are reported as at least 1.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="fragments"/> is empty, or the fragments are not all on the same page.
/// </exception>
private static ParsedCriticalSourceRect BuildSourceBounds(IReadOnlyList<XmlTextFragment> fragments)
{
    if (fragments.Count == 0)
    {
        throw new InvalidOperationException("Cannot build source bounds for an empty fragment set.");
    }

    // Every fragment must sit on the page of the first one.
    var pageNumber = fragments[0].PageNumber;
    foreach (var candidate in fragments)
    {
        if (candidate.PageNumber != pageNumber)
        {
            throw new InvalidOperationException("A parsed cell spans multiple PDF pages, which cannot be cropped reliably.");
        }
    }

    // Seed the extents from the first fragment, then widen them with the rest.
    var seed = fragments[0];
    var minLeft = seed.Left;
    var minTop = seed.Top;
    var maxRight = seed.Left + seed.Width;
    var maxBottom = seed.Top + seed.Height;

    for (var index = 1; index < fragments.Count; index++)
    {
        var current = fragments[index];
        minLeft = Math.Min(minLeft, current.Left);
        minTop = Math.Min(minTop, current.Top);
        maxRight = Math.Max(maxRight, current.Left + current.Width);
        maxBottom = Math.Max(maxBottom, current.Top + current.Height);
    }

    // Clamp so the resulting rectangle is never reported with a zero or negative size.
    var width = Math.Max(1, maxRight - minLeft);
    var height = Math.Max(1, maxBottom - minTop);

    return new ParsedCriticalSourceRect(pageNumber, minLeft, minTop, width, height);
}
|
||||||
|
|
||||||
private static SharedParsing.AffixLegend ToSharedAffixLegend(AffixLegend affixLegend) =>
|
private static SharedParsing.AffixLegend ToSharedAffixLegend(AffixLegend affixLegend) =>
|
||||||
new(
|
new(
|
||||||
affixLegend.SymbolEffects,
|
affixLegend.SymbolEffects,
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||||
{
|
{
|
||||||
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
||||||
|
var pageGeometries = CriticalTableParserSupport.LoadPageGeometries(xmlContent);
|
||||||
var groupHeaders = FindGroupHeaders(fragments);
|
var groupHeaders = FindGroupHeaders(fragments);
|
||||||
var columnHeaders = FindColumnHeaders(fragments);
|
var columnHeaders = FindColumnHeaders(fragments);
|
||||||
var validationErrors = new List<string>();
|
var validationErrors = new List<string>();
|
||||||
@@ -141,7 +142,7 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
parsedRollBands,
|
parsedRollBands,
|
||||||
parsedResults);
|
parsedResults);
|
||||||
|
|
||||||
return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
return new CriticalTableParseResult(table, pageGeometries, fragments, parsedCells, validationReport);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<XmlTextFragment> FindGroupHeaders(IReadOnlyList<XmlTextFragment> fragments)
|
private static List<XmlTextFragment> FindGroupHeaders(IReadOnlyList<XmlTextFragment> fragments)
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ public sealed class ParsedCriticalCellArtifact(
|
|||||||
string descriptionText,
|
string descriptionText,
|
||||||
string? rawAffixText,
|
string? rawAffixText,
|
||||||
IReadOnlyList<ParsedCriticalEffect> effects,
|
IReadOnlyList<ParsedCriticalEffect> effects,
|
||||||
IReadOnlyList<ParsedCriticalBranch> branches)
|
IReadOnlyList<ParsedCriticalBranch> branches,
|
||||||
|
ParsedCriticalSourceRect sourceBounds)
|
||||||
{
|
{
|
||||||
public string? GroupKey { get; } = groupKey;
|
public string? GroupKey { get; } = groupKey;
|
||||||
public string RollBandLabel { get; } = rollBandLabel;
|
public string RollBandLabel { get; } = rollBandLabel;
|
||||||
@@ -22,4 +23,7 @@ public sealed class ParsedCriticalCellArtifact(
|
|||||||
public string? RawAffixText { get; } = rawAffixText;
|
public string? RawAffixText { get; } = rawAffixText;
|
||||||
public IReadOnlyList<ParsedCriticalEffect> Effects { get; } = effects;
|
public IReadOnlyList<ParsedCriticalEffect> Effects { get; } = effects;
|
||||||
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
||||||
|
public ParsedCriticalSourceRect SourceBounds { get; } = sourceBounds;
|
||||||
|
public string? SourceImagePath { get; set; }
|
||||||
|
public CriticalSourceImageCrop? SourceImageCrop { get; set; }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,8 @@ public sealed class ParsedCriticalResult(
|
|||||||
string descriptionText,
|
string descriptionText,
|
||||||
string? rawAffixText,
|
string? rawAffixText,
|
||||||
IReadOnlyList<ParsedCriticalEffect> effects,
|
IReadOnlyList<ParsedCriticalEffect> effects,
|
||||||
IReadOnlyList<ParsedCriticalBranch> branches)
|
IReadOnlyList<ParsedCriticalBranch> branches,
|
||||||
|
ParsedCriticalSourceRect sourceBounds)
|
||||||
{
|
{
|
||||||
public string? GroupKey { get; } = groupKey;
|
public string? GroupKey { get; } = groupKey;
|
||||||
public string ColumnKey { get; } = columnKey;
|
public string ColumnKey { get; } = columnKey;
|
||||||
@@ -18,4 +19,7 @@ public sealed class ParsedCriticalResult(
|
|||||||
public string? RawAffixText { get; } = rawAffixText;
|
public string? RawAffixText { get; } = rawAffixText;
|
||||||
public IReadOnlyList<ParsedCriticalEffect> Effects { get; } = effects;
|
public IReadOnlyList<ParsedCriticalEffect> Effects { get; } = effects;
|
||||||
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
||||||
|
public ParsedCriticalSourceRect SourceBounds { get; } = sourceBounds;
|
||||||
|
public string? SourceImagePath { get; set; }
|
||||||
|
public CriticalSourceImageCrop? SourceImageCrop { get; set; }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// An integer rectangle on a numbered page, described by its left/top corner and its size.
/// </summary>
public sealed class ParsedCriticalSourceRect
{
    /// <summary>Creates a rectangle on the given page.</summary>
    /// <param name="pageNumber">Number of the page the rectangle belongs to.</param>
    /// <param name="left">Left edge of the rectangle.</param>
    /// <param name="top">Top edge of the rectangle.</param>
    /// <param name="width">Width of the rectangle.</param>
    /// <param name="height">Height of the rectangle.</param>
    public ParsedCriticalSourceRect(int pageNumber, int left, int top, int width, int height)
    {
        PageNumber = pageNumber;
        Left = left;
        Top = top;
        Width = width;
        Height = height;
    }

    /// <summary>Number of the page the rectangle belongs to.</summary>
    public int PageNumber { get; }

    /// <summary>Left edge of the rectangle.</summary>
    public int Left { get; }

    /// <summary>Top edge of the rectangle.</summary>
    public int Top { get; }

    /// <summary>Width of the rectangle.</summary>
    public int Width { get; }

    /// <summary>Height of the rectangle.</summary>
    public int Height { get; }
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// The number and the integer dimensions of one page.
/// </summary>
public sealed class ParsedPdfPageGeometry
{
    /// <summary>Creates the geometry record for one page.</summary>
    /// <param name="pageNumber">Number of the page.</param>
    /// <param name="width">Width of the page.</param>
    /// <param name="height">Height of the page.</param>
    public ParsedPdfPageGeometry(int pageNumber, int width, int height)
    {
        PageNumber = pageNumber;
        Width = width;
        Height = height;
    }

    /// <summary>Number of the page.</summary>
    public int PageNumber { get; }

    /// <summary>Width of the page.</summary>
    public int Width { get; }

    /// <summary>Height of the page.</summary>
    public int Height { get; }
}
|
||||||
@@ -5,6 +5,7 @@ public sealed class StandardCriticalTableParser
|
|||||||
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||||
{
|
{
|
||||||
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
||||||
|
var pageGeometries = CriticalTableParserSupport.LoadPageGeometries(xmlContent);
|
||||||
var headerFragments = FindHeaderFragments(fragments);
|
var headerFragments = FindHeaderFragments(fragments);
|
||||||
var validationErrors = new List<string>();
|
var validationErrors = new List<string>();
|
||||||
var validationWarnings = new List<string>();
|
var validationWarnings = new List<string>();
|
||||||
@@ -121,7 +122,7 @@ public sealed class StandardCriticalTableParser
|
|||||||
parsedRollBands,
|
parsedRollBands,
|
||||||
parsedResults);
|
parsedResults);
|
||||||
|
|
||||||
return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
return new CriticalTableParseResult(table, pageGeometries, fragments, parsedCells, validationReport);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
|
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||||
{
|
{
|
||||||
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
||||||
|
var pageGeometries = CriticalTableParserSupport.LoadPageGeometries(xmlContent);
|
||||||
var headerFragments = FindHeaderFragments(fragments);
|
var headerFragments = FindHeaderFragments(fragments);
|
||||||
var validationErrors = new List<string>();
|
var validationErrors = new List<string>();
|
||||||
var validationWarnings = new List<string>();
|
var validationWarnings = new List<string>();
|
||||||
@@ -137,7 +138,7 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
parsedRollBands,
|
parsedRollBands,
|
||||||
parsedResults);
|
parsedResults);
|
||||||
|
|
||||||
return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
return new CriticalTableParseResult(table, pageGeometries, fragments, parsedCells, validationReport);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
|
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
|
||||||
|
|||||||
Reference in New Issue
Block a user