174 lines
7.0 KiB
C#
174 lines
7.0 KiB
C#
namespace RolemasterDb.ImportTool.Parsing;
|
|
|
|
/// <summary>
/// Parses a critical table whose header row consists of the variant columns declared in
/// <see cref="ExpectedColumns"/> (Normal, Magic, Mithril, Holy Arms, Slaying), working from
/// the text fragments loaded out of an XML artifact.
/// </summary>
public sealed class VariantColumnCriticalTableParser
{
    /// <summary>
    /// The variant columns, in the left-to-right order the header row must present them.
    /// </summary>
    private static readonly ColumnDefinition[] ExpectedColumns =
    [
        new("NORMAL", "Normal"),
        new("MAGIC", "Magic"),
        new("MITHRIL", "Mithril"),
        new("HOLY_ARMS", "Holy Arms"),
        new("SLAYING", "Slaying")
    ];

    /// <summary>
    /// Header labels in expected order; computed once instead of on every header search.
    /// </summary>
    private static readonly string[] ExpectedLabels =
        ExpectedColumns.Select(item => item.Label).ToArray();

    /// <summary>
    /// Single comparer used for every label match so that header discovery and column
    /// resolution can never disagree about whether a fragment is a known label.
    /// </summary>
    private static readonly StringComparer LabelComparer = StringComparer.OrdinalIgnoreCase;

    /// <summary>
    /// Parses the XML content of one critical table into a table model, the per-cell
    /// artifacts and a validation report.
    /// </summary>
    /// <param name="entry">
    /// Manifest entry supplying the slug, display name, family and PDF path recorded on the table.
    /// </param>
    /// <param name="xmlContent">XML text handed to <c>CriticalTableParserSupport.LoadFragments</c>.</param>
    /// <returns>
    /// A result carrying the parsed table, the loaded fragments, the parsed cells and the
    /// validation report. Missing cells and count mismatches are reported as validation
    /// errors rather than thrown.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="entry"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// No row of fragments matches the expected variant-column header, or a header fragment
    /// carries a label that is not one of the expected columns.
    /// </exception>
    public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
    {
        // Fail fast: entry is only dereferenced when the table is assembled at the very end,
        // so without this guard a null entry surfaces late as a NullReferenceException.
        ArgumentNullException.ThrowIfNull(entry);

        var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
        var headerFragments = FindHeaderFragments(fragments);
        var validationErrors = new List<string>();
        var validationWarnings = new List<string>();

        // One anchor per header fragment, left to right: column key, label and horizontal centre.
        var columnAnchors = headerFragments
            .OrderBy(item => item.Left)
            .Select(item =>
            {
                var definition = ResolveColumnDefinition(item.Text);
                return (definition.Key, definition.Label, item.CenterX);
            })
            .ToList();

        var headerTop = headerFragments.Max(item => item.Top);
        var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
        var affixLegend = CriticalTableParserSupport.ParseAffixLegend(fragments, keyTop);
        var affixLegendSymbols = affixLegend.ClassificationSymbols;

        // Cutoff sits 10 below the smallest header Left; it is passed to the helpers that
        // locate row labels and body fragments.
        var leftCutoff = headerFragments.Min(item => item.Left) - 10;
        var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
            fragments,
            leftCutoff,
            headerTop + CriticalTableParserSupport.HeaderToRowLabelMinimumGap,
            keyTop);
        var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
        var bodyStartTop = CriticalTableParserSupport.ResolveBodyStartTop(headerTop, rowAnchors);

        if (rowAnchors.Count == 0)
        {
            validationErrors.Add("No roll-band labels were found in the XML artifact.");
        }

        var columnCenters = columnAnchors
            .Select(item => (item.Key, item.CenterX))
            .ToList();

        var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
            fragments,
            bodyStartTop,
            keyTop,
            leftCutoff,
            rowAnchors,
            headerFragments,
            columnCenters,
            affixLegendSymbols);
        var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);

        var parsedRollBands = rowAnchors
            .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
            .ToList();

        // rowBoundaries[i] is the boundary between row i and row i + 1. Each boundary is the
        // end of one row and the start of the next, so resolve it once and reuse it.
        var rowBoundaries = Enumerable.Range(1, Math.Max(0, rowAnchors.Count - 1))
            .Select(index => CriticalTableParserSupport.ResolveRowBoundaryTop(
                rowAnchors[index - 1],
                rowAnchors[index],
                bodyLines))
            .ToList();

        var cellEntries = new List<ColumnarCellEntry>();

        for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
        {
            // The first row starts at the body start; the last row ends just above the key.
            var rowStart = rowIndex == 0
                ? bodyStartTop
                : rowBoundaries[rowIndex - 1];

            var rowEnd = rowIndex == rowAnchors.Count - 1
                ? keyTop - 1
                : rowBoundaries[rowIndex];

            // Resolve each fragment's column once per row instead of once per column.
            // ToLookup keeps source order within a group and the sorts below are stable,
            // so the resulting cell order matches a per-column filter.
            var rowFragmentsByColumn = bodyFragments
                .Where(item => item.Top >= rowStart && item.Top < rowEnd)
                .ToLookup(item => CriticalTableParserSupport.ResolveColumn(item.CenterX, columnCenters));

            foreach (var columnAnchor in columnAnchors)
            {
                var cellFragments = rowFragmentsByColumn[columnAnchor.Key]
                    .OrderBy(item => item.Top)
                    .ThenBy(item => item.Left)
                    .ToList();

                if (cellFragments.Count == 0)
                {
                    validationErrors.Add($"Missing content for roll band '{rowAnchors[rowIndex].Label}', column '{columnAnchor.Key}'.");
                    continue;
                }

                cellEntries.Add(new ColumnarCellEntry(
                    null,
                    rowAnchors[rowIndex].Label,
                    rowIndex,
                    columnAnchor.Key,
                    CriticalTableParserSupport.BuildLines(cellFragments).ToList()));
            }
        }

        CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);

        var parsedCells = new List<ParsedCriticalCellArtifact>();
        var parsedResults = new List<ParsedCriticalResult>();
        CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegend, parsedCells, parsedResults, validationErrors);

        // Defensive: FindHeaderFragments only returns a row that matches every expected label,
        // so a mismatch here would indicate the header search itself has changed.
        if (columnAnchors.Count != ExpectedColumns.Length)
        {
            validationErrors.Add($"Expected {ExpectedColumns.Length} variant columns but found {columnAnchors.Count}.");
        }

        if (parsedCells.Count != rowAnchors.Count * columnAnchors.Count)
        {
            validationErrors.Add($"Expected {rowAnchors.Count * columnAnchors.Count} parsed cells but produced {parsedCells.Count}.");
        }

        // The first argument is true only when no validation errors were recorded.
        var validationReport = new ImportValidationReport(
            validationErrors.Count == 0,
            validationErrors,
            validationWarnings,
            rowAnchors.Count,
            parsedCells.Count);

        var table = new ParsedCriticalTable(
            entry.Slug,
            entry.DisplayName,
            entry.Family,
            Path.GetFileName(entry.PdfPath),
            "Imported from PDF XML extraction.",
            [],
            ExpectedColumns.Select((item, index) => new ParsedCriticalColumn(item.Key, item.Label, "variant", index + 1)).ToList(),
            parsedRollBands,
            parsedResults);

        return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
    }

    /// <summary>
    /// Finds the header row: the first group returned by
    /// <c>CriticalTableParserSupport.GroupByTop</c> whose fragments, read left to right,
    /// spell out exactly the expected labels in order (ignoring case and surrounding whitespace).
    /// </summary>
    /// <param name="fragments">All fragments loaded from the XML artifact.</param>
    /// <returns>The header fragments ordered by <c>Left</c>.</returns>
    /// <exception cref="InvalidOperationException">No group matches the expected header.</exception>
    private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
    {
        // Only fragments whose trimmed text is one of the expected labels can be part of the header.
        var headerCandidates = fragments
            .Where(item => ExpectedLabels.Contains(item.Text.Trim(), LabelComparer))
            .OrderBy(item => item.Top)
            .ThenBy(item => item.Left)
            .ToList();

        foreach (var group in CriticalTableParserSupport.GroupByTop(headerCandidates))
        {
            var ordered = group.OrderBy(item => item.Left).ToList();
            var labels = ordered.Select(item => item.Text.Trim());

            if (labels.SequenceEqual(ExpectedLabels, LabelComparer))
            {
                return ordered;
            }
        }

        throw new InvalidOperationException("Could not find the variant-column header row in the XML artifact.");
    }

    /// <summary>
    /// Maps a header label (ignoring case and surrounding whitespace) to its column definition.
    /// </summary>
    /// <param name="value">Raw header fragment text.</param>
    /// <exception cref="InvalidOperationException">The label is not one of the expected columns.</exception>
    private static ColumnDefinition ResolveColumnDefinition(string value) =>
        ExpectedColumns.SingleOrDefault(item => LabelComparer.Equals(item.Label, value.Trim()))
        ?? throw new InvalidOperationException($"Unsupported variant column label '{value}'.");

    /// <summary>Pairs a column key with the label it carries in the header row.</summary>
    private sealed record ColumnDefinition(string Key, string Label);
}
|