namespace RolemasterDb.ImportTool.Parsing;

// NOTE(review): several declarations in this file appear without generic type arguments
// (`new List()`, `List FindHeaderFragments(IReadOnlyList fragments)`). Presumably the
// `<...>` arguments were lost when the file was extracted or rendered - confirm against the
// repository copy before compiling or editing the signatures.

/// <summary>
/// Parses the XML content of a critical table whose columns are the five variants listed in
/// <c>ExpectedColumns</c> (Normal, Magic, Mithril, Holy Arms, Slaying) into a
/// <c>CriticalTableParseResult</c>.
/// </summary>
/// <remarks>
/// The geometric work (fragment loading, row anchoring, body line building, affix handling)
/// is delegated to <c>CriticalTableParserSupport</c>; this class contributes the column
/// definitions, the header detection, and the per-cell slicing loop.
/// </remarks>
public sealed class VariantColumnCriticalTableParser
{
    // Column keys and display labels, in the left-to-right order the header row must have.
    // FindHeaderFragments compares header text against these labels (case-insensitively),
    // and Parse emits the output columns in this order.
    private static readonly ColumnDefinition[] ExpectedColumns =
    [
        new("NORMAL", "Normal"),
        new("MAGIC", "Magic"),
        new("MITHRIL", "Mithril"),
        new("HOLY_ARMS", "Holy Arms"),
        new("SLAYING", "Slaying")
    ];

    /// <summary>
    /// Parses <paramref name="xmlContent"/> into a table, its cells, and a validation report.
    /// </summary>
    /// <param name="entry">
    /// Manifest entry; its <c>Slug</c>, <c>DisplayName</c>, <c>Family</c> and <c>PdfPath</c>
    /// are copied into the resulting table (only the file name of <c>PdfPath</c> is kept).
    /// </param>
    /// <param name="xmlContent">
    /// XML text handed to <c>CriticalTableParserSupport.LoadFragments</c>.
    /// </param>
    /// <returns>
    /// A result holding the parsed table, every loaded fragment, the parsed cells, and the
    /// validation report. Recoverable problems (no row labels, empty cells, count mismatches)
    /// are reported through the validation errors rather than thrown.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown when no header row matching <c>ExpectedColumns</c> is found, or when a header
    /// label cannot be resolved to a column definition.
    /// </exception>
    public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
    {
        var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
        // Throws if the header row is missing, so everything below can assume a non-empty header.
        var headerFragments = FindHeaderFragments(fragments);
        var validationErrors = new List();
        // Nothing in this method adds warnings; the list is passed to the report as-is.
        var validationWarnings = new List();

        // One anchor per header fragment, left to right: the column key/label plus the
        // horizontal center of the header text.
        var columnAnchors = headerFragments
            .OrderBy(item => item.Left)
            .Select(item =>
            {
                var definition = ResolveColumnDefinition(item.Text);
                return (definition.Key, definition.Label, item.CenterX);
            })
            .ToList();

        // Largest Top value among the header fragments.
        var headerTop = headerFragments.Max(item => item.Top);
        // NOTE(review): keyTop is used below as the lower limit of the table body (the last
        // row ends at keyTop - 1); presumably it is the top of the key/legend area - confirm
        // against FindKeyTop.
        var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
        var affixLegend = CriticalTableParserSupport.ParseAffixLegend(fragments, keyTop);
        var affixLegendSymbols = affixLegend.ClassificationSymbols;
        // 10 units to the left of the leftmost header fragment.
        // NOTE(review): presumably separates the row-label gutter from the column area - the
        // constant's rationale is not visible here.
        var leftCutoff = headerFragments.Min(item => item.Left) - 10;
        var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
            fragments,
            leftCutoff,
            headerTop + CriticalTableParserSupport.HeaderToRowLabelMinimumGap,
            keyTop);
        var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
        // Computed before the empty-anchor check below, so ResolveBodyStartTop is also called
        // with an empty anchor list when no row labels were found.
        var bodyStartTop = CriticalTableParserSupport.ResolveBodyStartTop(headerTop, rowAnchors);

        if (rowAnchors.Count == 0)
        {
            validationErrors.Add("No roll-band labels were found in the XML artifact.");
        }

        // Key + center pairs, the shape the support helpers take for column resolution.
        var columnCenters = columnAnchors
            .Select(item => (item.Key, item.CenterX))
            .ToList();
        var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
            fragments,
            bodyStartTop,
            keyTop,
            leftCutoff,
            rowAnchors,
            headerFragments,
            columnCenters,
            affixLegendSymbols);
        var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
        var parsedRollBands = rowAnchors
            .Select(anchor =>
                CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
            .ToList();
        var cellEntries = new List();

        // Slice the body into rows by vertical position, then each row into columns.
        for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
        {
            // The first row starts at the body start; later rows start at the boundary
            // resolved between the previous anchor and this one.
            var rowStart = rowIndex == 0
                ? bodyStartTop
                : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
            // The last row ends at keyTop - 1; other rows end at the boundary resolved
            // between this anchor and the next one.
            var rowEnd = rowIndex == rowAnchors.Count - 1
                ? keyTop - 1
                : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
            // Half-open range: a fragment whose Top equals rowEnd is not part of this row.
            var rowFragments = bodyFragments
                .Where(item => item.Top >= rowStart && item.Top < rowEnd)
                .ToList();

            foreach (var columnAnchor in columnAnchors)
            {
                // Fragments that ResolveColumn assigns to this column, top to bottom and
                // then left to right.
                var cellFragments = rowFragments
                    .Where(item => CriticalTableParserSupport.ResolveColumn(item.CenterX, columnCenters) == columnAnchor.Key)
                    .OrderBy(item => item.Top)
                    .ThenBy(item => item.Left)
                    .ToList();

                if (cellFragments.Count == 0)
                {
                    // An empty cell is recorded as a validation error and skipped; no
                    // entry is created for it.
                    validationErrors.Add($"Missing content for roll band '{rowAnchors[rowIndex].Label}', column '{columnAnchor.Key}'.");
                    continue;
                }

                // NOTE(review): the first constructor argument is always null here; its
                // meaning is defined by ColumnarCellEntry, which is not visible in this file.
                cellEntries.Add(new ColumnarCellEntry(
                    null,
                    rowAnchors[rowIndex].Label,
                    rowIndex,
                    columnAnchor.Key,
                    CriticalTableParserSupport.BuildLines(cellFragments).ToList()));
            }
        }

        CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
        // Both lists are passed to BuildParsedArtifacts, along with validationErrors, and are
        // read afterwards - the helper is expected to fill them in place.
        var parsedCells = new List();
        var parsedResults = new List();
        CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegend, parsedCells, parsedResults, validationErrors);

        // FindHeaderFragments only returns a row whose labels equal the full expected
        // sequence, so this count check acts as a safeguard rather than a routine failure path.
        if (columnAnchors.Count != ExpectedColumns.Length)
        {
            validationErrors.Add($"Expected {ExpectedColumns.Length} variant columns but found {columnAnchors.Count}.");
        }

        // Every row/column pair should have produced exactly one parsed cell.
        if (parsedCells.Count != rowAnchors.Count * columnAnchors.Count)
        {
            validationErrors.Add($"Expected {rowAnchors.Count * columnAnchors.Count} parsed cells but produced {parsedCells.Count}.");
        }

        // The first argument is true only when no validation errors were collected.
        var validationReport = new ImportValidationReport(
            validationErrors.Count == 0,
            validationErrors,
            validationWarnings,
            rowAnchors.Count,
            parsedCells.Count);
        // Columns are emitted from ExpectedColumns (not from the detected anchors), each with
        // the literal "variant" and a 1-based position (index + 1). The sixth argument is
        // always an empty collection.
        var table = new
            ParsedCriticalTable(
            entry.Slug,
            entry.DisplayName,
            entry.Family,
            Path.GetFileName(entry.PdfPath),
            "Imported from PDF XML extraction.",
            [],
            ExpectedColumns.Select((item, index) => new ParsedCriticalColumn(item.Key, item.Label, "variant", index + 1)).ToList(),
            parsedRollBands,
            parsedResults);
        return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
    }

    /// <summary>
    /// Locates the header row: the first group of candidate fragments (as grouped by
    /// <c>CriticalTableParserSupport.GroupByTop</c>) whose trimmed, lower-cased texts, read
    /// left to right, are exactly the expected column labels in order.
    /// </summary>
    /// <param name="fragments">All fragments loaded from the XML content.</param>
    /// <returns>The header fragments of the matching group, ordered by <c>Left</c>.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown when no group matches the full expected label sequence.
    /// </exception>
    private static List FindHeaderFragments(IReadOnlyList fragments)
    {
        // Lower-cased labels in ExpectedColumns order; used both as the candidate filter
        // and as the exact sequence a header row must match.
        var expectedLabels = ExpectedColumns
            .Select(item => item.Label.ToLowerInvariant())
            .ToList();
        // Any fragment whose normalized text is one of the expected labels, wherever it sits.
        var headerCandidates = fragments
            .Where(item => expectedLabels.Contains(item.Text.Trim().ToLowerInvariant(), StringComparer.Ordinal))
            .OrderBy(item => item.Top)
            .ThenBy(item => item.Left)
            .ToList();

        foreach (var group in CriticalTableParserSupport.GroupByTop(headerCandidates))
        {
            var ordered = group.OrderBy(item => item.Left).ToList();
            var labels = ordered.Select(item => item.Text.Trim().ToLowerInvariant()).ToList();

            // Requires all labels, in the expected order, with no extras or duplicates.
            if (labels.SequenceEqual(expectedLabels))
            {
                return ordered;
            }
        }

        throw new InvalidOperationException("Could not find the variant-column header row in the XML artifact.");
    }

    /// <summary>
    /// Maps header text to its column definition by comparing the trimmed text with each
    /// expected label, ignoring case.
    /// </summary>
    /// <param name="value">Raw header fragment text.</param>
    /// <returns>The matching column definition.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown when no expected label matches <paramref name="value"/>.
    /// </exception>
    private static ColumnDefinition ResolveColumnDefinition(string value) =>
        ExpectedColumns.SingleOrDefault(item => string.Equals(item.Label, value.Trim(), StringComparison.OrdinalIgnoreCase))
        ?? throw new InvalidOperationException($"Unsupported variant column label '{value}'.");

    // Pairs the column key with the label text expected in the header row.
    private sealed record ColumnDefinition(string Key, string Label);
}