From c7467aad13797d84b265ce7d1f030f95649e09a2 Mon Sep 17 00:00:00 2001 From: Frank Tovar Date: Sat, 14 Mar 2026 02:29:28 +0100 Subject: [PATCH] Finalize phase 3 mana critical imports --- docs/critical_import_tool.md | 6 +- sources/critical-import-manifest.json | 8 + ...dardCriticalTableParserIntegrationTests.cs | 33 ++++ .../Parsing/StandardCriticalTableParser.cs | 141 ++++++++++++++++-- 4 files changed, 175 insertions(+), 13 deletions(-) diff --git a/docs/critical_import_tool.md b/docs/critical_import_tool.md index 643e11d..cf0dbc8 100644 --- a/docs/critical_import_tool.md +++ b/docs/critical_import_tool.md @@ -44,6 +44,7 @@ The current implementation supports: - `krush` - `ma-strikes` - `ma-sweeps` + - `mana` - `puncture` - `slash` - `subdual` @@ -57,7 +58,6 @@ The current implementation does not yet support: - variant-column critical tables - grouped variant tables -- `Mana.pdf`, whose current XML layout and affix notation still need a dedicated parser pass - OCR/image-based PDFs such as `Void.pdf` - normalized `critical_branch` population - normalized `critical_effect` population @@ -228,6 +228,7 @@ The currently enabled phase-3 table set is: - `krush` - `ma-strikes` - `ma-sweeps` +- `mana` - `puncture` - `slash` - `subdual` @@ -237,9 +238,10 @@ The currently enabled phase-3 table set is: Current phase-3 notes: - header detection now tolerates minor `top` misalignment across the `A-E` header glyphs +- row boundaries can snap to the last affix-to-prose transition between adjacent roll labels when midpoint slicing would leak into the next row +- affix symbols are learned from the footer legend before body parsing, so symbol-only affix fragments are classified correctly - footer page numbers are filtered out before body parsing - validation allows a single contiguous affix block either before or after prose -- `Mana.pdf` is intentionally left out for now because its row-anchor geometry and notation still need dedicated handling ### Phase 4: Variant and Grouped Tables diff --git a/sources/critical-import-manifest.json b/sources/critical-import-manifest.json index 97893bf..0c1953e 100644 --- a/sources/critical-import-manifest.json +++ b/sources/critical-import-manifest.json @@ -96,6 +96,14 @@ "pdfPath": "sources/MA Sweeps.pdf", "enabled": true }, + { + "slug": "mana", + "displayName": "Mana Critical Strike Table", + "family": "standard", + "extractionMethod": "xml", + "pdfPath": "sources/Mana.pdf", + "enabled": true + }, { "slug": "puncture", "displayName": "Puncture Critical Strike Table", diff --git a/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs b/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs index 0d1ca98..bc957b5 100644 --- a/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs +++ b/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs @@ -18,6 +18,7 @@ public sealed class StandardCriticalTableParserIntegrationTests "krush", "ma-strikes", "ma-sweeps", + "mana", "puncture", "slash", "subdual", @@ -41,6 +42,8 @@ public sealed class StandardCriticalTableParserIntegrationTests yield return ["ballistic-shrapnel", "86-90", "E", "destroy his heart"]; yield return ["arcane-aether", "96-99", "E", "smoking pulp"]; yield return ["ma-strikes", "96-99", "E", "drives bone into brain"]; + yield return ["mana", "96-99", "E", "momentarily transformed"]; + yield return ["mana", "100", "E", "Mana consumes everything"]; yield return ["tiny", "100", "E", "Vein and artery severed"]; } @@ -104,6 +107,36 @@ public sealed class StandardCriticalTableParserIntegrationTests Assert.StartsWith("You recover from your initial swing", result.RawCellText, StringComparison.Ordinal); } + [Fact] + public async Task Mana_boundary_repair_keeps_96_99_and_100_cells_separated() + { + var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal)); + var parseResult = await LoadParseResultAsync(entry); + var row96E = parseResult.Table.Results.Single(item => + string.Equals(item.RollBandLabel, "96-99", StringComparison.Ordinal) && + string.Equals(item.ColumnKey, "E", StringComparison.Ordinal)); + var row100E = parseResult.Table.Results.Single(item => + string.Equals(item.RollBandLabel, "100", StringComparison.Ordinal) && + string.Equals(item.ColumnKey, "E", StringComparison.Ordinal)); + + Assert.Contains("momentarily transformed", row96E.DescriptionText, StringComparison.OrdinalIgnoreCase); + Assert.DoesNotContain("Mana consumes everything", row96E.DescriptionText, StringComparison.OrdinalIgnoreCase); + Assert.StartsWith("Mana consumes everything.", row100E.DescriptionText, StringComparison.Ordinal); + } + + [Fact] + public async Task Mana_symbol_only_affix_lines_do_not_pollute_descriptions() + { + var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal)); + var parseResult = await LoadParseResultAsync(entry); + var row100C = parseResult.Table.Results.Single(item => + string.Equals(item.RollBandLabel, "100", StringComparison.Ordinal) && + string.Equals(item.ColumnKey, "C", StringComparison.Ordinal)); + + Assert.DoesNotContain('\uF052', row100C.DescriptionText); + Assert.DoesNotContain('\uF06C', row100C.DescriptionText); + } + private static async Task LoadParseResultAsync(CriticalImportManifestEntry entry) { var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml"); diff --git a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs index a0e7508..ea572cb 100644 --- a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs +++ b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs @@ -34,6 +34,7 @@ public sealed class StandardCriticalTableParser item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase)) .Select(item => (int?)item.Top) .Min() ?? int.MaxValue; + var affixLegendSymbols = DetectAffixLegendSymbols(fragments, keyTop); var rowLabelFragments = FindRowLabelFragments(fragments, headerFragments, keyTop); var rowAnchors = rowLabelFragments @@ -49,11 +50,12 @@ public sealed class StandardCriticalTableParser var bodyFragments = fragments .Where(item => item.Top >= bodyStartTop && - item.Top < keyTop - 1 && + item.Top < keyTop - TopGroupingTolerance && !IsFooterPageNumberFragment(item, keyTop) && !rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, item.Text, StringComparison.OrdinalIgnoreCase)) && !headerFragments.Contains(item)) .ToList(); + var bodyLines = BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols); var parsedRollBands = rowAnchors .Select(anchor => CreateRollBand(anchor.Label, anchor.SortOrder)) @@ -65,11 +67,11 @@ public sealed class StandardCriticalTableParser { var rowStart = rowIndex == 0 ? bodyStartTop - : (int)Math.Floor((rowAnchors[rowIndex - 1].Top + rowAnchors[rowIndex].Top) / 2.0) + 1; + : ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); var rowEnd = rowIndex == rowAnchors.Count - 1 ? keyTop - 1 - : (int)Math.Floor((rowAnchors[rowIndex].Top + rowAnchors[rowIndex + 1].Top) / 2.0) + 1; + : ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); var rowFragments = bodyFragments .Where(item => item.Top >= rowStart && item.Top < rowEnd) @@ -97,14 +99,14 @@ public sealed class StandardCriticalTableParser } } - RepairLeadingAffixLeakage(cellEntries); + RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); var parsedCells = new List(); var parsedResults = new List(); foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey)) { - var segmentCount = CountLineTypeSegments(cellEntry.Lines); + var segmentCount = CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols); if (segmentCount > 2) { @@ -112,8 +114,8 @@ public sealed class StandardCriticalTableParser $"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines."); } - var rawAffixLines = cellEntry.Lines.Where(IsAffixLikeLine).ToList(); - var descriptionLines = cellEntry.Lines.Where(line => !IsAffixLikeLine(line)).ToList(); + var rawAffixLines = cellEntry.Lines.Where(line => IsAffixLikeLine(line, affixLegendSymbols)).ToList(); + var descriptionLines = cellEntry.Lines.Where(line => !IsAffixLikeLine(line, affixLegendSymbols)).ToList(); var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines); var descriptionText = CollapseWhitespace(string.Join(' ', descriptionLines)); var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines); @@ -301,7 +303,7 @@ public sealed class StandardCriticalTableParser .ToList(); } - private static bool IsAffixLikeLine(string line) + private static bool IsAffixLikeLine(string line, ISet affixLegendSymbols) { var value = line.Trim(); if (value.Length == 0) @@ -325,6 +327,33 @@ public sealed class StandardCriticalTableParser return value.Contains(':', StringComparison.Ordinal); } + if (affixLegendSymbols.Count > 0 && + affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal))) + { + if (value.Any(char.IsDigit)) + { + return true; + } + + var remainder = value; + foreach (var symbol in affixLegendSymbols.OrderByDescending(item => item.Length)) + { + remainder = remainder.Replace(symbol, string.Empty, StringComparison.Ordinal); + } + + remainder = remainder + .Replace("+", string.Empty, StringComparison.Ordinal) + .Replace("-", string.Empty, StringComparison.Ordinal) + .Replace("(", string.Empty, StringComparison.Ordinal) + .Replace(")", string.Empty, StringComparison.Ordinal) + .Replace("/", string.Empty, StringComparison.Ordinal); + + if (string.IsNullOrWhiteSpace(remainder)) + { + return true; + } + } + return value.StartsWith("+", StringComparison.Ordinal) || value.StartsWith("\u2211", StringComparison.Ordinal) || value.StartsWith("\u220F", StringComparison.Ordinal) || @@ -336,6 +365,9 @@ public sealed class StandardCriticalTableParser } private static void RepairLeadingAffixLeakage(List cellEntries) + => RepairLeadingAffixLeakage(cellEntries, new HashSet(StringComparer.Ordinal)); + + private static void RepairLeadingAffixLeakage(List cellEntries, ISet affixLegendSymbols) { var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex); var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList(); @@ -353,7 +385,7 @@ public sealed class StandardCriticalTableParser } var leadingAffixCount = 0; - while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount])) + while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols)) { leadingAffixCount++; } @@ -379,14 +411,14 @@ public sealed class StandardCriticalTableParser .Replace('\n', ' ') .Trim(); - private static int CountLineTypeSegments(IReadOnlyList lines) + private static int CountLineTypeSegments(IReadOnlyList lines, ISet affixLegendSymbols) { var segmentCount = 0; bool? previousIsAffix = null; foreach (var line in lines) { - var currentIsAffix = IsAffixLikeLine(line); + var currentIsAffix = IsAffixLikeLine(line, affixLegendSymbols); if (previousIsAffix == currentIsAffix) { continue; @@ -399,6 +431,91 @@ public sealed class StandardCriticalTableParser return segmentCount; } + private static HashSet DetectAffixLegendSymbols(IReadOnlyList fragments, int keyTop) + { + if (keyTop == int.MaxValue) + { + return []; + } + + var footerLines = GroupByTop(fragments + .Where(item => item.Top >= keyTop - TopGroupingTolerance) + .OrderBy(item => item.Top) + .ThenBy(item => item.Left) + .ToList()) + .Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text)))) + .ToList(); + + var symbols = new HashSet(StringComparer.Ordinal); + + foreach (var footerLine in footerLines) + { + AddLegendMatch(symbols, footerLine, @"must parry\s*=\s*(\S)"); + AddLegendMatch(symbols, footerLine, @"no parry\s*=\s*(\S)"); + AddLegendMatch(symbols, footerLine, @"stun(?:ned)?\s*=\s*(\S)"); + AddLegendMatch(symbols, footerLine, @"bleed\s*=\s*(\S)"); + AddLegendMatch(symbols, footerLine, @"powerpoint modification.*=\s*(\S)"); + } + + return symbols; + } + + private static void AddLegendMatch(HashSet symbols, string value, string pattern) + { + foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase)) + { + if (match.Groups.Count > 1) + { + symbols.Add(match.Groups[1].Value); + } + } + } + + private static List BuildBodyLines( + IReadOnlyList bodyFragments, + IReadOnlyList columnCenters, + ISet affixLegendSymbols) + { + var bodyLines = new List(); + + foreach (var lineFragments in GroupByTop(bodyFragments.OrderBy(item => item.Top).ThenBy(item => item.Left).ToList())) + { + var columnTexts = lineFragments + .GroupBy(item => ResolveColumn(item.CenterX, columnCenters), StringComparer.OrdinalIgnoreCase) + .Select(group => CollapseWhitespace(string.Join(' ', group.OrderBy(item => item.Left).Select(item => item.Text)))) + .Where(item => !string.IsNullOrWhiteSpace(item)) + .ToList(); + + var isAffixLike = columnTexts.Count > 0 && + columnTexts.All(text => IsAffixLikeLine(text, affixLegendSymbols)); + + bodyLines.Add(new BodyLine(lineFragments[0].Top, isAffixLike)); + } + + return bodyLines; + } + + private static int ResolveRowBoundaryTop( + RowAnchor current, + RowAnchor next, + IReadOnlyList bodyLines) + { + var linesBetweenLabels = bodyLines + .Where(item => item.Top >= current.Top && item.Top < next.Top) + .OrderBy(item => item.Top) + .ToList(); + + for (var index = linesBetweenLabels.Count - 2; index >= 0; index--) + { + if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike) + { + return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1; + } + } + + return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1; + } + private static bool IsFooterPageNumberFragment(XmlTextFragment fragment, int keyTop) { if (keyTop == int.MaxValue) @@ -432,6 +549,8 @@ public sealed class StandardCriticalTableParser private sealed record RowAnchor(string Label, int Top, int SortOrder); + private sealed record BodyLine(int Top, bool IsAffixLike); + private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List lines) { public string RollBandLabel { get; } = rollBandLabel;