Finalize phase 3 mana critical imports

This commit is contained in:
2026-03-14 02:29:28 +01:00
parent 216cfd3433
commit c7467aad13
4 changed files with 175 additions and 13 deletions

View File

@@ -44,6 +44,7 @@ The current implementation supports:
- `krush`
- `ma-strikes`
- `ma-sweeps`
- `mana`
- `puncture`
- `slash`
- `subdual`
@@ -57,7 +58,6 @@ The current implementation does not yet support:
- variant-column critical tables
- grouped variant tables
- `Mana.pdf`, whose current XML layout and affix notation still need a dedicated parser pass
- OCR/image-based PDFs such as `Void.pdf`
- normalized `critical_branch` population
- normalized `critical_effect` population
@@ -228,6 +228,7 @@ The currently enabled phase-3 table set is:
- `krush`
- `ma-strikes`
- `ma-sweeps`
- `mana`
- `puncture`
- `slash`
- `subdual`
@@ -237,9 +238,10 @@ The currently enabled phase-3 table set is:
Current phase-3 notes:
- header detection now tolerates minor `top` misalignment across the `A-E` header glyphs
- row boundaries snap to the last affix-to-prose transition between adjacent roll labels (falling back to the label midpoint when no such transition exists), so that midpoint slicing does not leak text into the next row
- affix symbols are learned from the footer legend before body parsing, so symbol-only affix fragments are classified correctly
- footer page numbers are filtered out before body parsing
- validation allows a single contiguous affix block either before or after prose
- `Mana.pdf` is intentionally left out for now because its row-anchor geometry and notation still need dedicated handling
### Phase 4: Variant and Grouped Tables

View File

@@ -96,6 +96,14 @@
"pdfPath": "sources/MA Sweeps.pdf",
"enabled": true
},
{
"slug": "mana",
"displayName": "Mana Critical Strike Table",
"family": "standard",
"extractionMethod": "xml",
"pdfPath": "sources/Mana.pdf",
"enabled": true
},
{
"slug": "puncture",
"displayName": "Puncture Critical Strike Table",

View File

@@ -18,6 +18,7 @@ public sealed class StandardCriticalTableParserIntegrationTests
"krush",
"ma-strikes",
"ma-sweeps",
"mana",
"puncture",
"slash",
"subdual",
@@ -41,6 +42,8 @@ public sealed class StandardCriticalTableParserIntegrationTests
yield return ["ballistic-shrapnel", "86-90", "E", "destroy his heart"];
yield return ["arcane-aether", "96-99", "E", "smoking pulp"];
yield return ["ma-strikes", "96-99", "E", "drives bone into brain"];
yield return ["mana", "96-99", "E", "momentarily transformed"];
yield return ["mana", "100", "E", "Mana consumes everything"];
yield return ["tiny", "100", "E", "Vein and artery severed"];
}
@@ -104,6 +107,36 @@ public sealed class StandardCriticalTableParserIntegrationTests
Assert.StartsWith("You recover from your initial swing", result.RawCellText, StringComparison.Ordinal);
}
// Checks that column E of the Mana table keeps the "96-99" and "100" roll bands in
// separate cells: the 96-99 prose must not contain the opening sentence of the 100 cell,
// and the 100 cell must begin with that sentence.
[Fact]
public async Task Mana_boundary_repair_keeps_96_99_and_100_cells_separated()
{
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    // Narrow to column E once, then pick each roll band out of that subset.
    var columnEResults = parsed.Table.Results
        .Where(result => string.Equals(result.ColumnKey, "E", StringComparison.Ordinal))
        .ToList();
    var band96To99 = columnEResults.Single(result =>
        string.Equals(result.RollBandLabel, "96-99", StringComparison.Ordinal));
    var band100 = columnEResults.Single(result =>
        string.Equals(result.RollBandLabel, "100", StringComparison.Ordinal));

    Assert.Contains("momentarily transformed", band96To99.DescriptionText, StringComparison.OrdinalIgnoreCase);
    Assert.DoesNotContain("Mana consumes everything", band96To99.DescriptionText, StringComparison.OrdinalIgnoreCase);
    Assert.StartsWith("Mana consumes everything.", band100.DescriptionText, StringComparison.Ordinal);
}
// Checks that the description of the Mana "100"/C cell contains neither U+F052 nor U+F06C.
// NOTE(review): both code points are in the Unicode Private Use Area; presumably they are
// the Mana PDF's affix glyphs - confirm against the extracted XML.
[Fact]
public async Task Mana_symbol_only_affix_lines_do_not_pollute_descriptions()
{
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    var cell100C = parsed.Table.Results
        .Where(result => string.Equals(result.ColumnKey, "C", StringComparison.Ordinal))
        .Single(result => string.Equals(result.RollBandLabel, "100", StringComparison.Ordinal));
    var description = cell100C.DescriptionText;

    Assert.DoesNotContain('\uF052', description);
    Assert.DoesNotContain('\uF06C', description);
}
private static async Task<StandardCriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
{
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");

View File

@@ -34,6 +34,7 @@ public sealed class StandardCriticalTableParser
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
.Select(item => (int?)item.Top)
.Min() ?? int.MaxValue;
var affixLegendSymbols = DetectAffixLegendSymbols(fragments, keyTop);
var rowLabelFragments = FindRowLabelFragments(fragments, headerFragments, keyTop);
var rowAnchors = rowLabelFragments
@@ -49,11 +50,12 @@ public sealed class StandardCriticalTableParser
var bodyFragments = fragments
.Where(item =>
item.Top >= bodyStartTop &&
item.Top < keyTop - 1 &&
item.Top < keyTop - TopGroupingTolerance &&
!IsFooterPageNumberFragment(item, keyTop) &&
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, item.Text, StringComparison.OrdinalIgnoreCase)) &&
!headerFragments.Contains(item))
.ToList();
var bodyLines = BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors
.Select(anchor => CreateRollBand(anchor.Label, anchor.SortOrder))
@@ -65,11 +67,11 @@ public sealed class StandardCriticalTableParser
{
var rowStart = rowIndex == 0
? bodyStartTop
: (int)Math.Floor((rowAnchors[rowIndex - 1].Top + rowAnchors[rowIndex].Top) / 2.0) + 1;
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
var rowEnd = rowIndex == rowAnchors.Count - 1
? keyTop - 1
: (int)Math.Floor((rowAnchors[rowIndex].Top + rowAnchors[rowIndex + 1].Top) / 2.0) + 1;
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
var rowFragments = bodyFragments
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
@@ -97,14 +99,14 @@ public sealed class StandardCriticalTableParser
}
}
RepairLeadingAffixLeakage(cellEntries);
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
var parsedCells = new List<ParsedCriticalCellArtifact>();
var parsedResults = new List<ParsedCriticalResult>();
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey))
{
var segmentCount = CountLineTypeSegments(cellEntry.Lines);
var segmentCount = CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
if (segmentCount > 2)
{
@@ -112,8 +114,8 @@ public sealed class StandardCriticalTableParser
$"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
}
var rawAffixLines = cellEntry.Lines.Where(IsAffixLikeLine).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !IsAffixLikeLine(line)).ToList();
var rawAffixLines = cellEntry.Lines.Where(line => IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
var descriptionText = CollapseWhitespace(string.Join(' ', descriptionLines));
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
@@ -301,7 +303,7 @@ public sealed class StandardCriticalTableParser
.ToList();
}
private static bool IsAffixLikeLine(string line)
private static bool IsAffixLikeLine(string line, ISet<string> affixLegendSymbols)
{
var value = line.Trim();
if (value.Length == 0)
@@ -325,6 +327,33 @@ public sealed class StandardCriticalTableParser
return value.Contains(':', StringComparison.Ordinal);
}
if (affixLegendSymbols.Count > 0 &&
affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal)))
{
if (value.Any(char.IsDigit))
{
return true;
}
var remainder = value;
foreach (var symbol in affixLegendSymbols.OrderByDescending(item => item.Length))
{
remainder = remainder.Replace(symbol, string.Empty, StringComparison.Ordinal);
}
remainder = remainder
.Replace("+", string.Empty, StringComparison.Ordinal)
.Replace("-", string.Empty, StringComparison.Ordinal)
.Replace("(", string.Empty, StringComparison.Ordinal)
.Replace(")", string.Empty, StringComparison.Ordinal)
.Replace("/", string.Empty, StringComparison.Ordinal);
if (string.IsNullOrWhiteSpace(remainder))
{
return true;
}
}
return value.StartsWith("+", StringComparison.Ordinal) ||
value.StartsWith("\u2211", StringComparison.Ordinal) ||
value.StartsWith("\u220F", StringComparison.Ordinal) ||
@@ -336,6 +365,9 @@ public sealed class StandardCriticalTableParser
}
/// <summary>
/// Convenience overload that runs the leakage repair with an empty set of legend symbols.
/// </summary>
/// <param name="cellEntries">Cell entries forwarded unchanged to the two-argument overload.</param>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries)
{
    var noLegendSymbols = new HashSet<string>(StringComparer.Ordinal);
    RepairLeadingAffixLeakage(cellEntries, noLegendSymbols);
}
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
@@ -353,7 +385,7 @@ public sealed class StandardCriticalTableParser
}
var leadingAffixCount = 0;
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount]))
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
{
leadingAffixCount++;
}
@@ -379,14 +411,14 @@ public sealed class StandardCriticalTableParser
.Replace('\n', ' ')
.Trim();
private static int CountLineTypeSegments(IReadOnlyList<string> lines)
private static int CountLineTypeSegments(IReadOnlyList<string> lines, ISet<string> affixLegendSymbols)
{
var segmentCount = 0;
bool? previousIsAffix = null;
foreach (var line in lines)
{
var currentIsAffix = IsAffixLikeLine(line);
var currentIsAffix = IsAffixLikeLine(line, affixLegendSymbols);
if (previousIsAffix == currentIsAffix)
{
continue;
@@ -399,6 +431,91 @@ public sealed class StandardCriticalTableParser
return segmentCount;
}
/// <summary>
/// Scans the footer legend (fragments at or below <c>keyTop - TopGroupingTolerance</c>) and
/// collects the single-character symbols that follow the known legend phrases.
/// </summary>
/// <param name="fragments">All text fragments of the page.</param>
/// <param name="keyTop">
/// Top coordinate of the legend; <see cref="int.MaxValue"/> is treated as "no legend found".
/// </param>
/// <returns>The captured symbols (ordinal comparison); empty when no legend is present.</returns>
private static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
{
    if (keyTop == int.MaxValue)
    {
        return [];
    }

    // Each pattern captures the first non-whitespace character after "<phrase> =".
    // NOTE(review): the greedy ".*" in the last pattern binds to the LAST '=' on the line;
    // if a footer line ever carries several legend entries this may capture the wrong
    // symbol - confirm against the source PDFs before changing it.
    string[] legendPatterns =
    [
        @"must parry\s*=\s*(\S)",
        @"no parry\s*=\s*(\S)",
        @"stun(?:ned)?\s*=\s*(\S)",
        @"bleed\s*=\s*(\S)",
        @"powerpoint modification.*=\s*(\S)",
    ];

    var footerFragments = fragments
        .Where(fragment => fragment.Top >= keyTop - TopGroupingTolerance)
        .OrderBy(fragment => fragment.Top)
        .ThenBy(fragment => fragment.Left)
        .ToList();

    // Rebuild every footer line as one whitespace-collapsed string, reading left to right.
    var legendTexts = GroupByTop(footerFragments)
        .Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(fragment => fragment.Left).Select(fragment => fragment.Text))))
        .ToList();

    var detected = new HashSet<string>(StringComparer.Ordinal);
    foreach (var legendText in legendTexts)
    {
        foreach (var legendPattern in legendPatterns)
        {
            AddLegendMatch(detected, legendText, legendPattern);
        }
    }

    return detected;
}
/// <summary>
/// Matches <paramref name="pattern"/> against <paramref name="value"/> (case-insensitive,
/// culture-invariant) and adds the text of capture group 1 of every match to
/// <paramref name="symbols"/>.
/// </summary>
/// <param name="symbols">Set that receives the captured symbols; mutated in place.</param>
/// <param name="value">Legend text to search.</param>
/// <param name="pattern">Regular expression whose first capture group is the symbol.</param>
private static void AddLegendMatch(HashSet<string> symbols, string value, string pattern)
{
    // CultureInvariant keeps the case-insensitive match independent of the current culture
    // (e.g. Turkish casing rules for 'i'/'I').
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    foreach (Match match in Regex.Matches(value, pattern, options))
    {
        // Groups.Count is fixed by the pattern, so it cannot tell whether group 1 actually
        // took part in this match. An unmatched optional group has an empty Value, and an
        // empty symbol would make every Contains(symbol) check succeed, so skip it.
        var symbolGroup = match.Groups[1];
        if (symbolGroup.Success && symbolGroup.Length > 0)
        {
            symbols.Add(symbolGroup.Value);
        }
    }
}
/// <summary>
/// Groups the body fragments into visual lines and records, for each line, its top
/// coordinate and whether every non-blank column text on it is affix-like.
/// </summary>
/// <param name="bodyFragments">Fragments belonging to the table body.</param>
/// <param name="columnCenters">Column anchors used to assign each fragment to a column.</param>
/// <param name="affixLegendSymbols">Legend symbols passed through to the affix classifier.</param>
/// <returns>One <c>BodyLine</c> per group returned by <c>GroupByTop</c>, in that order.</returns>
private static List<BodyLine> BuildBodyLines(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    var sortedFragments = bodyFragments
        .OrderBy(fragment => fragment.Top)
        .ThenBy(fragment => fragment.Left)
        .ToList();

    var result = new List<BodyLine>();
    foreach (var visualLine in GroupByTop(sortedFragments))
    {
        // Join the fragments of each column left-to-right and drop columns that end up blank.
        var textByColumn = new List<string>();
        var fragmentsByColumn = visualLine.GroupBy(
            fragment => ResolveColumn(fragment.CenterX, columnCenters),
            StringComparer.OrdinalIgnoreCase);
        foreach (var columnGroup in fragmentsByColumn)
        {
            var columnText = CollapseWhitespace(
                string.Join(' ', columnGroup.OrderBy(fragment => fragment.Left).Select(fragment => fragment.Text)));
            if (!string.IsNullOrWhiteSpace(columnText))
            {
                textByColumn.Add(columnText);
            }
        }

        // A line with no text at all is never affix-like.
        var everyColumnIsAffix = textByColumn.Count > 0 &&
            textByColumn.All(columnText => IsAffixLikeLine(columnText, affixLegendSymbols));

        result.Add(new BodyLine(visualLine[0].Top, everyColumnIsAffix));
    }

    return result;
}
/// <summary>
/// Computes the top coordinate at which the row of <paramref name="next"/> begins.
/// Among the body lines with <c>current.Top &lt;= Top &lt; next.Top</c>, the last place where an
/// affix-like line is directly followed by a non-affix line is used as the split; if there
/// is no such pair, the split is the midpoint between the two row labels.
/// </summary>
/// <param name="current">Anchor of the upper row.</param>
/// <param name="next">Anchor of the lower row.</param>
/// <param name="bodyLines">Classified body lines of the whole table.</param>
/// <returns><c>floor((a + b) / 2) + 1</c> for the chosen pair of top coordinates.</returns>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<BodyLine> bodyLines)
{
    var candidates = bodyLines
        .Where(line => current.Top <= line.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToArray();

    // Walk adjacent (upper, lower) pairs from the bottom up so the last transition wins.
    for (var lower = candidates.Length - 1; lower >= 1; lower--)
    {
        var upper = lower - 1;
        var isAffixToProse = candidates[upper].IsAffixLike && !candidates[lower].IsAffixLike;
        if (!isAffixToProse)
        {
            continue;
        }

        return SplitBetween(candidates[upper].Top, candidates[lower].Top);
    }

    return SplitBetween(current.Top, next.Top);

    static int SplitBetween(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;
}
private static bool IsFooterPageNumberFragment(XmlTextFragment fragment, int keyTop)
{
if (keyTop == int.MaxValue)
@@ -432,6 +549,8 @@ public sealed class StandardCriticalTableParser
/// <summary>A row label on the page: its label text, its top coordinate, and its sort order.</summary>
private sealed record RowAnchor(string Label, int Top, int SortOrder);
/// <summary>
/// One visual line of the table body: its top coordinate and whether every non-blank
/// column text on that line was classified as affix-like.
/// </summary>
private sealed record BodyLine(int Top, bool IsAffixLike);
private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
{
public string RollBandLabel { get; } = rollBandLabel;