Finalize phase 3 mana critical imports
This commit is contained in:
@@ -44,6 +44,7 @@ The current implementation supports:
|
||||
- `krush`
|
||||
- `ma-strikes`
|
||||
- `ma-sweeps`
|
||||
- `mana`
|
||||
- `puncture`
|
||||
- `slash`
|
||||
- `subdual`
|
||||
@@ -57,7 +58,6 @@ The current implementation does not yet support:
|
||||
|
||||
- variant-column critical tables
|
||||
- grouped variant tables
|
||||
- `Mana.pdf`, whose current XML layout and affix notation still need a dedicated parser pass
|
||||
- OCR/image-based PDFs such as `Void.pdf`
|
||||
- normalized `critical_branch` population
|
||||
- normalized `critical_effect` population
|
||||
@@ -228,6 +228,7 @@ The currently enabled phase-3 table set is:
|
||||
- `krush`
|
||||
- `ma-strikes`
|
||||
- `ma-sweeps`
|
||||
- `mana`
|
||||
- `puncture`
|
||||
- `slash`
|
||||
- `subdual`
|
||||
@@ -237,9 +238,10 @@ The currently enabled phase-3 table set is:
|
||||
Current phase-3 notes:
|
||||
|
||||
- header detection now tolerates minor `top` misalignment across the `A-E` header glyphs
|
||||
- row boundaries can snap to the last affix-to-prose transition between adjacent roll labels when midpoint slicing would leak into the next row
|
||||
- affix symbols are learned from the footer legend before body parsing, so symbol-only affix fragments are classified correctly
|
||||
- footer page numbers are filtered out before body parsing
|
||||
- validation allows a single contiguous affix block either before or after prose
|
||||
- `Mana.pdf` is intentionally left out for now because its row-anchor geometry and notation still need dedicated handling
|
||||
|
||||
### Phase 4: Variant and Grouped Tables
|
||||
|
||||
|
||||
@@ -96,6 +96,14 @@
|
||||
"pdfPath": "sources/MA Sweeps.pdf",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"slug": "mana",
|
||||
"displayName": "Mana Critical Strike Table",
|
||||
"family": "standard",
|
||||
"extractionMethod": "xml",
|
||||
"pdfPath": "sources/Mana.pdf",
|
||||
"enabled": true
|
||||
},
|
||||
{
|
||||
"slug": "puncture",
|
||||
"displayName": "Puncture Critical Strike Table",
|
||||
|
||||
@@ -18,6 +18,7 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
"krush",
|
||||
"ma-strikes",
|
||||
"ma-sweeps",
|
||||
"mana",
|
||||
"puncture",
|
||||
"slash",
|
||||
"subdual",
|
||||
@@ -41,6 +42,8 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
yield return ["ballistic-shrapnel", "86-90", "E", "destroy his heart"];
|
||||
yield return ["arcane-aether", "96-99", "E", "smoking pulp"];
|
||||
yield return ["ma-strikes", "96-99", "E", "drives bone into brain"];
|
||||
yield return ["mana", "96-99", "E", "momentarily transformed"];
|
||||
yield return ["mana", "100", "E", "Mana consumes everything"];
|
||||
yield return ["tiny", "100", "E", "Vein and artery severed"];
|
||||
}
|
||||
|
||||
@@ -104,6 +107,36 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
Assert.StartsWith("You recover from your initial swing", result.RawCellText, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
[Fact]
public async Task Mana_boundary_repair_keeps_96_99_and_100_cells_separated()
{
    // Arrange: locate the manifest entry whose slug is exactly "mana" and parse it.
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    // Act: pick the two adjacent column-E cells whose shared boundary is under test.
    var cell96To99 = parsed.Table.Results.Single(cell =>
        string.Equals(cell.ColumnKey, "E", StringComparison.Ordinal) &&
        string.Equals(cell.RollBandLabel, "96-99", StringComparison.Ordinal));
    var cell100 = parsed.Table.Results.Single(cell =>
        string.Equals(cell.ColumnKey, "E", StringComparison.Ordinal) &&
        string.Equals(cell.RollBandLabel, "100", StringComparison.Ordinal));

    // Assert: the 96-99 cell keeps its own prose and none of the 100 row's text,
    // and the 100 cell begins with its own first sentence.
    Assert.Contains("momentarily transformed", cell96To99.DescriptionText, StringComparison.OrdinalIgnoreCase);
    Assert.DoesNotContain("Mana consumes everything", cell96To99.DescriptionText, StringComparison.OrdinalIgnoreCase);
    Assert.StartsWith("Mana consumes everything.", cell100.DescriptionText, StringComparison.Ordinal);
}
|
||||
|
||||
[Fact]
public async Task Mana_symbol_only_affix_lines_do_not_pollute_descriptions()
{
    // Arrange: locate the manifest entry whose slug is exactly "mana" and parse it.
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    // Act: select the single roll-100 / column-C result.
    var cell100C = parsed.Table.Results.Single(cell =>
        string.Equals(cell.ColumnKey, "C", StringComparison.Ordinal) &&
        string.Equals(cell.RollBandLabel, "100", StringComparison.Ordinal));

    // Assert: neither private-use code point may appear in the description text.
    // NOTE(review): presumably these are the symbol-font glyphs of the table's affix notation - confirm.
    Assert.DoesNotContain('\uF052', cell100C.DescriptionText);
    Assert.DoesNotContain('\uF06C', cell100C.DescriptionText);
}
|
||||
|
||||
private static async Task<StandardCriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
|
||||
{
|
||||
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");
|
||||
|
||||
@@ -34,6 +34,7 @@ public sealed class StandardCriticalTableParser
|
||||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
|
||||
.Select(item => (int?)item.Top)
|
||||
.Min() ?? int.MaxValue;
|
||||
var affixLegendSymbols = DetectAffixLegendSymbols(fragments, keyTop);
|
||||
var rowLabelFragments = FindRowLabelFragments(fragments, headerFragments, keyTop);
|
||||
|
||||
var rowAnchors = rowLabelFragments
|
||||
@@ -49,11 +50,12 @@ public sealed class StandardCriticalTableParser
|
||||
var bodyFragments = fragments
|
||||
.Where(item =>
|
||||
item.Top >= bodyStartTop &&
|
||||
item.Top < keyTop - 1 &&
|
||||
item.Top < keyTop - TopGroupingTolerance &&
|
||||
!IsFooterPageNumberFragment(item, keyTop) &&
|
||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, item.Text, StringComparison.OrdinalIgnoreCase)) &&
|
||||
!headerFragments.Contains(item))
|
||||
.ToList();
|
||||
var bodyLines = BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
|
||||
var parsedRollBands = rowAnchors
|
||||
.Select(anchor => CreateRollBand(anchor.Label, anchor.SortOrder))
|
||||
@@ -65,11 +67,11 @@ public sealed class StandardCriticalTableParser
|
||||
{
|
||||
var rowStart = rowIndex == 0
|
||||
? bodyStartTop
|
||||
: (int)Math.Floor((rowAnchors[rowIndex - 1].Top + rowAnchors[rowIndex].Top) / 2.0) + 1;
|
||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
|
||||
|
||||
var rowEnd = rowIndex == rowAnchors.Count - 1
|
||||
? keyTop - 1
|
||||
: (int)Math.Floor((rowAnchors[rowIndex].Top + rowAnchors[rowIndex + 1].Top) / 2.0) + 1;
|
||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
|
||||
|
||||
var rowFragments = bodyFragments
|
||||
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
|
||||
@@ -97,14 +99,14 @@ public sealed class StandardCriticalTableParser
|
||||
}
|
||||
}
|
||||
|
||||
RepairLeadingAffixLeakage(cellEntries);
|
||||
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
|
||||
|
||||
var parsedCells = new List<ParsedCriticalCellArtifact>();
|
||||
var parsedResults = new List<ParsedCriticalResult>();
|
||||
|
||||
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey))
|
||||
{
|
||||
var segmentCount = CountLineTypeSegments(cellEntry.Lines);
|
||||
var segmentCount = CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
|
||||
|
||||
if (segmentCount > 2)
|
||||
{
|
||||
@@ -112,8 +114,8 @@ public sealed class StandardCriticalTableParser
|
||||
$"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
|
||||
}
|
||||
|
||||
var rawAffixLines = cellEntry.Lines.Where(IsAffixLikeLine).ToList();
|
||||
var descriptionLines = cellEntry.Lines.Where(line => !IsAffixLikeLine(line)).ToList();
|
||||
var rawAffixLines = cellEntry.Lines.Where(line => IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
||||
var descriptionLines = cellEntry.Lines.Where(line => !IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
||||
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
|
||||
var descriptionText = CollapseWhitespace(string.Join(' ', descriptionLines));
|
||||
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
|
||||
@@ -301,7 +303,7 @@ public sealed class StandardCriticalTableParser
|
||||
.ToList();
|
||||
}
|
||||
|
||||
private static bool IsAffixLikeLine(string line)
|
||||
private static bool IsAffixLikeLine(string line, ISet<string> affixLegendSymbols)
|
||||
{
|
||||
var value = line.Trim();
|
||||
if (value.Length == 0)
|
||||
@@ -325,6 +327,33 @@ public sealed class StandardCriticalTableParser
|
||||
return value.Contains(':', StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
if (affixLegendSymbols.Count > 0 &&
|
||||
affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal)))
|
||||
{
|
||||
if (value.Any(char.IsDigit))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
var remainder = value;
|
||||
foreach (var symbol in affixLegendSymbols.OrderByDescending(item => item.Length))
|
||||
{
|
||||
remainder = remainder.Replace(symbol, string.Empty, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
remainder = remainder
|
||||
.Replace("+", string.Empty, StringComparison.Ordinal)
|
||||
.Replace("-", string.Empty, StringComparison.Ordinal)
|
||||
.Replace("(", string.Empty, StringComparison.Ordinal)
|
||||
.Replace(")", string.Empty, StringComparison.Ordinal)
|
||||
.Replace("/", string.Empty, StringComparison.Ordinal);
|
||||
|
||||
if (string.IsNullOrWhiteSpace(remainder))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return value.StartsWith("+", StringComparison.Ordinal) ||
|
||||
value.StartsWith("\u2211", StringComparison.Ordinal) ||
|
||||
value.StartsWith("\u220F", StringComparison.Ordinal) ||
|
||||
@@ -336,6 +365,9 @@ public sealed class StandardCriticalTableParser
|
||||
}
|
||||
|
||||
// Convenience overload: forwards to the two-argument overload with an empty,
// ordinal-compared legend symbol set.
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries)
{
    var noLegendSymbols = new HashSet<string>(StringComparer.Ordinal);
    RepairLeadingAffixLeakage(cellEntries, noLegendSymbols);
}
|
||||
|
||||
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
|
||||
{
|
||||
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
||||
var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
|
||||
@@ -353,7 +385,7 @@ public sealed class StandardCriticalTableParser
|
||||
}
|
||||
|
||||
var leadingAffixCount = 0;
|
||||
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount]))
|
||||
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
||||
{
|
||||
leadingAffixCount++;
|
||||
}
|
||||
@@ -379,14 +411,14 @@ public sealed class StandardCriticalTableParser
|
||||
.Replace('\n', ' ')
|
||||
.Trim();
|
||||
|
||||
private static int CountLineTypeSegments(IReadOnlyList<string> lines)
|
||||
private static int CountLineTypeSegments(IReadOnlyList<string> lines, ISet<string> affixLegendSymbols)
|
||||
{
|
||||
var segmentCount = 0;
|
||||
bool? previousIsAffix = null;
|
||||
|
||||
foreach (var line in lines)
|
||||
{
|
||||
var currentIsAffix = IsAffixLikeLine(line);
|
||||
var currentIsAffix = IsAffixLikeLine(line, affixLegendSymbols);
|
||||
if (previousIsAffix == currentIsAffix)
|
||||
{
|
||||
continue;
|
||||
@@ -399,6 +431,91 @@ public sealed class StandardCriticalTableParser
|
||||
return segmentCount;
|
||||
}
|
||||
|
||||
/// <summary>
/// Collects the single-character symbols that the footer legend assigns to affix kinds
/// (must parry, no parry, stun/stunned, bleed, powerpoint modification).
/// </summary>
/// <param name="fragments">All text fragments of the page being parsed.</param>
/// <param name="keyTop">
/// Top coordinate of the legend/key line, or <see cref="int.MaxValue"/> when no such line was found.
/// </param>
/// <returns>
/// An ordinal-compared set of the captured symbols; empty when <paramref name="keyTop"/> is
/// <see cref="int.MaxValue"/> or no legend pattern matches.
/// </returns>
private static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
{
    // int.MaxValue is the "no legend found" sentinel, so there is nothing to learn from.
    if (keyTop == int.MaxValue)
    {
        return [];
    }

    // Each pattern captures the first non-whitespace character after the '=' that follows
    // the legend phrase. The powerpoint pattern allows arbitrary text between the phrase
    // and its '='; that gap is matched lazily so that, when several legend entries share
    // one footer line, the capture comes from the nearest '=' rather than the last one on
    // the line (a greedy gap would capture a different entry's symbol).
    string[] legendPatterns =
    [
        @"must parry\s*=\s*(\S)",
        @"no parry\s*=\s*(\S)",
        @"stun(?:ned)?\s*=\s*(\S)",
        @"bleed\s*=\s*(\S)",
        @"powerpoint modification.*?=\s*(\S)",
    ];

    // Footer = every fragment at or below the key line, allowing for the grouping tolerance.
    var footerFragments = fragments
        .Where(item => item.Top >= keyTop - TopGroupingTolerance)
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    // Rebuild each visual footer line as left-to-right text with whitespace collapsed.
    var footerLines = GroupByTop(footerFragments)
        .Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))))
        .ToList();

    var symbols = new HashSet<string>(StringComparer.Ordinal);

    foreach (var footerLine in footerLines)
    {
        foreach (var legendPattern in legendPatterns)
        {
            AddLegendMatch(symbols, footerLine, legendPattern);
        }
    }

    return symbols;
}
|
||||
|
||||
/// <summary>
/// Adds the first capture group of every case-insensitive match of <paramref name="pattern"/>
/// in <paramref name="value"/> to <paramref name="symbols"/>.
/// </summary>
/// <param name="symbols">Set that receives the captured text.</param>
/// <param name="value">Text to search.</param>
/// <param name="pattern">Regular expression; matches without a first capture group are ignored.</param>
private static void AddLegendMatch(HashSet<string> symbols, string value, string pattern)
{
    var capturedSymbols = Regex.Matches(value, pattern, RegexOptions.IgnoreCase)
        .Cast<Match>()
        .Where(hit => hit.Groups.Count > 1) // group 0 is the whole match; group 1 is the symbol
        .Select(hit => hit.Groups[1].Value);

    symbols.UnionWith(capturedSymbols);
}
|
||||
|
||||
/// <summary>
/// Builds one <c>BodyLine</c> per visual line of the table body, flagging lines whose
/// text is affix-like in every column that has text.
/// </summary>
/// <param name="bodyFragments">Fragments belonging to the table body.</param>
/// <param name="columnCenters">Column anchors used to assign each fragment to a column.</param>
/// <param name="affixLegendSymbols">Legend symbols passed through to the affix classifier.</param>
/// <returns>One entry per grouped line, carrying the Top of the line's first fragment.</returns>
private static List<BodyLine> BuildBodyLines(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    var orderedFragments = bodyFragments
        .OrderBy(fragment => fragment.Top)
        .ThenBy(fragment => fragment.Left)
        .ToList();

    return GroupByTop(orderedFragments)
        .Select(line =>
        {
            // Join each column's fragments left-to-right and drop columns that end up blank.
            var textPerColumn = line
                .GroupBy(fragment => ResolveColumn(fragment.CenterX, columnCenters), StringComparer.OrdinalIgnoreCase)
                .Select(column => CollapseWhitespace(string.Join(' ', column.OrderBy(fragment => fragment.Left).Select(fragment => fragment.Text))))
                .Where(text => !string.IsNullOrWhiteSpace(text))
                .ToList();

            // A line with no text at all is never treated as affix-like.
            var affixOnly = textPerColumn.Count != 0 &&
                textPerColumn.TrueForAll(text => IsAffixLikeLine(text, affixLegendSymbols));

            return new BodyLine(line[0].Top, affixOnly);
        })
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Chooses the Top coordinate that separates the row labelled by <paramref name="current"/>
/// from the row labelled by <paramref name="next"/>.
/// </summary>
/// <param name="current">Anchor of the upper row.</param>
/// <param name="next">Anchor of the lower row.</param>
/// <param name="bodyLines">Classified body lines of the table.</param>
/// <returns>
/// One past the floored midpoint of the lowest affix-like line that is directly followed by a
/// non-affix line between the two labels; otherwise one past the floored midpoint of the labels.
/// </returns>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<BodyLine> bodyLines)
{
    // Only lines from the current label (inclusive) up to the next label (exclusive) count.
    var candidates = bodyLines
        .Where(line => line.Top >= current.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToArray();

    // Walk adjacent pairs bottom-up so the lowest affix-to-prose transition wins.
    var lowerIndex = candidates.Length - 1;
    while (lowerIndex > 0)
    {
        var upperLine = candidates[lowerIndex - 1];
        var lowerLine = candidates[lowerIndex];

        if (upperLine.IsAffixLike && !lowerLine.IsAffixLike)
        {
            return MidpointBelow(upperLine.Top, lowerLine.Top);
        }

        lowerIndex--;
    }

    // No transition found: fall back to plain midpoint slicing between the labels.
    return MidpointBelow(current.Top, next.Top);

    static int MidpointBelow(int upperTop, int lowerTop)
        => (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;
}
|
||||
|
||||
private static bool IsFooterPageNumberFragment(XmlTextFragment fragment, int keyTop)
|
||||
{
|
||||
if (keyTop == int.MaxValue)
|
||||
@@ -432,6 +549,8 @@ public sealed class StandardCriticalTableParser
|
||||
|
||||
private sealed record RowAnchor(string Label, int Top, int SortOrder);
|
||||
|
||||
private sealed record BodyLine(int Top, bool IsAffixLike);
|
||||
|
||||
private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
|
||||
{
|
||||
public string RollBandLabel { get; } = rollBandLabel;
|
||||
|
||||
Reference in New Issue
Block a user