Fix mana affix column leakage

This commit is contained in:
2026-03-14 02:53:33 +01:00
parent 73ce64e879
commit a391a1421a
4 changed files with 162 additions and 0 deletions

Binary file not shown.

View File

@@ -137,6 +137,70 @@ public sealed class StandardCriticalTableParserIntegrationTests
Assert.DoesNotContain('\uF06C', row100C.DescriptionText);
}
[Fact]
public async Task Mana_affix_boundaries_keep_71_75_a_and_b_separate()
{
    // Arrange: parse the "mana" table and narrow its results to the 71-75 roll band.
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);
    var band = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, "71-75", StringComparison.Ordinal))
        .ToList();

    // Act: exactly one result is expected per column within the band.
    var columnA = band.Single(result => string.Equals(result.ColumnKey, "A", StringComparison.Ordinal));
    var columnB = band.Single(result => string.Equals(result.ColumnKey, "B", StringComparison.Ordinal));

    // Assert: the affix text must appear in column B only, not in column A.
    Assert.DoesNotContain("+10H -", columnA.RawAffixText, StringComparison.Ordinal);
    Assert.Contains("+10H -", columnB.RawAffixText, StringComparison.Ordinal);
}
[Fact]
public async Task Mana_affix_boundaries_keep_71_75_d_and_e_separate()
{
    // Arrange: parse the "mana" table and narrow its results to the 71-75 roll band.
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);
    var band = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, "71-75", StringComparison.Ordinal))
        .ToList();

    // Act: exactly one result is expected per column within the band.
    var columnD = band.Single(result => string.Equals(result.ColumnKey, "D", StringComparison.Ordinal));
    var columnE = band.Single(result => string.Equals(result.ColumnKey, "E", StringComparison.Ordinal));

    // Assert: the affix text must appear in column E only, not in column D.
    Assert.DoesNotContain("+16H - 6", columnD.RawAffixText, StringComparison.Ordinal);
    Assert.Contains("+16H - 6", columnE.RawAffixText, StringComparison.Ordinal);
}
[Fact]
public async Task Mana_affix_boundaries_keep_91_95_b_and_c_separate()
{
    // Arrange: parse the "mana" table and narrow its results to the 91-95 roll band.
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);
    var band = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, "91-95", StringComparison.Ordinal))
        .ToList();

    // Act: exactly one result is expected per column within the band.
    var columnB = band.Single(result => string.Equals(result.ColumnKey, "B", StringComparison.Ordinal));
    var columnC = band.Single(result => string.Equals(result.ColumnKey, "C", StringComparison.Ordinal));

    // Assert: the affix text must appear in column C only, not in column B.
    Assert.DoesNotContain("+19H - 9", columnB.RawAffixText, StringComparison.Ordinal);
    Assert.Contains("+19H - 9", columnC.RawAffixText, StringComparison.Ordinal);
}
[Fact]
public async Task Mana_affix_boundaries_keep_86_90_b_and_c_separate()
{
    // Arrange: parse the "mana" table and narrow its results to the 86-90 roll band.
    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);
    var band = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, "86-90", StringComparison.Ordinal))
        .ToList();

    // Act: exactly one result is expected per column within the band.
    var columnB = band.Single(result => string.Equals(result.ColumnKey, "B", StringComparison.Ordinal));
    var columnC = band.Single(result => string.Equals(result.ColumnKey, "C", StringComparison.Ordinal));

    // Assert: the affix text must appear in column C only, not in column B.
    Assert.DoesNotContain("+16H - 8", columnB.RawAffixText, StringComparison.Ordinal);
    Assert.Contains("+16H - 8", columnC.RawAffixText, StringComparison.Ordinal);
}
private static async Task<StandardCriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
{
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");

View File

@@ -11,6 +11,7 @@ public sealed class StandardCriticalTableParser
// Integer layout thresholds. NOTE(review): their usage is outside this view; presumably they are in the
// same coordinate units as fragment Left/Top values — confirm against the call sites.
private const int FooterPageNumberExclusionGap = 80;
private const int RowLabelDuplicateTolerance = 15;
private const int TopGroupingTolerance = 2;
// Matches each run of text that begins and ends with a non-whitespace character and is immediately
// followed by two or more whitespace characters or the end of the text. Used to cut a single
// fragment's text into separate segments.
private static readonly Regex MultiFragmentSplitRegex = new(@"\S(?:.*?\S)?(?=(?:\s{2,}|$))", RegexOptions.Compiled);
// Matches text that starts with digits followed directly by H, ∑, ∏, π or ∫, or (after optional
// whitespace) by a hyphen.
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[-])", RegexOptions.Compiled);
// Matches a whole string made of optional leading digits and a parenthesised signed number,
// e.g. "(+5)" or "3(-10)".
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-)\d+\)$", RegexOptions.Compiled);
@@ -55,6 +56,7 @@ public sealed class StandardCriticalTableParser
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, item.Text, StringComparison.OrdinalIgnoreCase)) &&
!headerFragments.Contains(item))
.ToList();
bodyFragments = SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
var bodyLines = BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors
@@ -460,6 +462,101 @@ public sealed class StandardCriticalTableParser
return symbols;
}
/// <summary>
/// Runs every body fragment through <c>SplitBoundaryCrossingAffixFragment</c> and flattens the
/// per-fragment results into one list, preserving the input order.
/// </summary>
/// <param name="bodyFragments">Fragments to inspect.</param>
/// <param name="columnCenters">Column anchors forwarded to the per-fragment split.</param>
/// <param name="affixLegendSymbols">Affix legend symbols forwarded to the per-fragment split.</param>
/// <returns>A new list holding, for each input fragment, either the fragment itself or its split pieces.</returns>
private static List<XmlTextFragment> SplitBoundaryCrossingAffixFragments(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    return bodyFragments
        .SelectMany(candidate => SplitBoundaryCrossingAffixFragment(candidate, columnCenters, affixLegendSymbols))
        .ToList();
}
/// <summary>
/// Splits a single fragment into several fragments when its text holds multiple segments that
/// fall into different columns; otherwise returns the fragment unchanged.
/// </summary>
/// <param name="fragment">The fragment to examine.</param>
/// <param name="columnCenters">Column anchors used to resolve which column a position belongs to.</param>
/// <param name="affixLegendSymbols">Affix legend symbols used by the boundary-crossing pre-check.</param>
/// <returns>
/// The split pieces when they resolve to more than one column, or to a column other than the
/// original fragment's; otherwise a single-element list containing <paramref name="fragment"/>.
/// </returns>
private static IReadOnlyList<XmlTextFragment> SplitBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    if (!LooksLikeBoundaryCrossingAffixFragment(fragment, columnCenters, affixLegendSymbols))
    {
        return [fragment];
    }

    var segments = MultiFragmentSplitRegex.Matches(fragment.Text);
    if (segments.Count < 2)
    {
        return [fragment];
    }

    // Positions are estimated by assuming every character of the fragment has the same width.
    var widthPerCharacter = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);
    var pieces = new List<XmlTextFragment>(segments.Count);
    for (var segmentIndex = 0; segmentIndex < segments.Count; segmentIndex++)
    {
        var segment = segments[segmentIndex];
        var pieceText = CollapseWhitespace(segment.Value);
        if (pieceText.Length > 0)
        {
            var pieceLeft = fragment.Left + (int)Math.Round(widthPerCharacter * segment.Index);
            // Never emit a zero-width piece.
            var pieceWidth = Math.Max(1, (int)Math.Round(widthPerCharacter * segment.Length));
            pieces.Add(new XmlTextFragment(
                fragment.PageNumber,
                fragment.Top,
                pieceLeft,
                pieceWidth,
                fragment.Height,
                pieceText));
        }
    }

    if (pieces.Count < 2)
    {
        return [fragment];
    }

    var homeColumn = ResolveColumn(fragment.CenterX, columnCenters);
    var pieceColumns = pieces
        .Select(piece => ResolveColumn(piece.CenterX, columnCenters))
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .ToList();

    // Keep the original fragment when all pieces resolve to its own column: splitting changes nothing.
    var staysInHomeColumn =
        pieceColumns.Count <= 1 &&
        pieceColumns.All(column => string.Equals(column, homeColumn, StringComparison.OrdinalIgnoreCase));
    if (staysInHomeColumn)
    {
        return [fragment];
    }

    return pieces;
}
/// <summary>
/// Reports whether a fragment is affix-like, contains a space, and horizontally straddles the
/// midpoint between two adjacent column centers.
/// </summary>
/// <param name="fragment">The fragment to test.</param>
/// <param name="columnCenters">Column anchors; each adjacent pair defines one boundary at the midpoint of their centers.</param>
/// <param name="affixLegendSymbols">Affix legend symbols passed to the affix-likeness check.</param>
/// <returns><c>true</c> when the fragment's left edge lies strictly before a boundary and its right edge strictly after it.</returns>
private static bool LooksLikeBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    if (!IsAffixLikeLine(fragment.Text, affixLegendSymbols))
    {
        return false;
    }

    if (!fragment.Text.Contains(" ", StringComparison.Ordinal))
    {
        return false;
    }

    var rightEdge = fragment.Left + fragment.Width;
    for (var next = 1; next < columnCenters.Count; next++)
    {
        // Boundary between two neighbouring columns is the midpoint of their centers.
        var midpoint = (columnCenters[next - 1].CenterX + columnCenters[next].CenterX) / 2.0;
        var straddles = fragment.Left < midpoint && rightEdge > midpoint;
        if (straddles)
        {
            return true;
        }
    }

    return false;
}
private static void AddLegendMatch(HashSet<string> symbols, string value, string pattern)
{
foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase))