Fix mana affix column leakage
This commit is contained in:
Binary file not shown.
@@ -137,6 +137,70 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
Assert.DoesNotContain('\uF06C', row100C.DescriptionText);
|
||||
}
|
||||
|
||||
// Verifies, for the "mana" table's 71-75 roll band, that the "+10H -" affix text
// is reported on column B and is absent from column A.
[Fact]
public async Task Mana_affix_boundaries_keep_71_75_a_and_b_separate()
{
    const string rollBand = "71-75";
    const string affixText = "+10H -";

    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    // Narrow to the roll band once, then pick each column out of that subset.
    var bandResults = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, rollBand, StringComparison.Ordinal))
        .ToList();
    var columnA = bandResults.Single(result => string.Equals(result.ColumnKey, "A", StringComparison.Ordinal));
    var columnB = bandResults.Single(result => string.Equals(result.ColumnKey, "B", StringComparison.Ordinal));

    Assert.DoesNotContain(affixText, columnA.RawAffixText, StringComparison.Ordinal);
    Assert.Contains(affixText, columnB.RawAffixText, StringComparison.Ordinal);
}
|
||||
|
||||
// Verifies, for the "mana" table's 71-75 roll band, that the "+16H - 6" affix text
// is reported on column E and is absent from column D.
[Fact]
public async Task Mana_affix_boundaries_keep_71_75_d_and_e_separate()
{
    const string rollBand = "71-75";
    const string affixText = "+16H - 6";

    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    // Narrow to the roll band once, then pick each column out of that subset.
    var bandResults = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, rollBand, StringComparison.Ordinal))
        .ToList();
    var columnD = bandResults.Single(result => string.Equals(result.ColumnKey, "D", StringComparison.Ordinal));
    var columnE = bandResults.Single(result => string.Equals(result.ColumnKey, "E", StringComparison.Ordinal));

    Assert.DoesNotContain(affixText, columnD.RawAffixText, StringComparison.Ordinal);
    Assert.Contains(affixText, columnE.RawAffixText, StringComparison.Ordinal);
}
|
||||
|
||||
// Verifies, for the "mana" table's 91-95 roll band, that the "+19H - 9" affix text
// is reported on column C and is absent from column B.
[Fact]
public async Task Mana_affix_boundaries_keep_91_95_b_and_c_separate()
{
    const string rollBand = "91-95";
    const string affixText = "+19H - 9";

    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    // Narrow to the roll band once, then pick each column out of that subset.
    var bandResults = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, rollBand, StringComparison.Ordinal))
        .ToList();
    var columnB = bandResults.Single(result => string.Equals(result.ColumnKey, "B", StringComparison.Ordinal));
    var columnC = bandResults.Single(result => string.Equals(result.ColumnKey, "C", StringComparison.Ordinal));

    Assert.DoesNotContain(affixText, columnB.RawAffixText, StringComparison.Ordinal);
    Assert.Contains(affixText, columnC.RawAffixText, StringComparison.Ordinal);
}
|
||||
|
||||
// Verifies, for the "mana" table's 86-90 roll band, that the "+16H - 8" affix text
// is reported on column C and is absent from column B.
[Fact]
public async Task Mana_affix_boundaries_keep_86_90_b_and_c_separate()
{
    const string rollBand = "86-90";
    const string affixText = "+16H - 8";

    var manaEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "mana", StringComparison.Ordinal));
    var parsed = await LoadParseResultAsync(manaEntry);

    // Narrow to the roll band once, then pick each column out of that subset.
    var bandResults = parsed.Table.Results
        .Where(result => string.Equals(result.RollBandLabel, rollBand, StringComparison.Ordinal))
        .ToList();
    var columnB = bandResults.Single(result => string.Equals(result.ColumnKey, "B", StringComparison.Ordinal));
    var columnC = bandResults.Single(result => string.Equals(result.ColumnKey, "C", StringComparison.Ordinal));

    Assert.DoesNotContain(affixText, columnB.RawAffixText, StringComparison.Ordinal);
    Assert.Contains(affixText, columnC.RawAffixText, StringComparison.Ordinal);
}
|
||||
|
||||
private static async Task<StandardCriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
|
||||
{
|
||||
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");
|
||||
|
||||
@@ -11,6 +11,7 @@ public sealed class StandardCriticalTableParser
|
||||
private const int FooterPageNumberExclusionGap = 80;
|
||||
private const int RowLabelDuplicateTolerance = 15;
|
||||
private const int TopGroupingTolerance = 2;
|
||||
// Matches one text segment at a time: a run that starts and ends on a non-whitespace
// character and is immediately followed by two or more whitespace characters or the
// end of the input. Single whitespace characters stay inside a segment.
private static readonly Regex MultiFragmentSplitRegex = new(@"\S(?:.*?\S)?(?=(?:\s{2,}|$))", RegexOptions.Compiled);
|
||||
// Matches text that begins with digits followed by "H", one of the symbols ∑ ∏ π ∫,
// or optional whitespace and then an en dash or hyphen.
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[–-])", RegexOptions.Compiled);
|
||||
// Matches a whole string made of optional leading digits and a parenthesized signed
// integer, e.g. "(+10)" or "3(-5)".
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-)\d+\)$", RegexOptions.Compiled);
|
||||
|
||||
@@ -55,6 +56,7 @@ public sealed class StandardCriticalTableParser
|
||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, item.Text, StringComparison.OrdinalIgnoreCase)) &&
|
||||
!headerFragments.Contains(item))
|
||||
.ToList();
|
||||
bodyFragments = SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
var bodyLines = BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
|
||||
var parsedRollBands = rowAnchors
|
||||
@@ -460,6 +462,101 @@ public sealed class StandardCriticalTableParser
|
||||
return symbols;
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs every body fragment through <c>SplitBoundaryCrossingAffixFragment</c> and
/// concatenates the results, preserving the input order.
/// </summary>
/// <param name="bodyFragments">Fragments to examine.</param>
/// <param name="columnCenters">Column anchors forwarded to the per-fragment split.</param>
/// <param name="affixLegendSymbols">Affix legend symbols forwarded to the per-fragment split.</param>
/// <returns>A new list holding each fragment either unchanged or replaced by its split pieces.</returns>
private static List<XmlTextFragment> SplitBoundaryCrossingAffixFragments(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    return bodyFragments
        .SelectMany(candidate => SplitBoundaryCrossingAffixFragment(candidate, columnCenters, affixLegendSymbols))
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Splits one fragment into several when it looks like an affix line that crosses a
/// column boundary and its text contains multiple segments that resolve to more than
/// one column, or to a column other than the one the whole fragment resolves to.
/// </summary>
/// <param name="fragment">The fragment to examine.</param>
/// <param name="columnCenters">Column anchors used to resolve a horizontal position to a column.</param>
/// <param name="affixLegendSymbols">Affix legend symbols used by the affix-likeness check.</param>
/// <returns>
/// The split pieces when splitting changes the column assignment; otherwise a
/// single-element list containing <paramref name="fragment"/> unchanged.
/// </returns>
private static IReadOnlyList<XmlTextFragment> SplitBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    if (!LooksLikeBoundaryCrossingAffixFragment(fragment, columnCenters, affixLegendSymbols))
    {
        return [fragment];
    }

    var segmentMatches = MultiFragmentSplitRegex.Matches(fragment.Text);
    if (segmentMatches.Count < 2)
    {
        return [fragment];
    }

    // Only the width of the whole fragment is known here, so each segment's position
    // is estimated from a uniform per-character width.
    var averageCharWidth = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);
    var pieces = new List<XmlTextFragment>(segmentMatches.Count);

    for (var matchIndex = 0; matchIndex < segmentMatches.Count; matchIndex++)
    {
        var segment = segmentMatches[matchIndex];
        var pieceText = CollapseWhitespace(segment.Value);
        if (pieceText.Length > 0)
        {
            var pieceLeft = fragment.Left + (int)Math.Round(averageCharWidth * segment.Index);
            var pieceWidth = Math.Max(1, (int)Math.Round(averageCharWidth * segment.Length));

            pieces.Add(new XmlTextFragment(
                fragment.PageNumber,
                fragment.Top,
                pieceLeft,
                pieceWidth,
                fragment.Height,
                pieceText));
        }
    }

    if (pieces.Count < 2)
    {
        return [fragment];
    }

    var wholeFragmentColumn = ResolveColumn(fragment.CenterX, columnCenters);
    var pieceColumns = pieces
        .Select(piece => ResolveColumn(piece.CenterX, columnCenters))
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .ToList();

    // Keep the split only when it actually changes which column(s) the text lands in.
    var changesColumnAssignment =
        pieceColumns.Count > 1 ||
        pieceColumns.Any(column => !string.Equals(column, wholeFragmentColumn, StringComparison.OrdinalIgnoreCase));

    return changesColumnAssignment
        ? pieces
        : [fragment];
}
|
||||
|
||||
/// <summary>
/// Reports whether a fragment is an affix-like line containing a space whose
/// horizontal extent strictly straddles the midpoint between two adjacent column centers.
/// </summary>
/// <param name="fragment">The fragment to test.</param>
/// <param name="columnCenters">Column anchors; each adjacent pair defines one boundary at the midpoint of their centers.</param>
/// <param name="affixLegendSymbols">Affix legend symbols passed to the affix-likeness check.</param>
/// <returns><c>true</c> when the fragment qualifies and spans at least one boundary; otherwise <c>false</c>.</returns>
private static bool LooksLikeBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    var isSpacedAffixLine =
        IsAffixLikeLine(fragment.Text, affixLegendSymbols) &&
        fragment.Text.Contains(" ", StringComparison.Ordinal);
    if (!isSpacedAffixLine)
    {
        return false;
    }

    var rightEdge = fragment.Left + fragment.Width;

    // Walk adjacent anchor pairs (previous, next); the boundary is halfway between their centers.
    for (var next = 1; next < columnCenters.Count; next++)
    {
        var midpoint = (columnCenters[next - 1].CenterX + columnCenters[next].CenterX) / 2.0;
        var straddlesMidpoint = fragment.Left < midpoint && rightEdge > midpoint;
        if (straddlesMidpoint)
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
private static void AddLegendMatch(HashSet<string> symbols, string value, string pattern)
|
||||
{
|
||||
foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase))
|
||||
|
||||
Reference in New Issue
Block a user