Fix mana affix column leakage
This commit is contained in:
@@ -11,6 +11,7 @@ public sealed class StandardCriticalTableParser
|
||||
// NOTE(review): usage is not visible in this chunk; the name suggests a distance used to keep
// footer page numbers out of the parsed table body — confirm units and usage against the caller.
private const int FooterPageNumberExclusionGap = 80;
|
||||
// NOTE(review): usage is not visible in this chunk; presumably a positional tolerance for
// treating repeated row labels as duplicates — confirm.
private const int RowLabelDuplicateTolerance = 15;
|
||||
// NOTE(review): usage is not visible in this chunk; presumably the maximum difference in Top
// for fragments to be grouped onto the same line — confirm.
private const int TopGroupingTolerance = 2;
|
||||
// Matches one text segment: it starts and ends on a non-whitespace character and is extended
// lazily until the next position that is followed by two or more whitespace characters or by
// the end of the input. Single whitespace characters therefore stay inside a segment.
// SplitBoundaryCrossingAffixFragment uses the matches to cut a fragment's text apart.
private static readonly Regex MultiFragmentSplitRegex = new(@"\S(?:.*?\S)?(?=(?:\s{2,}|$))", RegexOptions.Compiled);
|
||||
// Matches text that begins with one or more digits immediately followed by one of
// H, ∑, ∏, π, ∫, or by optional whitespace and then an en dash or a hyphen.
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[–-])", RegexOptions.Compiled);
|
||||
// Matches text that consists only of optional leading digits followed by a parenthesised
// signed number, e.g. "(+10)" or "5(-20)".
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-)\d+\)$", RegexOptions.Compiled);
|
||||
|
||||
@@ -55,6 +56,7 @@ public sealed class StandardCriticalTableParser
|
||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, item.Text, StringComparison.OrdinalIgnoreCase)) &&
|
||||
!headerFragments.Contains(item))
|
||||
.ToList();
|
||||
bodyFragments = SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
var bodyLines = BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
|
||||
var parsedRollBands = rowAnchors
|
||||
@@ -460,6 +462,101 @@ public sealed class StandardCriticalTableParser
|
||||
return symbols;
|
||||
}
|
||||
|
||||
/// <summary>
/// Passes every body fragment through <see cref="SplitBoundaryCrossingAffixFragment"/> and
/// concatenates the results in input order.
/// </summary>
/// <param name="bodyFragments">Fragments to examine, in the order they should appear in the result.</param>
/// <param name="columnCenters">Column anchors forwarded unchanged to the per-fragment splitter.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded unchanged to the per-fragment splitter.</param>
/// <returns>
/// A new list containing, for each input fragment in turn, whatever the per-fragment splitter
/// returned for it (the fragment itself or its replacement segments).
/// </returns>
private static List<XmlTextFragment> SplitBoundaryCrossingAffixFragments(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    return bodyFragments
        .SelectMany(candidate => SplitBoundaryCrossingAffixFragment(candidate, columnCenters, affixLegendSymbols))
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Attempts to cut one fragment into several narrower fragments, one per text segment found
/// by <c>MultiFragmentSplitRegex</c>.
/// </summary>
/// <param name="fragment">The fragment to examine.</param>
/// <param name="columnCenters">Column anchors used both for the boundary test and for column resolution.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to the affix-likeness check.</param>
/// <returns>
/// The segment fragments when the split produced at least two segments and those segments
/// resolve to more than one column, or to a column other than the one the whole fragment
/// resolves to; otherwise a single-element list holding the original fragment.
/// </returns>
private static IReadOnlyList<XmlTextFragment> SplitBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    if (!LooksLikeBoundaryCrossingAffixFragment(fragment, columnCenters, affixLegendSymbols))
    {
        return [fragment];
    }

    var segments = MultiFragmentSplitRegex.Matches(fragment.Text);
    if (segments.Count < 2)
    {
        return [fragment];
    }

    // The fragment's width is spread evenly over its characters so that each segment's
    // horizontal position can be estimated from its character offset and length.
    var widthPerCharacter = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);
    var pieces = new List<XmlTextFragment>(segments.Count);

    for (var position = 0; position < segments.Count; position++)
    {
        var segment = segments[position];
        var pieceText = CollapseWhitespace(segment.Value);
        if (pieceText.Length > 0)
        {
            pieces.Add(new XmlTextFragment(
                fragment.PageNumber,
                fragment.Top,
                fragment.Left + (int)Math.Round(widthPerCharacter * segment.Index),
                Math.Max(1, (int)Math.Round(widthPerCharacter * segment.Length)), // never narrower than 1
                fragment.Height,
                pieceText));
        }
    }

    if (pieces.Count < 2)
    {
        return [fragment];
    }

    var wholeFragmentColumn = ResolveColumn(fragment.CenterX, columnCenters);
    var pieceColumns = pieces
        .Select(piece => ResolveColumn(piece.CenterX, columnCenters))
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .ToList();

    // Splitting is only kept when it changes column assignment: either the pieces spread over
    // several columns, or some piece resolves to a column other than the whole fragment's.
    var changesColumnAssignment =
        pieceColumns.Count > 1 ||
        !pieceColumns.All(column => string.Equals(column, wholeFragmentColumn, StringComparison.OrdinalIgnoreCase));

    if (changesColumnAssignment)
    {
        return pieces;
    }

    return [fragment];
}
|
||||
|
||||
/// <summary>
/// Decides whether a fragment is a candidate for splitting across columns.
/// </summary>
/// <param name="fragment">The fragment to test.</param>
/// <param name="columnCenters">Column anchors; the midpoint between each adjacent pair is treated as a boundary.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to <c>IsAffixLikeLine</c>.</param>
/// <returns>
/// <c>true</c> when the fragment's text passes <c>IsAffixLikeLine</c>, contains a space, and the
/// fragment's horizontal extent strictly spans at least one boundary; otherwise <c>false</c>.
/// </returns>
private static bool LooksLikeBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    var isSplittableAffix =
        IsAffixLikeLine(fragment.Text, affixLegendSymbols) &&
        fragment.Text.Contains(" ", StringComparison.Ordinal);

    if (!isSplittableAffix)
    {
        return false;
    }

    var leftEdge = fragment.Left;
    var rightEdge = fragment.Left + fragment.Width;

    // Pair each anchor with its successor; the boundary is the midpoint of the two centres.
    return columnCenters
        .Zip(columnCenters.Skip(1), (near, far) => (near.CenterX + far.CenterX) / 2.0)
        .Any(midpoint => leftEdge < midpoint && rightEdge > midpoint);
}
|
||||
|
||||
private static void AddLegendMatch(HashSet<string> symbols, string value, string pattern)
|
||||
{
|
||||
foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase))
|
||||
|
||||
Reference in New Issue
Block a user