Fix duplicate critical affix import fragments

This commit is contained in:
2026-03-14 12:51:59 +01:00
parent e4e8995fd8
commit e4be338621
3 changed files with 74 additions and 1 deletions

View File

@@ -30,7 +30,7 @@ internal static class CriticalTableParserSupport
var document = XDocument.Load(xmlReader);
return document.Descendants("page")
var fragments = document.Descendants("page")
.SelectMany(page =>
{
var pageNumber = int.Parse(page.Attribute("number")?.Value ?? "1");
@@ -45,6 +45,8 @@ internal static class CriticalTableParserSupport
.Where(item => !string.IsNullOrWhiteSpace(item.Text));
})
.ToList();
return RemoveRedundantContainedFragments(fragments);
}
internal static List<XmlTextFragment> FindRowLabelFragments(
@@ -263,6 +265,56 @@ internal static class CriticalTableParserSupport
.Replace('', '\'')
.Trim();
/// <summary>
/// Drops fragments whose text and horizontal extent are already covered by a wider
/// fragment on the same line (same page number, top and height).
/// </summary>
/// <param name="fragments">Fragments in document order.</param>
/// <returns>
/// A new list with the surviving fragments in their original relative order.
/// </returns>
/// <remarks>
/// Redundancy is tracked by position in <paramref name="fragments"/> rather than by
/// fragment equality. The equality semantics of <c>XmlTextFragment</c> are not visible
/// from this method; if the type compares by value, two identical fragments would both
/// match a fragment-keyed set and the container would be removed together with its
/// duplicate. Index tracking keeps exactly one of them regardless.
/// </remarks>
private static List<XmlTextFragment> RemoveRedundantContainedFragments(IReadOnlyList<XmlTextFragment> fragments)
{
    var redundantIndexes = new HashSet<int>();

    // Group input positions (not the fragments themselves) so each decision can be
    // traced back to one specific entry of the input list.
    var lines = Enumerable.Range(0, fragments.Count)
        .GroupBy(index => (fragments[index].PageNumber, fragments[index].Top, fragments[index].Height));

    foreach (var line in lines)
    {
        // Widest first, then left-most: every potential container precedes the
        // fragments it could contain.
        var ordered = line
            .OrderByDescending(index => fragments[index].Width)
            .ThenBy(index => fragments[index].Left)
            .ToList();

        for (var containerPosition = 0; containerPosition < ordered.Count; containerPosition++)
        {
            var container = fragments[ordered[containerPosition]];

            // Fragments of at most one character are never treated as containers.
            if (container.Text.Length <= 1)
            {
                continue;
            }

            for (var candidatePosition = containerPosition + 1; candidatePosition < ordered.Count; candidatePosition++)
            {
                var candidateIndex = ordered[candidatePosition];
                var candidate = fragments[candidateIndex];

                // No width comparison is needed here: the descending-width sort
                // guarantees a later entry is never wider than the container.
                if (container.Text.Contains(candidate.Text, StringComparison.Ordinal) &&
                    IsHorizontallyContained(candidate, container))
                {
                    redundantIndexes.Add(candidateIndex);
                }
            }
        }
    }

    return fragments
        .Where((item, index) => !redundantIndexes.Contains(index))
        .ToList();
}
/// <summary>
/// Tells whether <paramref name="candidate"/> lies within the horizontal span of
/// <paramref name="container"/>, allowing one unit of slack on each side.
/// </summary>
/// <param name="candidate">The fragment that may be covered.</param>
/// <param name="container">The fragment whose span is tested against.</param>
/// <returns>
/// <c>true</c> when the candidate's left edge is no more than one unit left of the
/// container's left edge and its right edge is no more than one unit right of the
/// container's right edge; otherwise <c>false</c>.
/// </returns>
private static bool IsHorizontallyContained(XmlTextFragment candidate, XmlTextFragment container)
{
    const int slack = 1;

    // Left edge check first; a candidate starting too far left can never qualify.
    if (candidate.Left < container.Left - slack)
    {
        return false;
    }

    // Right edges are computed as left + width for both fragments.
    var innerRight = candidate.Left + candidate.Width;
    var outerRight = container.Left + container.Width;

    return innerRight <= outerRight + slack;
}
internal static string? NormalizeConditionKey(string conditionText)
{
var normalized = CollapseWhitespace(conditionText)