Fix duplicate critical affix import fragments
This commit is contained in:
@@ -30,7 +30,7 @@ internal static class CriticalTableParserSupport
|
||||
|
||||
var document = XDocument.Load(xmlReader);
|
||||
|
||||
return document.Descendants("page")
|
||||
var fragments = document.Descendants("page")
|
||||
.SelectMany(page =>
|
||||
{
|
||||
var pageNumber = int.Parse(page.Attribute("number")?.Value ?? "1");
|
||||
@@ -45,6 +45,8 @@ internal static class CriticalTableParserSupport
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item.Text));
|
||||
})
|
||||
.ToList();
|
||||
|
||||
return RemoveRedundantContainedFragments(fragments);
|
||||
}
|
||||
|
||||
internal static List<XmlTextFragment> FindRowLabelFragments(
|
||||
@@ -263,6 +265,56 @@ internal static class CriticalTableParserSupport
|
||||
.Replace('’', '\'')
|
||||
.Trim();
|
||||
|
||||
/// <summary>
/// Drops fragments that merely repeat part of a wider fragment on the same line.
/// </summary>
/// <remarks>
/// Fragments are compared only within groups sharing the same
/// <c>PageNumber</c>, <c>Top</c> and <c>Height</c>. Inside a group a fragment is
/// redundant when a fragment that sorts before it (wider, or equally wide and
/// further left) has text longer than one character that contains the
/// fragment's text (ordinal comparison) and horizontally encloses it.
/// </remarks>
/// <param name="fragments">Fragments to filter; the list itself is not modified.</param>
/// <returns>A new list with the surviving fragments in their original order.</returns>
private static List<XmlTextFragment> RemoveRedundantContainedFragments(IReadOnlyList<XmlTextFragment> fragments)
{
    // Redundancy is recorded by position in the input rather than by fragment
    // value. A value-based set would make the final filter depend on how
    // XmlTextFragment implements equality: with value equality, marking one of
    // two identical fragments would also discard the copy that should remain.
    var redundantPositions = new HashSet<int>();

    var lines = fragments
        .Select((fragment, position) => (Fragment: fragment, Position: position))
        .GroupBy(entry => (entry.Fragment.PageNumber, entry.Fragment.Top, entry.Fragment.Height));

    foreach (var line in lines)
    {
        // Widest first, ties broken left-to-right. The sort is stable, so
        // fragments with equal keys keep their input order.
        var ordered = line
            .OrderByDescending(entry => entry.Fragment.Width)
            .ThenBy(entry => entry.Fragment.Left)
            .ToList();

        for (var index = 0; index < ordered.Count; index++)
        {
            var container = ordered[index].Fragment;

            // Single-character (or empty) text is never used as a container.
            if (container.Text.Length <= 1)
            {
                continue;
            }

            // Every later entry is no wider than the container because of the
            // descending width ordering, so no explicit width check is needed.
            for (var candidateIndex = index + 1; candidateIndex < ordered.Count; candidateIndex++)
            {
                var candidate = ordered[candidateIndex].Fragment;

                if (container.Text.Contains(candidate.Text, StringComparison.Ordinal) &&
                    IsHorizontallyContained(candidate, container))
                {
                    redundantPositions.Add(ordered[candidateIndex].Position);
                }
            }
        }
    }

    return fragments
        .Where((_, position) => !redundantPositions.Contains(position))
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Tests whether <paramref name="candidate"/> lies within the horizontal span of
/// <paramref name="container"/>, allowing one unit of slack on each side.
/// </summary>
/// <param name="candidate">Fragment whose left and right edges are checked.</param>
/// <param name="container">Fragment whose span must enclose the candidate.</param>
/// <returns>
/// <c>true</c> when both edges of the candidate fall inside the container's
/// span widened by the slack; otherwise <c>false</c>.
/// </returns>
private static bool IsHorizontallyContained(XmlTextFragment candidate, XmlTextFragment container)
{
    const int edgeSlack = 1;

    // Right edges are derived as Left + Width for both fragments.
    var candidateEnd = candidate.Left + candidate.Width;
    var containerEnd = container.Left + container.Width;

    if (candidate.Left >= container.Left - edgeSlack)
    {
        // Left edge is acceptable; the outcome now rests on the right edge.
        return candidateEnd <= containerEnd + edgeSlack;
    }

    return false;
}
|
||||
|
||||
internal static string? NormalizeConditionKey(string conditionText)
|
||||
{
|
||||
var normalized = CollapseWhitespace(conditionText)
|
||||
|
||||
Reference in New Issue
Block a user