Promote Void OCR source
This commit is contained in:
BIN
sources/Void OCR CleanScan v2.pdf
Normal file
BIN
sources/Void OCR CleanScan v2.pdf
Normal file
Binary file not shown.
@@ -174,7 +174,7 @@
|
||||
"family": "standard",
|
||||
"extractionMethod": "ocr",
|
||||
"axisTemplateSlug": "mana-standard-19",
|
||||
"pdfPath": "sources/Void.pdf",
|
||||
"pdfPath": "sources/Void OCR CleanScan v2.pdf",
|
||||
"enabled": true
|
||||
}
|
||||
]
|
||||
|
||||
Binary file not shown.
@@ -0,0 +1,32 @@
|
||||
using RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
namespace RolemasterDb.ImportTool.Tests;
|
||||
|
||||
/// <summary>
/// Exercises the roll-band label helpers on <c>CriticalTableParserSupport</c> with labels whose
/// lower bound has lost its first digit.
/// </summary>
public sealed class CriticalTableParserSupportTests
{
    /// <summary>
    /// A damaged label must be recognised as a roll-band label and normalise to the full label.
    /// </summary>
    [Theory]
    [InlineData("7-70", "67-70")]
    [InlineData("6-10", "06-10")]
    [InlineData("1-95", "91-95")]
    public void NormalizeRollBandLabel_repairs_known_ocr_missing_leading_digit_cases(string damagedLabel, string expectedLabel)
    {
        Assert.True(CriticalTableParserSupport.IsRollBandLabel(damagedLabel));

        var repaired = CriticalTableParserSupport.NormalizeRollBandLabel(damagedLabel);

        Assert.Equal(expectedLabel, repaired);
    }

    /// <summary>
    /// A damaged label sitting between two intact labels must survive row-label detection
    /// and come out repaired, in top-to-bottom order.
    /// </summary>
    [Fact]
    public void FindRowLabelFragments_keeps_repaired_ocr_row_labels_in_sequence()
    {
        // Constructor order: page, top, left, width, height, text.
        var scannedColumn = new List<PositionedTextFragment>
        {
            new(1, 100, 10, 20, 10, "61-65"),
            new(1, 120, 10, 20, 10, "7-70"),
            new(1, 140, 10, 20, 10, "71-75"),
        };

        var located = CriticalTableParserSupport.FindRowLabelFragments(scannedColumn, leftCutoff: 100, bodyStartTop: 90, keyTop: 200);

        var repairedLabels = new List<string>();
        foreach (var fragment in located)
        {
            repairedLabels.Add(CriticalTableParserSupport.NormalizeRollBandLabel(fragment.Text));
        }

        Assert.Equal(["61-65", "67-70", "71-75"], repairedLabels);
    }
}
|
||||
@@ -1,7 +1,6 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Xml;
|
||||
using System.Xml.Linq;
|
||||
|
||||
using RolemasterDb.App.Domain;
|
||||
using SharedParsing = RolemasterDb.CriticalParsing;
|
||||
|
||||
@@ -21,34 +20,41 @@ internal static class CriticalTableParserSupport
|
||||
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[–-])", RegexOptions.Compiled);
|
||||
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-|–)\d+\)$", RegexOptions.Compiled);
|
||||
private static readonly Regex BoundaryBonusLineRegex = new(@"^(?:all allies|all foe's allies|all foes|all opponents)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex RollBandLabelRegex = new(@"^\d{2,3}(?:-\d{2,3})?$|^\d{2,3}\+$", RegexOptions.Compiled);
|
||||
|
||||
// OCR at the left page edge sometimes loses the first digit of a standard-table row label's lower bound.
// Each key is a damaged label as scanned; each value is the full label it stands for.
private static readonly IReadOnlyDictionary<string, string> OcrDamagedStandardRollBandLabels = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
{
    { "1-05", "01-05" },
    { "6-10", "06-10" },
    { "1-15", "11-15" },
    { "6-20", "16-20" },
    { "1-35", "21-35" },
    { "6-45", "36-45" },
    { "6-50", "46-50" },
    { "1-55", "51-55" },
    { "6-60", "56-60" },
    { "1-65", "61-65" },
    { "7-70", "67-70" },
    { "1-75", "71-75" },
    { "6-80", "76-80" },
    { "1-85", "81-85" },
    { "6-90", "86-90" },
    { "1-95", "91-95" },
};
|
||||
|
||||
/// <summary>
/// Reads an XML document and produces one text fragment per non-blank <c>text</c> element
/// found directly under each <c>page</c> element.
/// </summary>
/// <param name="xmlContent">The XML document text. DTDs are ignored while reading.</param>
/// <returns>The fragments that remain after <c>RemoveRedundantContainedFragments</c> has filtered them.</returns>
/// <exception cref="InvalidOperationException">
/// A <c>text</c> element is missing its <c>top</c>, <c>left</c>, <c>width</c> or <c>height</c> attribute.
/// </exception>
internal static List<PositionedTextFragment> LoadFragments(string xmlContent)
{
    using var stringReader = new StringReader(xmlContent);
    using var xmlReader = XmlReader.Create(stringReader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });

    var document = XDocument.Load(xmlReader);

    var fragments = document.Descendants("page").SelectMany(page =>
    {
        // A page without a "number" attribute is treated as page 1.
        var pageNumber = int.Parse(page.Attribute("number")?.Value ?? "1");

        return page.Elements("text")
            .Select(item => new PositionedTextFragment(
                pageNumber,
                ReadRequiredInt(item, "top"),
                ReadRequiredInt(item, "left"),
                ReadRequiredInt(item, "width"),
                ReadRequiredInt(item, "height"),
                // Concatenate every descendant text node so nested markup inside <text> is flattened.
                NormalizeText(string.Concat(item.DescendantNodes().OfType<XText>().Select(node => node.Value)))))
            .Where(item => !string.IsNullOrWhiteSpace(item.Text));
    }).ToList();

    return RemoveRedundantContainedFragments(fragments);

    // Parses a mandatory integer attribute of a <text> element; the message names the missing attribute.
    static int ReadRequiredInt(XElement element, string attributeName) =>
        int.Parse(element.Attribute(attributeName)?.Value ?? throw new InvalidOperationException($"Missing text {attributeName} attribute."));
}
|
||||
@@ -56,38 +62,16 @@ internal static class CriticalTableParserSupport
|
||||
/// <summary>
/// Reads an XML document and returns the number, width and height of every <c>page</c> element.
/// </summary>
/// <param name="xmlContent">The XML document text. DTDs are ignored while reading.</param>
/// <returns>One geometry entry per <c>page</c> element, in document order.</returns>
/// <exception cref="InvalidOperationException">
/// A <c>page</c> element is missing its <c>width</c> or <c>height</c> attribute.
/// </exception>
internal static List<ParsedPdfPageGeometry> LoadPageGeometries(string xmlContent)
{
    using var stringReader = new StringReader(xmlContent);
    using var xmlReader = XmlReader.Create(stringReader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });

    var document = XDocument.Load(xmlReader);

    return document.Descendants("page")
        .Select(page => new ParsedPdfPageGeometry(
            // A page without a "number" attribute is treated as page 1.
            int.Parse(page.Attribute("number")?.Value ?? "1"),
            int.Parse(page.Attribute("width")?.Value ?? throw new InvalidOperationException("Missing page width attribute.")),
            int.Parse(page.Attribute("height")?.Value ?? throw new InvalidOperationException("Missing page height attribute."))))
        .ToList();
}
|
||||
|
||||
internal static List<PositionedTextFragment> FindRowLabelFragments(
|
||||
IReadOnlyList<PositionedTextFragment> fragments,
|
||||
int leftCutoff,
|
||||
int bodyStartTop,
|
||||
int keyTop)
|
||||
internal static List<PositionedTextFragment> FindRowLabelFragments(IReadOnlyList<PositionedTextFragment> fragments, int leftCutoff, int bodyStartTop, int keyTop)
|
||||
{
|
||||
var candidates = fragments
|
||||
.Where(item =>
|
||||
item.Left < leftCutoff &&
|
||||
item.Top >= bodyStartTop &&
|
||||
item.Top < keyTop - FooterLabelExclusionGap &&
|
||||
(IsRollBandLabel(item.Text) || LooksLikeSplitRollBandStart(item.Text)))
|
||||
.OrderBy(item => item.Top)
|
||||
.ThenBy(item => item.Left)
|
||||
.ToList();
|
||||
var candidates = fragments.Where(item => item.Left < leftCutoff && item.Top >= bodyStartTop && item.Top < keyTop - FooterLabelExclusionGap && (IsRollBandLabel(item.Text) || LooksLikeSplitRollBandStart(item.Text))).OrderBy(item => item.Top).ThenBy(item => item.Left).ToList();
|
||||
|
||||
var merged = new List<PositionedTextFragment>();
|
||||
|
||||
@@ -112,9 +96,7 @@ internal static class CriticalTableParserSupport
|
||||
foreach (var candidate in merged)
|
||||
{
|
||||
var previous = deduped.LastOrDefault();
|
||||
if (previous is not null &&
|
||||
string.Equals(NormalizeRollBandLabel(previous.Text), NormalizeRollBandLabel(candidate.Text), StringComparison.OrdinalIgnoreCase) &&
|
||||
Math.Abs(previous.Top - candidate.Top) <= RowLabelDuplicateTolerance)
|
||||
if (previous is not null && string.Equals(NormalizeRollBandLabel(previous.Text), NormalizeRollBandLabel(candidate.Text), StringComparison.OrdinalIgnoreCase) && Math.Abs(previous.Top - candidate.Top) <= RowLabelDuplicateTolerance)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -126,14 +108,13 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="value"/> is accepted as a roll-band label, i.e. whether
/// <c>TryNormalizeRollBandLabel</c> succeeds for it.
/// </summary>
/// <param name="value">Candidate label text.</param>
/// <returns><c>true</c> when the text normalises to a roll-band label; otherwise <c>false</c>.</returns>
internal static bool IsRollBandLabel(string value) =>
    TryNormalizeRollBandLabel(value, out _);
|
||||
|
||||
/// <summary>
/// Reports whether a fragment could be (part of) a row label: it starts left of
/// <paramref name="leftCutoff"/> and its text is either a roll-band label or looks like the
/// first half of a split one.
/// </summary>
/// <param name="fragment">Fragment to classify.</param>
/// <param name="leftCutoff">Exclusive upper bound on the fragment's <c>Left</c> coordinate.</param>
internal static bool IsPotentialRowLabelFragment(PositionedTextFragment fragment, int leftCutoff) =>
    fragment.Left < leftCutoff
    && (IsRollBandLabel(fragment.Text) || LooksLikeSplitRollBandStart(fragment.Text));
|
||||
|
||||
/// <summary>
/// Returns the canonical form of a roll-band label.
/// </summary>
/// <param name="label">Label text as extracted.</param>
/// <returns>
/// The result of <c>TryNormalizeRollBandLabel</c> when it succeeds; otherwise the
/// whitespace-collapsed input with any whitespace around hyphens removed.
/// </returns>
internal static string NormalizeRollBandLabel(string label) =>
    TryNormalizeRollBandLabel(label, out var normalized)
        ? normalized
        // Fallback for text that is not a recognised label: tidy the hyphen spacing only.
        : Regex.Replace(CollapseWhitespace(label), @"\s*-\s*", "-");
|
||||
|
||||
internal static ParsedCriticalRollBand CreateRollBand(string label, int sortOrder)
|
||||
{
|
||||
@@ -144,9 +125,7 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
|
||||
var parts = normalizedLabel.Split('-', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
|
||||
return parts.Length == 1
|
||||
? new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[0]), sortOrder)
|
||||
: new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[1]), sortOrder);
|
||||
return parts.Length == 1 ? new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[0]), sortOrder) : new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[1]), sortOrder);
|
||||
}
|
||||
|
||||
internal static string ResolveColumn(double centerX, IReadOnlyList<(string Key, double CenterX)> columns)
|
||||
@@ -178,12 +157,7 @@ internal static class CriticalTableParserSupport
|
||||
lines[^1].Add(fragment);
|
||||
}
|
||||
|
||||
return lines
|
||||
.Select(line => new ColumnarCellLine(
|
||||
CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))),
|
||||
line.OrderBy(item => item.Left).ToList()))
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item.Text))
|
||||
.ToList();
|
||||
return lines.Select(line => new ColumnarCellLine(CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))), line.OrderBy(item => item.Left).ToList())).Where(item => !string.IsNullOrWhiteSpace(item.Text)).ToList();
|
||||
}
|
||||
|
||||
internal static bool IsAffixLikeLine(string line, IReadOnlySet<string> affixLegendSymbols)
|
||||
@@ -204,8 +178,7 @@ internal static class CriticalTableParserSupport
|
||||
return true;
|
||||
}
|
||||
|
||||
if (affixLegendSymbols.Count > 0 &&
|
||||
affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal)))
|
||||
if (affixLegendSymbols.Count > 0 && affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal)))
|
||||
{
|
||||
if (value.Any(char.IsDigit))
|
||||
{
|
||||
@@ -218,13 +191,7 @@ internal static class CriticalTableParserSupport
|
||||
remainder = remainder.Replace(symbol, string.Empty, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
remainder = remainder
|
||||
.Replace("+", string.Empty, StringComparison.Ordinal)
|
||||
.Replace("-", string.Empty, StringComparison.Ordinal)
|
||||
.Replace("–", string.Empty, StringComparison.Ordinal)
|
||||
.Replace("(", string.Empty, StringComparison.Ordinal)
|
||||
.Replace(")", string.Empty, StringComparison.Ordinal)
|
||||
.Replace("/", string.Empty, StringComparison.Ordinal);
|
||||
remainder = remainder.Replace("+", string.Empty, StringComparison.Ordinal).Replace("-", string.Empty, StringComparison.Ordinal).Replace("–", string.Empty, StringComparison.Ordinal).Replace("(", string.Empty, StringComparison.Ordinal).Replace(")", string.Empty, StringComparison.Ordinal).Replace("/", string.Empty, StringComparison.Ordinal);
|
||||
|
||||
if (string.IsNullOrWhiteSpace(remainder))
|
||||
{
|
||||
@@ -232,15 +199,7 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
}
|
||||
|
||||
return value.StartsWith("+", StringComparison.Ordinal) ||
|
||||
value.StartsWith("\u2211", StringComparison.Ordinal) ||
|
||||
value.StartsWith("\u220F", StringComparison.Ordinal) ||
|
||||
value.StartsWith("\u03C0", StringComparison.Ordinal) ||
|
||||
value.StartsWith("\u222B", StringComparison.Ordinal) ||
|
||||
StandaloneModifierAffixLineRegex.IsMatch(value) ||
|
||||
NumericAffixLineRegex.IsMatch(value) ||
|
||||
value.Contains(" - ", StringComparison.Ordinal) ||
|
||||
value.Contains(" – ", StringComparison.Ordinal);
|
||||
return value.StartsWith("+", StringComparison.Ordinal) || value.StartsWith("\u2211", StringComparison.Ordinal) || value.StartsWith("\u220F", StringComparison.Ordinal) || value.StartsWith("\u03C0", StringComparison.Ordinal) || value.StartsWith("\u222B", StringComparison.Ordinal) || StandaloneModifierAffixLineRegex.IsMatch(value) || NumericAffixLineRegex.IsMatch(value) || value.Contains(" - ", StringComparison.Ordinal) || value.Contains(" – ", StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
internal static int CountLineTypeSegments(IReadOnlyList<string> lines, IReadOnlySet<string> affixLegendSymbols)
|
||||
@@ -274,23 +233,11 @@ internal static class CriticalTableParserSupport
|
||||
return false;
|
||||
}
|
||||
|
||||
return normalized.StartsWith("with ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.StartsWith("w/ ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.StartsWith("without ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.StartsWith("if ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.StartsWith("while ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.StartsWith("until ", StringComparison.OrdinalIgnoreCase) ||
|
||||
normalized.StartsWith("unless ", StringComparison.OrdinalIgnoreCase);
|
||||
return normalized.StartsWith("with ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("w/ ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("without ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("if ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("while ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("until ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("unless ", StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
/// <summary>
/// Cleans raw fragment text: non-breaking spaces, carriage returns and line feeds become
/// ordinary spaces, the right single quotation mark becomes an ASCII apostrophe, and the
/// result is trimmed.
/// </summary>
/// <param name="value">Raw text.</param>
/// <returns>The cleaned text; interior runs of spaces are not collapsed.</returns>
internal static string NormalizeText(string value) =>
    value
        .Replace('\u00a0', ' ')
        .Replace('\r', ' ')
        .Replace('\n', ' ')
        .Replace('’', '\'')
        .Trim();
|
||||
|
||||
private static List<PositionedTextFragment> RemoveRedundantContainedFragments(IReadOnlyList<PositionedTextFragment> fragments)
|
||||
{
|
||||
@@ -298,10 +245,7 @@ internal static class CriticalTableParserSupport
|
||||
|
||||
foreach (var group in fragments.GroupBy(item => (item.PageNumber, item.Top, item.Height)))
|
||||
{
|
||||
var ordered = group
|
||||
.OrderByDescending(item => item.Width)
|
||||
.ThenBy(item => item.Left)
|
||||
.ToList();
|
||||
var ordered = group.OrderByDescending(item => item.Width).ThenBy(item => item.Left).ToList();
|
||||
|
||||
for (var index = 0; index < ordered.Count; index++)
|
||||
{
|
||||
@@ -314,9 +258,7 @@ internal static class CriticalTableParserSupport
|
||||
for (var candidateIndex = index + 1; candidateIndex < ordered.Count; candidateIndex++)
|
||||
{
|
||||
var candidate = ordered[candidateIndex];
|
||||
if (candidate.Width > container.Width ||
|
||||
!container.Text.Contains(candidate.Text, StringComparison.Ordinal) ||
|
||||
!IsHorizontallyContained(candidate, container))
|
||||
if (candidate.Width > container.Width || !container.Text.Contains(candidate.Text, StringComparison.Ordinal) || !IsHorizontallyContained(candidate, container))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -326,9 +268,7 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
}
|
||||
|
||||
return fragments
|
||||
.Where(item => !redundant.Contains(item))
|
||||
.ToList();
|
||||
return fragments.Where(item => !redundant.Contains(item)).ToList();
|
||||
}
|
||||
|
||||
private static bool IsHorizontallyContained(PositionedTextFragment candidate, PositionedTextFragment container)
|
||||
@@ -338,29 +278,19 @@ internal static class CriticalTableParserSupport
|
||||
var candidateRight = candidate.Left + candidate.Width;
|
||||
var containerRight = container.Left + container.Width;
|
||||
|
||||
return candidate.Left >= container.Left - containmentTolerance &&
|
||||
candidateRight <= containerRight + containmentTolerance;
|
||||
return candidate.Left >= container.Left - containmentTolerance && candidateRight <= containerRight + containmentTolerance;
|
||||
}
|
||||
|
||||
/// <summary>
/// Turns free-form condition text into a lowercase, underscore-separated key.
/// </summary>
/// <param name="conditionText">Condition text as it appears in a cell.</param>
/// <returns>
/// The key, or <c>null</c> when nothing but separators remains after normalisation.
/// </returns>
internal static string? NormalizeConditionKey(string conditionText)
{
    // "w/o" must be expanded before "w/", otherwise "w/o" would become "witho".
    var normalized = CollapseWhitespace(conditionText)
        .ToLowerInvariant()
        .Replace("w/o", "without", StringComparison.Ordinal)
        .Replace("w/", "with", StringComparison.Ordinal);

    // Every run of characters outside a-z / 0-9 becomes a single underscore.
    normalized = Regex.Replace(normalized, @"[^a-z0-9]+", "_");
    normalized = normalized.Trim('_');

    return normalized.Length == 0 ? null : normalized;
}
|
||||
|
||||
/// <summary>
/// Finds the smallest <c>Top</c> among fragments that mark the key area: text equal to
/// "Key:" (case-insensitive) or containing "must parry" or "attacker gets".
/// </summary>
/// <param name="fragments">All fragments to search.</param>
/// <returns>The smallest matching <c>Top</c>, or <see cref="int.MaxValue"/> when none match.</returns>
internal static int FindKeyTop(IReadOnlyList<PositionedTextFragment> fragments) =>
    fragments
        .Where(item =>
            string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase)
            || item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase)
            || item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
        // Project to int? so Min() yields null instead of throwing on an empty sequence.
        .Select(item => (int?)item.Top)
        .Min() ?? int.MaxValue;
|
||||
|
||||
internal static AffixLegend ParseAffixLegend(IReadOnlyList<PositionedTextFragment> fragments, int keyTop)
|
||||
{
|
||||
@@ -369,13 +299,7 @@ internal static class CriticalTableParserSupport
|
||||
return AffixLegend.Empty;
|
||||
}
|
||||
|
||||
var footerLines = GroupByTop(fragments
|
||||
.Where(item => item.Top >= keyTop - TopGroupingTolerance)
|
||||
.OrderBy(item => item.Top)
|
||||
.ThenBy(item => item.Left)
|
||||
.ToList())
|
||||
.Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))))
|
||||
.ToList();
|
||||
var footerLines = GroupByTop(fragments.Where(item => item.Top >= keyTop - TopGroupingTolerance).OrderBy(item => item.Top).ThenBy(item => item.Left).ToList()).Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text)))).ToList();
|
||||
|
||||
var footerText = string.Join(' ', footerLines);
|
||||
var symbolEffects = new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
@@ -389,22 +313,10 @@ internal static class CriticalTableParserSupport
|
||||
AddLegendMatch(symbolEffects, footerText, CriticalEffectCodes.BleedPerRound, @"bleed\s*=\s*(\S)");
|
||||
AddLegendMatch(symbolEffects, footerText, CriticalEffectCodes.BleedPerRound, @"(\S)\s*=\s*bleed");
|
||||
|
||||
return new AffixLegend(
|
||||
symbolEffects,
|
||||
footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase)
|
||||
? ["P"]
|
||||
: [],
|
||||
supportsFoePenalty: footerText.Contains("foe has", StringComparison.OrdinalIgnoreCase) &&
|
||||
footerText.Contains("penalty", StringComparison.OrdinalIgnoreCase),
|
||||
supportsAttackerBonus: footerText.Contains("attacker gets", StringComparison.OrdinalIgnoreCase) &&
|
||||
footerText.Contains("next round", StringComparison.OrdinalIgnoreCase),
|
||||
supportsPowerPointModifier: footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase));
|
||||
return new AffixLegend(symbolEffects, footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase) ? ["P"] : [], supportsFoePenalty: footerText.Contains("foe has", StringComparison.OrdinalIgnoreCase) && footerText.Contains("penalty", StringComparison.OrdinalIgnoreCase), supportsAttackerBonus: footerText.Contains("attacker gets", StringComparison.OrdinalIgnoreCase) && footerText.Contains("next round", StringComparison.OrdinalIgnoreCase), supportsPowerPointModifier: footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
|
||||
internal static List<PositionedTextFragment> SplitBoundaryCrossingFragments(
|
||||
IReadOnlyList<PositionedTextFragment> bodyFragments,
|
||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
||||
IReadOnlySet<string> affixLegendSymbols)
|
||||
internal static List<PositionedTextFragment> SplitBoundaryCrossingFragments(IReadOnlyList<PositionedTextFragment> bodyFragments, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
|
||||
{
|
||||
var splitFragments = new List<PositionedTextFragment>(bodyFragments.Count);
|
||||
|
||||
@@ -416,23 +328,15 @@ internal static class CriticalTableParserSupport
|
||||
return splitFragments;
|
||||
}
|
||||
|
||||
internal static List<(int Top, bool IsAffixLike)> BuildBodyLines(
|
||||
IReadOnlyList<PositionedTextFragment> bodyFragments,
|
||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
||||
IReadOnlySet<string> affixLegendSymbols)
|
||||
internal static List<(int Top, bool IsAffixLike)> BuildBodyLines(IReadOnlyList<PositionedTextFragment> bodyFragments, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
|
||||
{
|
||||
var bodyLines = new List<(int Top, bool IsAffixLike)>();
|
||||
|
||||
foreach (var lineFragments in GroupByTop(bodyFragments.OrderBy(item => item.Top).ThenBy(item => item.Left).ToList()))
|
||||
{
|
||||
var columnTexts = lineFragments
|
||||
.GroupBy(item => ResolveColumn(item.CenterX, columnCenters), StringComparer.OrdinalIgnoreCase)
|
||||
.Select(group => CollapseWhitespace(string.Join(' ', group.OrderBy(item => item.Left).Select(item => item.Text))))
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item))
|
||||
.ToList();
|
||||
var columnTexts = lineFragments.GroupBy(item => ResolveColumn(item.CenterX, columnCenters), StringComparer.OrdinalIgnoreCase).Select(group => CollapseWhitespace(string.Join(' ', group.OrderBy(item => item.Left).Select(item => item.Text)))).Where(item => !string.IsNullOrWhiteSpace(item)).ToList();
|
||||
|
||||
var isAffixLike = columnTexts.Count > 0 &&
|
||||
columnTexts.All(text => IsAffixLikeLine(text, affixLegendSymbols) || IsBoundaryBonusLine(text));
|
||||
var isAffixLike = columnTexts.Count > 0 && columnTexts.All(text => IsAffixLikeLine(text, affixLegendSymbols) || IsBoundaryBonusLine(text));
|
||||
|
||||
bodyLines.Add((lineFragments[0].Top, isAffixLike));
|
||||
}
|
||||
@@ -447,8 +351,7 @@ internal static class CriticalTableParserSupport
|
||||
return false;
|
||||
}
|
||||
|
||||
return fragment.Top >= keyTop - FooterPageNumberExclusionGap &&
|
||||
Regex.IsMatch(fragment.Text, @"^\d{2,3}$");
|
||||
return fragment.Top >= keyTop - FooterPageNumberExclusionGap && Regex.IsMatch(fragment.Text, @"^\d{2,3}$");
|
||||
}
|
||||
|
||||
internal static IEnumerable<List<PositionedTextFragment>> GroupByTop(IReadOnlyList<PositionedTextFragment> fragments)
|
||||
@@ -470,10 +373,7 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds one row anchor per row-label fragment, ordered by <c>Top</c>.
/// </summary>
/// <param name="rowLabelFragments">Fragments already identified as row labels.</param>
/// <returns>
/// Anchors carrying the normalised label, the fragment's <c>Top</c>, and a 1-based position
/// in top-to-bottom order.
/// </returns>
internal static List<RowAnchor> CreateRowAnchors(IReadOnlyList<PositionedTextFragment> rowLabelFragments) =>
    rowLabelFragments
        .OrderBy(item => item.Top)
        .Select((item, index) => new RowAnchor(NormalizeRollBandLabel(item.Text), item.Top, index + 1))
        .ToList();
|
||||
|
||||
internal static int ResolveBodyStartTop(int headerTop, IReadOnlyList<RowAnchor> rowAnchors)
|
||||
{
|
||||
@@ -482,32 +382,12 @@ internal static class CriticalTableParserSupport
|
||||
return headerTop + HeaderToBodyMinimumGap;
|
||||
}
|
||||
|
||||
return Math.Min(
|
||||
headerTop + HeaderToBodyMinimumGap,
|
||||
Math.Max(
|
||||
headerTop + HeaderToRowLabelMinimumGap,
|
||||
rowAnchors[0].Top - HeaderToRowLabelMinimumGap - TopGroupingTolerance));
|
||||
return Math.Min(headerTop + HeaderToBodyMinimumGap, Math.Max(headerTop + HeaderToRowLabelMinimumGap, rowAnchors[0].Top - HeaderToRowLabelMinimumGap - TopGroupingTolerance));
|
||||
}
|
||||
|
||||
/// <summary>
/// Selects the fragments that belong to the table body and splits those that cross a column boundary.
/// </summary>
/// <param name="fragments">All fragments to filter.</param>
/// <param name="bodyStartTop">Inclusive lower bound on <c>Top</c>.</param>
/// <param name="keyTop">Top of the key area; the body ends <c>TopGroupingTolerance</c> above it.</param>
/// <param name="leftCutoff">Fragments left of this that look like row labels are dropped.</param>
/// <param name="rowAnchors">Known row anchors; a fragment at the same <c>Top</c> with the same normalised label is dropped.</param>
/// <param name="excludedFragments">Fragments to drop unconditionally.</param>
/// <param name="columnCenters">Column centres, forwarded to <c>SplitBoundaryCrossingFragments</c>.</param>
/// <param name="affixLegendSymbols">Affix legend symbols, forwarded to <c>SplitBoundaryCrossingFragments</c>.</param>
/// <returns>The result of <c>SplitBoundaryCrossingFragments</c> on the surviving fragments.</returns>
internal static List<PositionedTextFragment> BuildBodyFragments(IReadOnlyList<PositionedTextFragment> fragments, int bodyStartTop, int keyTop, int leftCutoff, IReadOnlyList<RowAnchor> rowAnchors, IReadOnlyCollection<PositionedTextFragment> excludedFragments, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
{
    var bodyFragments = fragments
        .Where(item =>
            item.Top >= bodyStartTop
            && item.Top < keyTop - TopGroupingTolerance
            && !IsFooterPageNumberFragment(item, keyTop)
            && !IsPotentialRowLabelFragment(item, leftCutoff)
            // Also drop label fragments that sit right of leftCutoff but coincide with a known anchor.
            && !rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase))
            && !excludedFragments.Contains(item))
        .ToList();

    return SplitBoundaryCrossingFragments(bodyFragments, columnCenters, affixLegendSymbols);
}
|
||||
@@ -515,23 +395,14 @@ internal static class CriticalTableParserSupport
|
||||
internal static void RepairLeadingAffixLeakage(List<ColumnarCellEntry> cellEntries, IReadOnlySet<string> affixLegendSymbols)
|
||||
{
|
||||
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
||||
var axes = cellEntries
|
||||
.Select(item => (item.GroupKey, item.ColumnKey))
|
||||
.Distinct()
|
||||
.ToList();
|
||||
var axes = cellEntries.Select(item => (item.GroupKey, item.ColumnKey)).Distinct().ToList();
|
||||
|
||||
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
|
||||
{
|
||||
foreach (var (groupKey, columnKey) in axes)
|
||||
{
|
||||
var current = cellEntries.SingleOrDefault(item =>
|
||||
item.RowIndex == rowIndex &&
|
||||
string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
||||
var next = cellEntries.SingleOrDefault(item =>
|
||||
item.RowIndex == rowIndex + 1 &&
|
||||
string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
||||
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) && string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
||||
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) && string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
||||
if (current is null || next is null)
|
||||
{
|
||||
continue;
|
||||
@@ -554,15 +425,9 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
}
|
||||
|
||||
internal static int ResolveRowBoundaryTop(
|
||||
RowAnchor current,
|
||||
RowAnchor next,
|
||||
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
||||
internal static int ResolveRowBoundaryTop(RowAnchor current, RowAnchor next, IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
||||
{
|
||||
var linesBetweenLabels = bodyLines
|
||||
.Where(item => item.Top >= current.Top && item.Top < next.Top)
|
||||
.OrderBy(item => item.Top)
|
||||
.ToList();
|
||||
var linesBetweenLabels = bodyLines.Where(item => item.Top >= current.Top && item.Top < next.Top).OrderBy(item => item.Top).ToList();
|
||||
|
||||
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
|
||||
{
|
||||
@@ -575,14 +440,7 @@ internal static class CriticalTableParserSupport
|
||||
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
|
||||
}
|
||||
|
||||
internal static void BuildParsedArtifacts(
|
||||
IReadOnlyList<ColumnarCellEntry> cellEntries,
|
||||
AffixLegend affixLegend,
|
||||
List<ParsedCriticalCellArtifact> parsedCells,
|
||||
List<ParsedCriticalResult> parsedResults,
|
||||
List<string> validationErrors,
|
||||
List<string>? validationWarnings = null,
|
||||
bool downgradeCellContentValidationToWarnings = false)
|
||||
internal static void BuildParsedArtifacts(IReadOnlyList<ColumnarCellEntry> cellEntries, AffixLegend affixLegend, List<ParsedCriticalCellArtifact> parsedCells, List<ParsedCriticalResult> parsedResults, List<string> validationErrors, List<string>? validationWarnings = null, bool downgradeCellContentValidationToWarnings = false)
|
||||
{
|
||||
var sharedLegend = ToSharedAffixLegend(affixLegend);
|
||||
|
||||
@@ -591,8 +449,7 @@ internal static class CriticalTableParserSupport
|
||||
var lineTexts = cellEntry.Lines.Select(line => line.Text).ToList();
|
||||
var content = SharedParsing.CriticalCellTextParser.Parse(lineTexts, sharedLegend);
|
||||
var sourceBounds = BuildSourceBounds(cellEntry.Lines.SelectMany(line => line.Fragments).ToList());
|
||||
var contentIssues = content.ValidationErrors.Select(error =>
|
||||
$"Cell '{BuildCellIdentifier(cellEntry)}': {error}");
|
||||
var contentIssues = content.ValidationErrors.Select(error => $"Cell '{BuildCellIdentifier(cellEntry)}': {error}");
|
||||
if (downgradeCellContentValidationToWarnings)
|
||||
{
|
||||
validationWarnings?.AddRange(contentIssues);
|
||||
@@ -605,29 +462,9 @@ internal static class CriticalTableParserSupport
|
||||
var effects = content.Effects.Select(ToImportToolEffect).ToList();
|
||||
var branches = content.Branches.Select(ToImportToolBranch).ToList();
|
||||
|
||||
parsedCells.Add(new ParsedCriticalCellArtifact(
|
||||
cellEntry.GroupKey,
|
||||
cellEntry.RollBandLabel,
|
||||
cellEntry.ColumnKey,
|
||||
lineTexts,
|
||||
content.BaseLines,
|
||||
content.RawCellText,
|
||||
content.DescriptionText,
|
||||
content.RawAffixText,
|
||||
effects,
|
||||
branches,
|
||||
sourceBounds));
|
||||
parsedCells.Add(new ParsedCriticalCellArtifact(cellEntry.GroupKey, cellEntry.RollBandLabel, cellEntry.ColumnKey, lineTexts, content.BaseLines, content.RawCellText, content.DescriptionText, content.RawAffixText, effects, branches, sourceBounds));
|
||||
|
||||
parsedResults.Add(new ParsedCriticalResult(
|
||||
cellEntry.GroupKey,
|
||||
cellEntry.ColumnKey,
|
||||
cellEntry.RollBandLabel,
|
||||
content.RawCellText,
|
||||
content.DescriptionText,
|
||||
content.RawAffixText,
|
||||
effects,
|
||||
branches,
|
||||
sourceBounds));
|
||||
parsedResults.Add(new ParsedCriticalResult(cellEntry.GroupKey, cellEntry.ColumnKey, cellEntry.RollBandLabel, content.RawCellText, content.DescriptionText, content.RawAffixText, effects, branches, sourceBounds));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -649,55 +486,42 @@ internal static class CriticalTableParserSupport
|
||||
var right = fragments.Max(fragment => fragment.Left + fragment.Width);
|
||||
var bottom = fragments.Max(fragment => fragment.Top + fragment.Height);
|
||||
|
||||
return new ParsedCriticalSourceRect(
|
||||
pageNumber,
|
||||
left,
|
||||
top,
|
||||
Math.Max(1, right - left),
|
||||
Math.Max(1, bottom - top));
|
||||
return new ParsedCriticalSourceRect(pageNumber, left, top, Math.Max(1, right - left), Math.Max(1, bottom - top));
|
||||
}
|
||||
|
||||
/// <summary>
/// Converts the import tool's <see cref="AffixLegend"/> into the shared parsing legend.
/// </summary>
/// <param name="affixLegend">Legend to convert.</param>
/// <returns>
/// A shared legend built from the symbol effects, the classification symbols that are
/// not also effect symbols, and the three "supports" flags of <paramref name="affixLegend"/>.
/// </returns>
private static SharedParsing.AffixLegend ToSharedAffixLegend(AffixLegend affixLegend)
{
    // Only classification symbols that do not double as effect symbols are forwarded.
    var classificationOnlySymbols = affixLegend.ClassificationSymbols
        .Except(affixLegend.EffectSymbols)
        .ToList();

    return new SharedParsing.AffixLegend(
        affixLegend.SymbolEffects,
        classificationOnlySymbols,
        affixLegend.SupportsFoePenalty,
        affixLegend.SupportsAttackerBonus,
        affixLegend.SupportsPowerPointModifier);
}
|
||||
|
||||
/// <summary>
/// Copies a shared-parser effect into the import tool's own effect type, field by field.
/// </summary>
/// <param name="effect">Effect produced by the shared parser.</param>
/// <returns>An import-tool effect carrying the same values.</returns>
private static ParsedCriticalEffect ToImportToolEffect(SharedParsing.ParsedCriticalEffect effect)
{
    return new ParsedCriticalEffect(
        effect.EffectCode,
        effect.Target,
        effect.ValueInteger,
        effect.ValueExpression,
        effect.DurationRounds,
        effect.PerRound,
        effect.Modifier,
        effect.BodyPart,
        effect.IsPermanent,
        effect.SourceType,
        effect.SourceText);
}
|
||||
|
||||
/// <summary>
/// Copies a shared-parser branch into the import tool's own branch type,
/// converting each nested effect with <see cref="ToImportToolEffect"/>.
/// </summary>
/// <param name="branch">Branch produced by the shared parser.</param>
/// <returns>An import-tool branch carrying the same values.</returns>
private static ParsedCriticalBranch ToImportToolBranch(SharedParsing.ParsedCriticalBranch branch)
{
    var convertedEffects = branch.Effects
        .Select(effect => ToImportToolEffect(effect))
        .ToList();

    return new ParsedCriticalBranch(
        branch.BranchKind,
        branch.ConditionKey,
        branch.ConditionText,
        branch.RawText,
        branch.DescriptionText,
        branch.RawAffixText,
        convertedEffects,
        branch.SortOrder);
}
|
||||
|
||||
/// <summary>
/// Builds the slash-separated identifier used to prefix cell validation messages.
/// </summary>
/// <param name="cellEntry">Cell whose roll band, optional group and column form the identifier.</param>
/// <returns>
/// <c>rollBand/column</c> when the cell has no group key; otherwise <c>rollBand/group/column</c>.
/// </returns>
private static string BuildCellIdentifier(ColumnarCellEntry cellEntry)
{
    if (cellEntry.GroupKey is null)
    {
        return $"{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}";
    }

    return $"{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}";
}
|
||||
|
||||
/// <summary>
/// Tests whether the trimmed text is two or three digits followed by optional
/// whitespace and a trailing hyphen (for example <c>67-</c> or <c>101 -</c>).
/// </summary>
/// <param name="value">Fragment text to test.</param>
/// <returns><c>true</c> when the trimmed text matches that shape; otherwise <c>false</c>.</returns>
private static bool LooksLikeSplitRollBandStart(string value)
{
    var trimmed = value.Trim();
    return Regex.IsMatch(trimmed, @"^\d{2,3}\s*-$");
}
|
||||
|
||||
/// <summary>
/// Normalizes a roll-band label and reports whether the result is usable.
/// </summary>
/// <param name="label">Raw label text.</param>
/// <param name="normalized">
/// The whitespace-collapsed label with spacing around hyphens removed; replaced by the
/// repaired label when the text is found in <c>OcrDamagedStandardRollBandLabels</c>.
/// On a <c>false</c> return it still holds the collapsed, hyphen-tightened text.
/// </param>
/// <returns>
/// <c>true</c> when the tightened label matches <c>RollBandLabelRegex</c> or has an
/// entry in <c>OcrDamagedStandardRollBandLabels</c>; otherwise <c>false</c>.
/// </returns>
private static bool TryNormalizeRollBandLabel(string label, out string normalized)
{
    var candidate = Regex.Replace(CollapseWhitespace(label), @"\s*-\s*", "-");
    normalized = candidate;

    // A label that already has the expected shape needs no repair.
    if (RollBandLabelRegex.IsMatch(candidate))
    {
        return true;
    }

    // Otherwise accept it only if it is a known damaged label with a repair entry.
    if (!OcrDamagedStandardRollBandLabels.TryGetValue(candidate, out var repaired))
    {
        return false;
    }

    normalized = repaired;
    return true;
}
|
||||
|
||||
private static bool TryMergeSplitRollBand(IReadOnlyList<PositionedTextFragment> candidates, int index, out PositionedTextFragment mergedCandidate)
|
||||
{
|
||||
var current = candidates[index];
|
||||
@@ -708,11 +532,7 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
|
||||
var next = candidates[index + 1];
|
||||
if (current.PageNumber != next.PageNumber ||
|
||||
!Regex.IsMatch(next.Text.Trim(), @"^\d{2,3}$") ||
|
||||
next.Top <= current.Top ||
|
||||
next.Top - current.Top > RowLabelDuplicateTolerance + 5 ||
|
||||
Math.Abs(next.Left - current.Left) > 20)
|
||||
if (current.PageNumber != next.PageNumber || !Regex.IsMatch(next.Text.Trim(), @"^\d{2,3}$") || next.Top <= current.Top || next.Top - current.Top > RowLabelDuplicateTolerance + 5 || Math.Abs(next.Left - current.Left) > 20)
|
||||
{
|
||||
mergedCandidate = null!;
|
||||
return false;
|
||||
@@ -722,28 +542,18 @@ internal static class CriticalTableParserSupport
|
||||
var mergedLabel = $"{startDigits}-{next.Text.Trim()}";
|
||||
var right = Math.Max(current.Left + current.Width, next.Left + next.Width);
|
||||
|
||||
mergedCandidate = new PositionedTextFragment(
|
||||
current.PageNumber,
|
||||
current.Top,
|
||||
Math.Min(current.Left, next.Left),
|
||||
right - Math.Min(current.Left, next.Left),
|
||||
Math.Max(current.Height, next.Height),
|
||||
mergedLabel);
|
||||
mergedCandidate = new PositionedTextFragment(current.PageNumber, current.Top, Math.Min(current.Left, next.Left), right - Math.Min(current.Left, next.Left), Math.Max(current.Height, next.Height), mergedLabel);
|
||||
return true;
|
||||
}
|
||||
|
||||
private static IReadOnlyList<PositionedTextFragment> SplitBoundaryCrossingFragment(
|
||||
PositionedTextFragment fragment,
|
||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
||||
IReadOnlySet<string> affixLegendSymbols)
|
||||
private static IReadOnlyList<PositionedTextFragment> SplitBoundaryCrossingFragment(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
|
||||
{
|
||||
if (!CrossesColumnBoundary(fragment, columnCenters))
|
||||
{
|
||||
return [fragment];
|
||||
}
|
||||
|
||||
if (IsAffixLikeLine(fragment.Text, affixLegendSymbols) &&
|
||||
fragment.Text.Contains(" ", StringComparison.Ordinal))
|
||||
if (IsAffixLikeLine(fragment.Text, affixLegendSymbols) && fragment.Text.Contains(" ", StringComparison.Ordinal))
|
||||
{
|
||||
return BuildSplitFragmentsFromMatches(fragment, MultiFragmentSplitRegex.Matches(fragment.Text), columnCenters);
|
||||
}
|
||||
@@ -756,10 +566,7 @@ internal static class CriticalTableParserSupport
|
||||
return [fragment];
|
||||
}
|
||||
|
||||
private static IReadOnlyList<PositionedTextFragment> BuildSplitFragmentsFromMatches(
|
||||
PositionedTextFragment fragment,
|
||||
MatchCollection matches,
|
||||
IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||
private static IReadOnlyList<PositionedTextFragment> BuildSplitFragmentsFromMatches(PositionedTextFragment fragment, MatchCollection matches, IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||
{
|
||||
if (matches.Count < 2)
|
||||
{
|
||||
@@ -780,13 +587,7 @@ internal static class CriticalTableParserSupport
|
||||
var segmentLeft = fragment.Left + (int)Math.Round(characterWidth * match.Index);
|
||||
var segmentWidth = Math.Max(1, (int)Math.Round(characterWidth * match.Length));
|
||||
|
||||
splitFragments.Add(new PositionedTextFragment(
|
||||
fragment.PageNumber,
|
||||
fragment.Top,
|
||||
segmentLeft,
|
||||
segmentWidth,
|
||||
fragment.Height,
|
||||
segmentText));
|
||||
splitFragments.Add(new PositionedTextFragment(fragment.PageNumber, fragment.Top, segmentLeft, segmentWidth, fragment.Height, segmentText));
|
||||
}
|
||||
|
||||
if (splitFragments.Count < 2)
|
||||
@@ -795,20 +596,12 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
|
||||
var originalColumn = ResolveColumn(fragment.CenterX, columnCenters);
|
||||
var distinctColumns = splitFragments
|
||||
.Select(item => ResolveColumn(item.CenterX, columnCenters))
|
||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
||||
.ToList();
|
||||
var distinctColumns = splitFragments.Select(item => ResolveColumn(item.CenterX, columnCenters)).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
|
||||
|
||||
return distinctColumns.Count > 1 || distinctColumns.Any(item => !string.Equals(item, originalColumn, StringComparison.OrdinalIgnoreCase))
|
||||
? splitFragments
|
||||
: [fragment];
|
||||
return distinctColumns.Count > 1 || distinctColumns.Any(item => !string.Equals(item, originalColumn, StringComparison.OrdinalIgnoreCase)) ? splitFragments : [fragment];
|
||||
}
|
||||
|
||||
private static bool TrySplitProseFragmentAtBoundaries(
|
||||
PositionedTextFragment fragment,
|
||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
||||
out IReadOnlyList<PositionedTextFragment> splitFragments)
|
||||
private static bool TrySplitProseFragmentAtBoundaries(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters, out IReadOnlyList<PositionedTextFragment> splitFragments)
|
||||
{
|
||||
splitFragments = null!;
|
||||
|
||||
@@ -848,9 +641,7 @@ internal static class CriticalTableParserSupport
|
||||
return true;
|
||||
}
|
||||
|
||||
private static List<int> FindBoundarySplitIndexes(
|
||||
PositionedTextFragment fragment,
|
||||
IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||
private static List<int> FindBoundarySplitIndexes(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||
{
|
||||
var characterWidth = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);
|
||||
var fragmentRight = fragment.Left + fragment.Width;
|
||||
@@ -917,11 +708,7 @@ internal static class CriticalTableParserSupport
|
||||
return bestIndex;
|
||||
}
|
||||
|
||||
private static PositionedTextFragment? CreateFragmentSegment(
|
||||
PositionedTextFragment fragment,
|
||||
int startIndex,
|
||||
int length,
|
||||
double characterWidth)
|
||||
private static PositionedTextFragment? CreateFragmentSegment(PositionedTextFragment fragment, int startIndex, int length, double characterWidth)
|
||||
{
|
||||
if (length <= 0)
|
||||
{
|
||||
@@ -950,18 +737,10 @@ internal static class CriticalTableParserSupport
|
||||
var actualLength = trimmedEnd - trimmedStart + 1;
|
||||
var segmentText = CollapseWhitespace(fragment.Text.Substring(actualStart, actualLength));
|
||||
|
||||
return new PositionedTextFragment(
|
||||
fragment.PageNumber,
|
||||
fragment.Top,
|
||||
fragment.Left + (int)Math.Round(characterWidth * actualStart),
|
||||
Math.Max(1, (int)Math.Round(characterWidth * actualLength)),
|
||||
fragment.Height,
|
||||
segmentText);
|
||||
return new PositionedTextFragment(fragment.PageNumber, fragment.Top, fragment.Left + (int)Math.Round(characterWidth * actualStart), Math.Max(1, (int)Math.Round(characterWidth * actualLength)), fragment.Height, segmentText);
|
||||
}
|
||||
|
||||
private static bool CrossesColumnBoundary(
|
||||
PositionedTextFragment fragment,
|
||||
IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||
private static bool CrossesColumnBoundary(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||
{
|
||||
var fragmentRight = fragment.Left + fragment.Width;
|
||||
|
||||
@@ -980,11 +759,7 @@ internal static class CriticalTableParserSupport
|
||||
private static bool IsBoundaryBonusLine(string text) =>
|
||||
BoundaryBonusLineRegex.IsMatch(text.Trim());
|
||||
|
||||
private static void AddLegendMatch(
|
||||
IDictionary<string, string> symbolEffects,
|
||||
string value,
|
||||
string effectCode,
|
||||
string pattern)
|
||||
private static void AddLegendMatch(IDictionary<string, string> symbolEffects, string value, string effectCode, string pattern)
|
||||
{
|
||||
foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase))
|
||||
{
|
||||
@@ -998,4 +773,4 @@ internal static class CriticalTableParserSupport
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user