Promote Void OCR source
This commit is contained in:
BIN
sources/Void OCR CleanScan v2.pdf
Normal file
BIN
sources/Void OCR CleanScan v2.pdf
Normal file
Binary file not shown.
@@ -174,7 +174,7 @@
|
|||||||
"family": "standard",
|
"family": "standard",
|
||||||
"extractionMethod": "ocr",
|
"extractionMethod": "ocr",
|
||||||
"axisTemplateSlug": "mana-standard-19",
|
"axisTemplateSlug": "mana-standard-19",
|
||||||
"pdfPath": "sources/Void.pdf",
|
"pdfPath": "sources/Void OCR CleanScan v2.pdf",
|
||||||
"enabled": true
|
"enabled": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
Binary file not shown.
@@ -0,0 +1,32 @@
|
|||||||
|
using RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
namespace RolemasterDb.ImportTool.Tests;
|
||||||
|
|
||||||
|
/// <summary>
/// Tests for the roll-band label helpers on <see cref="CriticalTableParserSupport"/> when a label's
/// lower bound has lost its first digit.
/// </summary>
public sealed class CriticalTableParserSupportTests
{
    /// <summary>
    /// A damaged label must still be recognised as a roll band and must normalize to the full label.
    /// </summary>
    [Theory]
    [InlineData("7-70", "67-70")]
    [InlineData("6-10", "06-10")]
    [InlineData("1-95", "91-95")]
    public void NormalizeRollBandLabel_repairs_known_ocr_missing_leading_digit_cases(string damagedLabel, string expectedLabel)
    {
        var recognised = CriticalTableParserSupport.IsRollBandLabel(damagedLabel);
        Assert.True(recognised);

        var repaired = CriticalTableParserSupport.NormalizeRollBandLabel(damagedLabel);
        Assert.Equal(expectedLabel, repaired);
    }

    /// <summary>
    /// A damaged label sitting between two intact labels must be kept by row-label discovery,
    /// in top-to-bottom order.
    /// </summary>
    [Fact]
    public void FindRowLabelFragments_keeps_repaired_ocr_row_labels_in_sequence()
    {
        // Three fragments on page 1, all at left 10 (inside the cutoff), stacked at tops 100, 120 and 140.
        var fragments = new List<PositionedTextFragment>
        {
            new(1, 100, 10, 20, 10, "61-65"),
            new(1, 120, 10, 20, 10, "7-70"),
            new(1, 140, 10, 20, 10, "71-75")
        };

        var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(fragments, leftCutoff: 100, bodyStartTop: 90, keyTop: 200);

        var labels = new List<string>();
        foreach (var rowLabel in rowLabelFragments)
        {
            labels.Add(CriticalTableParserSupport.NormalizeRollBandLabel(rowLabel.Text));
        }

        string[] expected = ["61-65", "67-70", "71-75"];
        Assert.Equal(expected, labels);
    }
}
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using System.Xml;
|
using System.Xml;
|
||||||
using System.Xml.Linq;
|
using System.Xml.Linq;
|
||||||
|
|
||||||
using RolemasterDb.App.Domain;
|
using RolemasterDb.App.Domain;
|
||||||
using SharedParsing = RolemasterDb.CriticalParsing;
|
using SharedParsing = RolemasterDb.CriticalParsing;
|
||||||
|
|
||||||
@@ -21,34 +20,41 @@ internal static class CriticalTableParserSupport
|
|||||||
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[–-])", RegexOptions.Compiled);
|
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[–-])", RegexOptions.Compiled);
|
||||||
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-|–)\d+\)$", RegexOptions.Compiled);
|
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-|–)\d+\)$", RegexOptions.Compiled);
|
||||||
private static readonly Regex BoundaryBonusLineRegex = new(@"^(?:all allies|all foe's allies|all foes|all opponents)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
private static readonly Regex BoundaryBonusLineRegex = new(@"^(?:all allies|all foe's allies|all foes|all opponents)\b", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||||
|
// Matches a roll-band label: two or three digits, optionally followed by "-" and two or three more
// digits (a range), or two or three digits followed by "+" (an open-ended band).
private static readonly Regex RollBandLabelRegex = new(@"^\d{2,3}(?:-\d{2,3})?$|^\d{2,3}\+$", RegexOptions.Compiled);

// Left-edge OCR occasionally drops the first digit of the lower bound on standard-table row labels.
// Keys are the damaged labels as OCR produces them; values are the intended labels.
// Every two-sided band of the standard table needs an entry here, otherwise its damaged form is not repaired.
private static readonly IReadOnlyDictionary<string, string> OcrDamagedStandardRollBandLabels = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
{
    ["1-05"] = "01-05",
    ["6-10"] = "06-10",
    ["1-15"] = "11-15",
    ["6-20"] = "16-20",
    ["1-35"] = "21-35",
    ["6-45"] = "36-45",
    ["6-50"] = "46-50",
    ["1-55"] = "51-55",
    ["6-60"] = "56-60",
    ["1-65"] = "61-65",
    ["7-70"] = "67-70",
    ["1-75"] = "71-75",
    ["6-80"] = "76-80",
    ["1-85"] = "81-85",
    ["6-90"] = "86-90",
    ["1-95"] = "91-95",
    // NOTE(review): assumes the standard table's last two-sided band is 96-99 (followed by 100) — confirm against the axis template.
    ["6-99"] = "96-99"
};
|
||||||
|
|
||||||
/// <summary>
/// Reads every <c>text</c> element under each <c>page</c> element of the given XML and turns it into a
/// <see cref="PositionedTextFragment"/>, then drops fragments that are redundant copies contained in a wider one.
/// </summary>
/// <param name="xmlContent">The XML document text. DTD declarations are ignored while reading.</param>
/// <returns>The non-blank fragments, after <c>RemoveRedundantContainedFragments</c> has filtered them.</returns>
/// <exception cref="InvalidOperationException">A <c>text</c> element lacks a <c>top</c>, <c>left</c>, <c>width</c> or <c>height</c> attribute.</exception>
/// <exception cref="FormatException">An attribute value is not an integer.</exception>
internal static List<PositionedTextFragment> LoadFragments(string xmlContent)
{
    // The attributes are machine-written integers, so parse them culture-independently:
    // the host culture's number format (for example its negative sign) must not affect parsing.
    static int ReadRequiredInt(XElement element, string attributeName) =>
        int.Parse(
            element.Attribute(attributeName)?.Value ?? throw new InvalidOperationException($"Missing text {attributeName} attribute."),
            System.Globalization.CultureInfo.InvariantCulture);

    using var stringReader = new StringReader(xmlContent);
    using var xmlReader = XmlReader.Create(stringReader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });

    var document = XDocument.Load(xmlReader);
    var fragments = new List<PositionedTextFragment>();

    foreach (var page in document.Descendants("page"))
    {
        // A page without a "number" attribute is treated as page 1.
        var pageNumber = int.Parse(page.Attribute("number")?.Value ?? "1", System.Globalization.CultureInfo.InvariantCulture);

        foreach (var item in page.Elements("text"))
        {
            // The fragment text is the concatenation of all descendant text nodes, so nested inline markup is flattened.
            var fragment = new PositionedTextFragment(
                pageNumber,
                ReadRequiredInt(item, "top"),
                ReadRequiredInt(item, "left"),
                ReadRequiredInt(item, "width"),
                ReadRequiredInt(item, "height"),
                NormalizeText(string.Concat(item.DescendantNodes().OfType<XText>().Select(node => node.Value))));

            if (!string.IsNullOrWhiteSpace(fragment.Text))
            {
                fragments.Add(fragment);
            }
        }
    }

    return RemoveRedundantContainedFragments(fragments);
}
|
||||||
@@ -56,38 +62,16 @@ internal static class CriticalTableParserSupport
|
|||||||
/// <summary>
/// Reads the number, width and height of every <c>page</c> element in the given XML.
/// </summary>
/// <param name="xmlContent">The XML document text. DTD declarations are ignored while reading.</param>
/// <returns>One <see cref="ParsedPdfPageGeometry"/> per <c>page</c> element, in document order.</returns>
/// <exception cref="InvalidOperationException">A <c>page</c> element lacks a <c>width</c> or <c>height</c> attribute.</exception>
/// <exception cref="FormatException">An attribute value is not an integer.</exception>
internal static List<ParsedPdfPageGeometry> LoadPageGeometries(string xmlContent)
{
    // The attributes are machine-written integers, so parse them culture-independently.
    static int ReadRequiredInt(XElement page, string attributeName) =>
        int.Parse(
            page.Attribute(attributeName)?.Value ?? throw new InvalidOperationException($"Missing page {attributeName} attribute."),
            System.Globalization.CultureInfo.InvariantCulture);

    using var stringReader = new StringReader(xmlContent);
    using var xmlReader = XmlReader.Create(stringReader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore });

    var document = XDocument.Load(xmlReader);

    return document.Descendants("page")
        .Select(page => new ParsedPdfPageGeometry(
            // A page without a "number" attribute is treated as page 1.
            int.Parse(page.Attribute("number")?.Value ?? "1", System.Globalization.CultureInfo.InvariantCulture),
            ReadRequiredInt(page, "width"),
            ReadRequiredInt(page, "height")))
        .ToList();
}
|
||||||
|
|
||||||
internal static List<PositionedTextFragment> FindRowLabelFragments(
|
internal static List<PositionedTextFragment> FindRowLabelFragments(IReadOnlyList<PositionedTextFragment> fragments, int leftCutoff, int bodyStartTop, int keyTop)
|
||||||
IReadOnlyList<PositionedTextFragment> fragments,
|
|
||||||
int leftCutoff,
|
|
||||||
int bodyStartTop,
|
|
||||||
int keyTop)
|
|
||||||
{
|
{
|
||||||
var candidates = fragments
|
var candidates = fragments.Where(item => item.Left < leftCutoff && item.Top >= bodyStartTop && item.Top < keyTop - FooterLabelExclusionGap && (IsRollBandLabel(item.Text) || LooksLikeSplitRollBandStart(item.Text))).OrderBy(item => item.Top).ThenBy(item => item.Left).ToList();
|
||||||
.Where(item =>
|
|
||||||
item.Left < leftCutoff &&
|
|
||||||
item.Top >= bodyStartTop &&
|
|
||||||
item.Top < keyTop - FooterLabelExclusionGap &&
|
|
||||||
(IsRollBandLabel(item.Text) || LooksLikeSplitRollBandStart(item.Text)))
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.ThenBy(item => item.Left)
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
var merged = new List<PositionedTextFragment>();
|
var merged = new List<PositionedTextFragment>();
|
||||||
|
|
||||||
@@ -112,9 +96,7 @@ internal static class CriticalTableParserSupport
|
|||||||
foreach (var candidate in merged)
|
foreach (var candidate in merged)
|
||||||
{
|
{
|
||||||
var previous = deduped.LastOrDefault();
|
var previous = deduped.LastOrDefault();
|
||||||
if (previous is not null &&
|
if (previous is not null && string.Equals(NormalizeRollBandLabel(previous.Text), NormalizeRollBandLabel(candidate.Text), StringComparison.OrdinalIgnoreCase) && Math.Abs(previous.Top - candidate.Top) <= RowLabelDuplicateTolerance)
|
||||||
string.Equals(NormalizeRollBandLabel(previous.Text), NormalizeRollBandLabel(candidate.Text), StringComparison.OrdinalIgnoreCase) &&
|
|
||||||
Math.Abs(previous.Top - candidate.Top) <= RowLabelDuplicateTolerance)
|
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -126,14 +108,13 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Reports whether <paramref name="value"/> can be normalized into a roll-band label.
/// </summary>
/// <param name="value">Candidate label text.</param>
/// <returns><c>true</c> when <c>TryNormalizeRollBandLabel</c> succeeds; the normalized text itself is discarded.</returns>
internal static bool IsRollBandLabel(string value)
{
    // NOTE(review): the accepted forms are decided entirely by TryNormalizeRollBandLabel, which is defined outside this view.
    var isRollBand = TryNormalizeRollBandLabel(value, out _);
    return isRollBand;
}
|
||||||
|
|
||||||
/// <summary>
/// Reports whether a fragment could be (part of) a row label: it must start left of the cutoff and its text must
/// either be a roll-band label or look like the start of a split one.
/// </summary>
/// <param name="fragment">The fragment to inspect.</param>
/// <param name="leftCutoff">Exclusive upper bound for the fragment's <c>Left</c> position.</param>
/// <returns><c>true</c> when both the position and the text condition hold.</returns>
internal static bool IsPotentialRowLabelFragment(PositionedTextFragment fragment, int leftCutoff)
{
    // Anything at or beyond the cutoff cannot be a row label, whatever its text says.
    if (fragment.Left >= leftCutoff)
    {
        return false;
    }

    return IsRollBandLabel(fragment.Text) || LooksLikeSplitRollBandStart(fragment.Text);
}
|
|
||||||
|
|
||||||
/// <summary>
/// Returns the canonical form of a roll-band label.
/// </summary>
/// <param name="label">Raw label text.</param>
/// <returns>
/// The result of <c>TryNormalizeRollBandLabel</c> when it succeeds; otherwise the whitespace-collapsed label with
/// any whitespace around hyphens removed.
/// </returns>
internal static string NormalizeRollBandLabel(string label)
{
    if (TryNormalizeRollBandLabel(label, out var normalized))
    {
        return normalized;
    }

    // Fallback for text that is not a recognised roll band: only tidy the spacing around hyphens.
    var collapsed = CollapseWhitespace(label);
    return Regex.Replace(collapsed, @"\s*-\s*", "-");
}
|
||||||
|
|
||||||
internal static ParsedCriticalRollBand CreateRollBand(string label, int sortOrder)
|
internal static ParsedCriticalRollBand CreateRollBand(string label, int sortOrder)
|
||||||
{
|
{
|
||||||
@@ -144,9 +125,7 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
|
|
||||||
var parts = normalizedLabel.Split('-', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
|
var parts = normalizedLabel.Split('-', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
|
||||||
return parts.Length == 1
|
return parts.Length == 1 ? new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[0]), sortOrder) : new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[1]), sortOrder);
|
||||||
? new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[0]), sortOrder)
|
|
||||||
: new ParsedCriticalRollBand(normalizedLabel, int.Parse(parts[0]), int.Parse(parts[1]), sortOrder);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static string ResolveColumn(double centerX, IReadOnlyList<(string Key, double CenterX)> columns)
|
internal static string ResolveColumn(double centerX, IReadOnlyList<(string Key, double CenterX)> columns)
|
||||||
@@ -178,12 +157,7 @@ internal static class CriticalTableParserSupport
|
|||||||
lines[^1].Add(fragment);
|
lines[^1].Add(fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
return lines
|
return lines.Select(line => new ColumnarCellLine(CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))), line.OrderBy(item => item.Left).ToList())).Where(item => !string.IsNullOrWhiteSpace(item.Text)).ToList();
|
||||||
.Select(line => new ColumnarCellLine(
|
|
||||||
CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))),
|
|
||||||
line.OrderBy(item => item.Left).ToList()))
|
|
||||||
.Where(item => !string.IsNullOrWhiteSpace(item.Text))
|
|
||||||
.ToList();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static bool IsAffixLikeLine(string line, IReadOnlySet<string> affixLegendSymbols)
|
internal static bool IsAffixLikeLine(string line, IReadOnlySet<string> affixLegendSymbols)
|
||||||
@@ -204,8 +178,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (affixLegendSymbols.Count > 0 &&
|
if (affixLegendSymbols.Count > 0 && affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal)))
|
||||||
affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal)))
|
|
||||||
{
|
{
|
||||||
if (value.Any(char.IsDigit))
|
if (value.Any(char.IsDigit))
|
||||||
{
|
{
|
||||||
@@ -218,13 +191,7 @@ internal static class CriticalTableParserSupport
|
|||||||
remainder = remainder.Replace(symbol, string.Empty, StringComparison.Ordinal);
|
remainder = remainder.Replace(symbol, string.Empty, StringComparison.Ordinal);
|
||||||
}
|
}
|
||||||
|
|
||||||
remainder = remainder
|
remainder = remainder.Replace("+", string.Empty, StringComparison.Ordinal).Replace("-", string.Empty, StringComparison.Ordinal).Replace("–", string.Empty, StringComparison.Ordinal).Replace("(", string.Empty, StringComparison.Ordinal).Replace(")", string.Empty, StringComparison.Ordinal).Replace("/", string.Empty, StringComparison.Ordinal);
|
||||||
.Replace("+", string.Empty, StringComparison.Ordinal)
|
|
||||||
.Replace("-", string.Empty, StringComparison.Ordinal)
|
|
||||||
.Replace("–", string.Empty, StringComparison.Ordinal)
|
|
||||||
.Replace("(", string.Empty, StringComparison.Ordinal)
|
|
||||||
.Replace(")", string.Empty, StringComparison.Ordinal)
|
|
||||||
.Replace("/", string.Empty, StringComparison.Ordinal);
|
|
||||||
|
|
||||||
if (string.IsNullOrWhiteSpace(remainder))
|
if (string.IsNullOrWhiteSpace(remainder))
|
||||||
{
|
{
|
||||||
@@ -232,15 +199,7 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return value.StartsWith("+", StringComparison.Ordinal) ||
|
return value.StartsWith("+", StringComparison.Ordinal) || value.StartsWith("\u2211", StringComparison.Ordinal) || value.StartsWith("\u220F", StringComparison.Ordinal) || value.StartsWith("\u03C0", StringComparison.Ordinal) || value.StartsWith("\u222B", StringComparison.Ordinal) || StandaloneModifierAffixLineRegex.IsMatch(value) || NumericAffixLineRegex.IsMatch(value) || value.Contains(" - ", StringComparison.Ordinal) || value.Contains(" – ", StringComparison.Ordinal);
|
||||||
value.StartsWith("\u2211", StringComparison.Ordinal) ||
|
|
||||||
value.StartsWith("\u220F", StringComparison.Ordinal) ||
|
|
||||||
value.StartsWith("\u03C0", StringComparison.Ordinal) ||
|
|
||||||
value.StartsWith("\u222B", StringComparison.Ordinal) ||
|
|
||||||
StandaloneModifierAffixLineRegex.IsMatch(value) ||
|
|
||||||
NumericAffixLineRegex.IsMatch(value) ||
|
|
||||||
value.Contains(" - ", StringComparison.Ordinal) ||
|
|
||||||
value.Contains(" – ", StringComparison.Ordinal);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static int CountLineTypeSegments(IReadOnlyList<string> lines, IReadOnlySet<string> affixLegendSymbols)
|
internal static int CountLineTypeSegments(IReadOnlyList<string> lines, IReadOnlySet<string> affixLegendSymbols)
|
||||||
@@ -274,23 +233,11 @@ internal static class CriticalTableParserSupport
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return normalized.StartsWith("with ", StringComparison.OrdinalIgnoreCase) ||
|
return normalized.StartsWith("with ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("w/ ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("without ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("if ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("while ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("until ", StringComparison.OrdinalIgnoreCase) || normalized.StartsWith("unless ", StringComparison.OrdinalIgnoreCase);
|
||||||
normalized.StartsWith("w/ ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
normalized.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
normalized.StartsWith("without ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
normalized.StartsWith("if ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
normalized.StartsWith("while ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
normalized.StartsWith("until ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
normalized.StartsWith("unless ", StringComparison.OrdinalIgnoreCase);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Normalizes raw fragment text: non-breaking spaces, carriage returns and line feeds become plain spaces,
/// the typographic apostrophe becomes <c>'</c>, and surrounding whitespace is trimmed.
/// </summary>
/// <param name="value">The raw text.</param>
/// <returns>The normalized text; runs of spaces inside the text are left as they are.</returns>
internal static string NormalizeText(string value)
{
    var characters = value.ToCharArray();

    for (var position = 0; position < characters.Length; position++)
    {
        characters[position] = characters[position] switch
        {
            '\u00a0' or '\r' or '\n' => ' ',
            '’' => '\'',
            var unchanged => unchanged
        };
    }

    return new string(characters).Trim();
}
|
|
||||||
|
|
||||||
private static List<PositionedTextFragment> RemoveRedundantContainedFragments(IReadOnlyList<PositionedTextFragment> fragments)
|
private static List<PositionedTextFragment> RemoveRedundantContainedFragments(IReadOnlyList<PositionedTextFragment> fragments)
|
||||||
{
|
{
|
||||||
@@ -298,10 +245,7 @@ internal static class CriticalTableParserSupport
|
|||||||
|
|
||||||
foreach (var group in fragments.GroupBy(item => (item.PageNumber, item.Top, item.Height)))
|
foreach (var group in fragments.GroupBy(item => (item.PageNumber, item.Top, item.Height)))
|
||||||
{
|
{
|
||||||
var ordered = group
|
var ordered = group.OrderByDescending(item => item.Width).ThenBy(item => item.Left).ToList();
|
||||||
.OrderByDescending(item => item.Width)
|
|
||||||
.ThenBy(item => item.Left)
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
for (var index = 0; index < ordered.Count; index++)
|
for (var index = 0; index < ordered.Count; index++)
|
||||||
{
|
{
|
||||||
@@ -314,9 +258,7 @@ internal static class CriticalTableParserSupport
|
|||||||
for (var candidateIndex = index + 1; candidateIndex < ordered.Count; candidateIndex++)
|
for (var candidateIndex = index + 1; candidateIndex < ordered.Count; candidateIndex++)
|
||||||
{
|
{
|
||||||
var candidate = ordered[candidateIndex];
|
var candidate = ordered[candidateIndex];
|
||||||
if (candidate.Width > container.Width ||
|
if (candidate.Width > container.Width || !container.Text.Contains(candidate.Text, StringComparison.Ordinal) || !IsHorizontallyContained(candidate, container))
|
||||||
!container.Text.Contains(candidate.Text, StringComparison.Ordinal) ||
|
|
||||||
!IsHorizontallyContained(candidate, container))
|
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -326,9 +268,7 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return fragments
|
return fragments.Where(item => !redundant.Contains(item)).ToList();
|
||||||
.Where(item => !redundant.Contains(item))
|
|
||||||
.ToList();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool IsHorizontallyContained(PositionedTextFragment candidate, PositionedTextFragment container)
|
private static bool IsHorizontallyContained(PositionedTextFragment candidate, PositionedTextFragment container)
|
||||||
@@ -338,29 +278,19 @@ internal static class CriticalTableParserSupport
|
|||||||
var candidateRight = candidate.Left + candidate.Width;
|
var candidateRight = candidate.Left + candidate.Width;
|
||||||
var containerRight = container.Left + container.Width;
|
var containerRight = container.Left + container.Width;
|
||||||
|
|
||||||
return candidate.Left >= container.Left - containmentTolerance &&
|
return candidate.Left >= container.Left - containmentTolerance && candidateRight <= containerRight + containmentTolerance;
|
||||||
candidateRight <= containerRight + containmentTolerance;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Builds a lowercase, underscore-separated key from condition text.
/// </summary>
/// <param name="conditionText">The condition text as it appears in the table.</param>
/// <returns>The key, or <c>null</c> when nothing but separators remains.</returns>
internal static string? NormalizeConditionKey(string conditionText)
{
    var lowered = CollapseWhitespace(conditionText).ToLowerInvariant();

    // "w/o" has to be expanded before "w/", otherwise the shorter pattern would consume its prefix.
    var expanded = lowered
        .Replace("w/o", "without", StringComparison.Ordinal)
        .Replace("w/", "with", StringComparison.Ordinal);

    // Every run of characters outside a-z / 0-9 becomes one underscore; edge underscores are dropped.
    var key = Regex.Replace(expanded, @"[^a-z0-9]+", "_").Trim('_');

    if (key.Length == 0)
    {
        return null;
    }

    return key;
}
|
||||||
|
|
||||||
/// <summary>
/// Finds the smallest <c>Top</c> among fragments that mark the key/footer area: text equal to "Key:"
/// (ignoring case) or containing "must parry" or "attacker gets" (ignoring case).
/// </summary>
/// <param name="fragments">All fragments to scan.</param>
/// <returns>The smallest matching <c>Top</c>, or <see cref="int.MaxValue"/> when no fragment matches.</returns>
internal static int FindKeyTop(IReadOnlyList<PositionedTextFragment> fragments)
{
    var keyTop = int.MaxValue;

    foreach (var fragment in fragments)
    {
        var marksKeyArea =
            string.Equals(fragment.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
            fragment.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
            fragment.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase);

        if (marksKeyArea && fragment.Top < keyTop)
        {
            keyTop = fragment.Top;
        }
    }

    return keyTop;
}
|
|
||||||
|
|
||||||
internal static AffixLegend ParseAffixLegend(IReadOnlyList<PositionedTextFragment> fragments, int keyTop)
|
internal static AffixLegend ParseAffixLegend(IReadOnlyList<PositionedTextFragment> fragments, int keyTop)
|
||||||
{
|
{
|
||||||
@@ -369,13 +299,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return AffixLegend.Empty;
|
return AffixLegend.Empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
var footerLines = GroupByTop(fragments
|
var footerLines = GroupByTop(fragments.Where(item => item.Top >= keyTop - TopGroupingTolerance).OrderBy(item => item.Top).ThenBy(item => item.Left).ToList()).Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text)))).ToList();
|
||||||
.Where(item => item.Top >= keyTop - TopGroupingTolerance)
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.ThenBy(item => item.Left)
|
|
||||||
.ToList())
|
|
||||||
.Select(line => CollapseWhitespace(string.Join(' ', line.OrderBy(item => item.Left).Select(item => item.Text))))
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
var footerText = string.Join(' ', footerLines);
|
var footerText = string.Join(' ', footerLines);
|
||||||
var symbolEffects = new Dictionary<string, string>(StringComparer.Ordinal);
|
var symbolEffects = new Dictionary<string, string>(StringComparer.Ordinal);
|
||||||
@@ -389,22 +313,10 @@ internal static class CriticalTableParserSupport
|
|||||||
AddLegendMatch(symbolEffects, footerText, CriticalEffectCodes.BleedPerRound, @"bleed\s*=\s*(\S)");
|
AddLegendMatch(symbolEffects, footerText, CriticalEffectCodes.BleedPerRound, @"bleed\s*=\s*(\S)");
|
||||||
AddLegendMatch(symbolEffects, footerText, CriticalEffectCodes.BleedPerRound, @"(\S)\s*=\s*bleed");
|
AddLegendMatch(symbolEffects, footerText, CriticalEffectCodes.BleedPerRound, @"(\S)\s*=\s*bleed");
|
||||||
|
|
||||||
return new AffixLegend(
|
return new AffixLegend(symbolEffects, footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase) ? ["P"] : [], supportsFoePenalty: footerText.Contains("foe has", StringComparison.OrdinalIgnoreCase) && footerText.Contains("penalty", StringComparison.OrdinalIgnoreCase), supportsAttackerBonus: footerText.Contains("attacker gets", StringComparison.OrdinalIgnoreCase) && footerText.Contains("next round", StringComparison.OrdinalIgnoreCase), supportsPowerPointModifier: footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase));
|
||||||
symbolEffects,
|
|
||||||
footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase)
|
|
||||||
? ["P"]
|
|
||||||
: [],
|
|
||||||
supportsFoePenalty: footerText.Contains("foe has", StringComparison.OrdinalIgnoreCase) &&
|
|
||||||
footerText.Contains("penalty", StringComparison.OrdinalIgnoreCase),
|
|
||||||
supportsAttackerBonus: footerText.Contains("attacker gets", StringComparison.OrdinalIgnoreCase) &&
|
|
||||||
footerText.Contains("next round", StringComparison.OrdinalIgnoreCase),
|
|
||||||
supportsPowerPointModifier: footerText.Contains("powerpoint modification", StringComparison.OrdinalIgnoreCase));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static List<PositionedTextFragment> SplitBoundaryCrossingFragments(
|
internal static List<PositionedTextFragment> SplitBoundaryCrossingFragments(IReadOnlyList<PositionedTextFragment> bodyFragments, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
|
||||||
IReadOnlyList<PositionedTextFragment> bodyFragments,
|
|
||||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
|
||||||
IReadOnlySet<string> affixLegendSymbols)
|
|
||||||
{
|
{
|
||||||
var splitFragments = new List<PositionedTextFragment>(bodyFragments.Count);
|
var splitFragments = new List<PositionedTextFragment>(bodyFragments.Count);
|
||||||
|
|
||||||
@@ -416,23 +328,15 @@ internal static class CriticalTableParserSupport
|
|||||||
return splitFragments;
|
return splitFragments;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static List<(int Top, bool IsAffixLike)> BuildBodyLines(
|
internal static List<(int Top, bool IsAffixLike)> BuildBodyLines(IReadOnlyList<PositionedTextFragment> bodyFragments, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
|
||||||
IReadOnlyList<PositionedTextFragment> bodyFragments,
|
|
||||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
|
||||||
IReadOnlySet<string> affixLegendSymbols)
|
|
||||||
{
|
{
|
||||||
var bodyLines = new List<(int Top, bool IsAffixLike)>();
|
var bodyLines = new List<(int Top, bool IsAffixLike)>();
|
||||||
|
|
||||||
foreach (var lineFragments in GroupByTop(bodyFragments.OrderBy(item => item.Top).ThenBy(item => item.Left).ToList()))
|
foreach (var lineFragments in GroupByTop(bodyFragments.OrderBy(item => item.Top).ThenBy(item => item.Left).ToList()))
|
||||||
{
|
{
|
||||||
var columnTexts = lineFragments
|
var columnTexts = lineFragments.GroupBy(item => ResolveColumn(item.CenterX, columnCenters), StringComparer.OrdinalIgnoreCase).Select(group => CollapseWhitespace(string.Join(' ', group.OrderBy(item => item.Left).Select(item => item.Text)))).Where(item => !string.IsNullOrWhiteSpace(item)).ToList();
|
||||||
.GroupBy(item => ResolveColumn(item.CenterX, columnCenters), StringComparer.OrdinalIgnoreCase)
|
|
||||||
.Select(group => CollapseWhitespace(string.Join(' ', group.OrderBy(item => item.Left).Select(item => item.Text))))
|
|
||||||
.Where(item => !string.IsNullOrWhiteSpace(item))
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
var isAffixLike = columnTexts.Count > 0 &&
|
var isAffixLike = columnTexts.Count > 0 && columnTexts.All(text => IsAffixLikeLine(text, affixLegendSymbols) || IsBoundaryBonusLine(text));
|
||||||
columnTexts.All(text => IsAffixLikeLine(text, affixLegendSymbols) || IsBoundaryBonusLine(text));
|
|
||||||
|
|
||||||
bodyLines.Add((lineFragments[0].Top, isAffixLike));
|
bodyLines.Add((lineFragments[0].Top, isAffixLike));
|
||||||
}
|
}
|
||||||
@@ -447,8 +351,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return fragment.Top >= keyTop - FooterPageNumberExclusionGap &&
|
return fragment.Top >= keyTop - FooterPageNumberExclusionGap && Regex.IsMatch(fragment.Text, @"^\d{2,3}$");
|
||||||
Regex.IsMatch(fragment.Text, @"^\d{2,3}$");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static IEnumerable<List<PositionedTextFragment>> GroupByTop(IReadOnlyList<PositionedTextFragment> fragments)
|
internal static IEnumerable<List<PositionedTextFragment>> GroupByTop(IReadOnlyList<PositionedTextFragment> fragments)
|
||||||
@@ -470,10 +373,7 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Turns row-label fragments into row anchors ordered by <c>Top</c>.
/// </summary>
/// <param name="rowLabelFragments">The fragments identified as row labels.</param>
/// <returns>
/// One <see cref="RowAnchor"/> per fragment, carrying the normalized label, the fragment's <c>Top</c> and a
/// one-based position in top-to-bottom order.
/// </returns>
internal static List<RowAnchor> CreateRowAnchors(IReadOnlyList<PositionedTextFragment> rowLabelFragments)
{
    var anchors = new List<RowAnchor>(rowLabelFragments.Count);

    foreach (var fragment in rowLabelFragments.OrderBy(item => item.Top))
    {
        // The anchor's position is one-based: the count before adding, plus one.
        anchors.Add(new RowAnchor(NormalizeRollBandLabel(fragment.Text), fragment.Top, anchors.Count + 1));
    }

    return anchors;
}
|
|
||||||
|
|
||||||
internal static int ResolveBodyStartTop(int headerTop, IReadOnlyList<RowAnchor> rowAnchors)
|
internal static int ResolveBodyStartTop(int headerTop, IReadOnlyList<RowAnchor> rowAnchors)
|
||||||
{
|
{
|
||||||
@@ -482,32 +382,12 @@ internal static class CriticalTableParserSupport
|
|||||||
return headerTop + HeaderToBodyMinimumGap;
|
return headerTop + HeaderToBodyMinimumGap;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Math.Min(
|
return Math.Min(headerTop + HeaderToBodyMinimumGap, Math.Max(headerTop + HeaderToRowLabelMinimumGap, rowAnchors[0].Top - HeaderToRowLabelMinimumGap - TopGroupingTolerance));
|
||||||
headerTop + HeaderToBodyMinimumGap,
|
|
||||||
Math.Max(
|
|
||||||
headerTop + HeaderToRowLabelMinimumGap,
|
|
||||||
rowAnchors[0].Top - HeaderToRowLabelMinimumGap - TopGroupingTolerance));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Selects the fragments that belong to the table body and splits any that cross a column boundary.
/// </summary>
/// <param name="fragments">All fragments to consider.</param>
/// <param name="bodyStartTop">Inclusive lower bound for a body fragment's <c>Top</c>.</param>
/// <param name="keyTop">Top of the key/footer area; body fragments must lie above it by more than <c>TopGroupingTolerance</c>.</param>
/// <param name="leftCutoff">Left cutoff used to recognise potential row-label fragments.</param>
/// <param name="rowAnchors">Row anchors; a fragment that matches an anchor's top and label is not body text.</param>
/// <param name="excludedFragments">Fragments the caller wants left out.</param>
/// <param name="columnCenters">Column keys with their centre X positions, passed to the boundary split.</param>
/// <param name="affixLegendSymbols">Affix legend symbols, passed to the boundary split.</param>
/// <returns>The result of <c>SplitBoundaryCrossingFragments</c> over the selected fragments.</returns>
internal static List<PositionedTextFragment> BuildBodyFragments(IReadOnlyList<PositionedTextFragment> fragments, int bodyStartTop, int keyTop, int leftCutoff, IReadOnlyList<RowAnchor> rowAnchors, IReadOnlyCollection<PositionedTextFragment> excludedFragments, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
{
    var bodyEndTop = keyTop - TopGroupingTolerance;

    // True when the fragment is the label of one of the row anchors (same top, same normalized label).
    bool IsAnchorLabel(PositionedTextFragment item) =>
        rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase));

    var bodyFragments = new List<PositionedTextFragment>();

    foreach (var item in fragments)
    {
        // Outside the vertical band between the body start and the key area.
        if (item.Top < bodyStartTop || item.Top >= bodyEndTop)
        {
            continue;
        }

        // Footer page numbers and row-label candidates are not body text.
        if (IsFooterPageNumberFragment(item, keyTop) || IsPotentialRowLabelFragment(item, leftCutoff))
        {
            continue;
        }

        if (IsAnchorLabel(item) || excludedFragments.Contains(item))
        {
            continue;
        }

        bodyFragments.Add(item);
    }

    return SplitBoundaryCrossingFragments(bodyFragments, columnCenters, affixLegendSymbols);
}
|
||||||
@@ -515,23 +395,14 @@ internal static class CriticalTableParserSupport
|
|||||||
internal static void RepairLeadingAffixLeakage(List<ColumnarCellEntry> cellEntries, IReadOnlySet<string> affixLegendSymbols)
|
internal static void RepairLeadingAffixLeakage(List<ColumnarCellEntry> cellEntries, IReadOnlySet<string> affixLegendSymbols)
|
||||||
{
|
{
|
||||||
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
||||||
var axes = cellEntries
|
var axes = cellEntries.Select(item => (item.GroupKey, item.ColumnKey)).Distinct().ToList();
|
||||||
.Select(item => (item.GroupKey, item.ColumnKey))
|
|
||||||
.Distinct()
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
|
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
|
||||||
{
|
{
|
||||||
foreach (var (groupKey, columnKey) in axes)
|
foreach (var (groupKey, columnKey) in axes)
|
||||||
{
|
{
|
||||||
var current = cellEntries.SingleOrDefault(item =>
|
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) && string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
||||||
item.RowIndex == rowIndex &&
|
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) && string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
||||||
string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
|
|
||||||
string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
|
||||||
var next = cellEntries.SingleOrDefault(item =>
|
|
||||||
item.RowIndex == rowIndex + 1 &&
|
|
||||||
string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
|
|
||||||
string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
|
||||||
if (current is null || next is null)
|
if (current is null || next is null)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
@@ -554,15 +425,9 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static int ResolveRowBoundaryTop(
|
internal static int ResolveRowBoundaryTop(RowAnchor current, RowAnchor next, IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
||||||
RowAnchor current,
|
|
||||||
RowAnchor next,
|
|
||||||
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
|
||||||
{
|
{
|
||||||
var linesBetweenLabels = bodyLines
|
var linesBetweenLabels = bodyLines.Where(item => item.Top >= current.Top && item.Top < next.Top).OrderBy(item => item.Top).ToList();
|
||||||
.Where(item => item.Top >= current.Top && item.Top < next.Top)
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
|
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
|
||||||
{
|
{
|
||||||
@@ -575,14 +440,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
|
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static void BuildParsedArtifacts(
|
internal static void BuildParsedArtifacts(IReadOnlyList<ColumnarCellEntry> cellEntries, AffixLegend affixLegend, List<ParsedCriticalCellArtifact> parsedCells, List<ParsedCriticalResult> parsedResults, List<string> validationErrors, List<string>? validationWarnings = null, bool downgradeCellContentValidationToWarnings = false)
|
||||||
IReadOnlyList<ColumnarCellEntry> cellEntries,
|
|
||||||
AffixLegend affixLegend,
|
|
||||||
List<ParsedCriticalCellArtifact> parsedCells,
|
|
||||||
List<ParsedCriticalResult> parsedResults,
|
|
||||||
List<string> validationErrors,
|
|
||||||
List<string>? validationWarnings = null,
|
|
||||||
bool downgradeCellContentValidationToWarnings = false)
|
|
||||||
{
|
{
|
||||||
var sharedLegend = ToSharedAffixLegend(affixLegend);
|
var sharedLegend = ToSharedAffixLegend(affixLegend);
|
||||||
|
|
||||||
@@ -591,8 +449,7 @@ internal static class CriticalTableParserSupport
|
|||||||
var lineTexts = cellEntry.Lines.Select(line => line.Text).ToList();
|
var lineTexts = cellEntry.Lines.Select(line => line.Text).ToList();
|
||||||
var content = SharedParsing.CriticalCellTextParser.Parse(lineTexts, sharedLegend);
|
var content = SharedParsing.CriticalCellTextParser.Parse(lineTexts, sharedLegend);
|
||||||
var sourceBounds = BuildSourceBounds(cellEntry.Lines.SelectMany(line => line.Fragments).ToList());
|
var sourceBounds = BuildSourceBounds(cellEntry.Lines.SelectMany(line => line.Fragments).ToList());
|
||||||
var contentIssues = content.ValidationErrors.Select(error =>
|
var contentIssues = content.ValidationErrors.Select(error => $"Cell '{BuildCellIdentifier(cellEntry)}': {error}");
|
||||||
$"Cell '{BuildCellIdentifier(cellEntry)}': {error}");
|
|
||||||
if (downgradeCellContentValidationToWarnings)
|
if (downgradeCellContentValidationToWarnings)
|
||||||
{
|
{
|
||||||
validationWarnings?.AddRange(contentIssues);
|
validationWarnings?.AddRange(contentIssues);
|
||||||
@@ -605,29 +462,9 @@ internal static class CriticalTableParserSupport
|
|||||||
var effects = content.Effects.Select(ToImportToolEffect).ToList();
|
var effects = content.Effects.Select(ToImportToolEffect).ToList();
|
||||||
var branches = content.Branches.Select(ToImportToolBranch).ToList();
|
var branches = content.Branches.Select(ToImportToolBranch).ToList();
|
||||||
|
|
||||||
parsedCells.Add(new ParsedCriticalCellArtifact(
|
parsedCells.Add(new ParsedCriticalCellArtifact(cellEntry.GroupKey, cellEntry.RollBandLabel, cellEntry.ColumnKey, lineTexts, content.BaseLines, content.RawCellText, content.DescriptionText, content.RawAffixText, effects, branches, sourceBounds));
|
||||||
cellEntry.GroupKey,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
lineTexts,
|
|
||||||
content.BaseLines,
|
|
||||||
content.RawCellText,
|
|
||||||
content.DescriptionText,
|
|
||||||
content.RawAffixText,
|
|
||||||
effects,
|
|
||||||
branches,
|
|
||||||
sourceBounds));
|
|
||||||
|
|
||||||
parsedResults.Add(new ParsedCriticalResult(
|
parsedResults.Add(new ParsedCriticalResult(cellEntry.GroupKey, cellEntry.ColumnKey, cellEntry.RollBandLabel, content.RawCellText, content.DescriptionText, content.RawAffixText, effects, branches, sourceBounds));
|
||||||
cellEntry.GroupKey,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
content.RawCellText,
|
|
||||||
content.DescriptionText,
|
|
||||||
content.RawAffixText,
|
|
||||||
effects,
|
|
||||||
branches,
|
|
||||||
sourceBounds));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -649,55 +486,42 @@ internal static class CriticalTableParserSupport
|
|||||||
var right = fragments.Max(fragment => fragment.Left + fragment.Width);
|
var right = fragments.Max(fragment => fragment.Left + fragment.Width);
|
||||||
var bottom = fragments.Max(fragment => fragment.Top + fragment.Height);
|
var bottom = fragments.Max(fragment => fragment.Top + fragment.Height);
|
||||||
|
|
||||||
return new ParsedCriticalSourceRect(
|
return new ParsedCriticalSourceRect(pageNumber, left, top, Math.Max(1, right - left), Math.Max(1, bottom - top));
|
||||||
pageNumber,
|
|
||||||
left,
|
|
||||||
top,
|
|
||||||
Math.Max(1, right - left),
|
|
||||||
Math.Max(1, bottom - top));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Converts the import tool's affix legend into the shared parser's <c>AffixLegend</c>.
/// </summary>
/// <param name="affixLegend">Legend to convert.</param>
/// <returns>
/// A shared legend carrying the same symbol effects and support flags. Its second argument is the
/// classification symbols with every effect symbol removed.
/// </returns>
private static SharedParsing.AffixLegend ToSharedAffixLegend(AffixLegend affixLegend)
{
    var symbolEffects = affixLegend.SymbolEffects;

    // Except also de-duplicates, so each remaining classification symbol appears once.
    var classificationOnlySymbols = affixLegend.ClassificationSymbols
        .Except(affixLegend.EffectSymbols)
        .ToList();

    return new SharedParsing.AffixLegend(
        symbolEffects,
        classificationOnlySymbols,
        affixLegend.SupportsFoePenalty,
        affixLegend.SupportsAttackerBonus,
        affixLegend.SupportsPowerPointModifier);
}
|
|
||||||
|
|
||||||
/// <summary>
/// Copies a shared-parser effect into the import tool's <see cref="ParsedCriticalEffect"/>, field by field.
/// </summary>
/// <param name="effect">Effect produced by the shared parser.</param>
/// <returns>An import-tool effect constructed from the same values, in the same order.</returns>
private static ParsedCriticalEffect ToImportToolEffect(SharedParsing.ParsedCriticalEffect effect)
{
    return new ParsedCriticalEffect(
        effect.EffectCode,
        effect.Target,
        effect.ValueInteger,
        effect.ValueExpression,
        effect.DurationRounds,
        effect.PerRound,
        effect.Modifier,
        effect.BodyPart,
        effect.IsPermanent,
        effect.SourceType,
        effect.SourceText);
}
|
|
||||||
|
|
||||||
/// <summary>
/// Copies a shared-parser branch into the import tool's <see cref="ParsedCriticalBranch"/>.
/// </summary>
/// <param name="branch">Branch produced by the shared parser.</param>
/// <returns>
/// An import-tool branch with the same scalar values; its effects are converted one by one
/// through <c>ToImportToolEffect</c> and materialized into a list.
/// </returns>
private static ParsedCriticalBranch ToImportToolBranch(SharedParsing.ParsedCriticalBranch branch)
{
    var convertedEffects = branch.Effects
        .Select(sharedEffect => ToImportToolEffect(sharedEffect))
        .ToList();

    return new ParsedCriticalBranch(
        branch.BranchKind,
        branch.ConditionKey,
        branch.ConditionText,
        branch.RawText,
        branch.DescriptionText,
        branch.RawAffixText,
        convertedEffects,
        branch.SortOrder);
}
|
|
||||||
|
|
||||||
/// <summary>
/// Builds the slash-separated identifier used to name a cell in validation messages.
/// </summary>
/// <param name="cellEntry">Cell whose roll band, optional group, and column form the identifier.</param>
/// <returns>
/// <c>RollBandLabel/ColumnKey</c> when the cell has no group key; otherwise
/// <c>RollBandLabel/GroupKey/ColumnKey</c>.
/// </returns>
private static string BuildCellIdentifier(ColumnarCellEntry cellEntry)
{
    if (cellEntry.GroupKey is null)
    {
        return $"{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}";
    }

    return $"{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}";
}
|
|
||||||
|
|
||||||
/// <summary>
/// Tests whether the text is only the opening half of a roll band: two or three digits,
/// optional whitespace, and a trailing hyphen (for example <c>61-</c>).
/// </summary>
/// <param name="value">Fragment text; surrounding whitespace is ignored.</param>
/// <returns><c>true</c> when the trimmed text matches that shape; otherwise <c>false</c>.</returns>
private static bool LooksLikeSplitRollBandStart(string value)
{
    var trimmed = value.Trim();
    return Regex.IsMatch(trimmed, @"^\d{2,3}\s*-$");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Attempts to turn raw label text into a roll-band label.
/// </summary>
/// <param name="label">Raw label text.</param>
/// <param name="normalized">
/// On success, either the cleaned-up text (whitespace collapsed via <c>CollapseWhitespace</c>,
/// whitespace around hyphens removed) or the replacement found in <c>OcrDamagedStandardRollBandLabels</c>.
/// On failure, the cleaned-up text.
/// </param>
/// <returns>
/// <c>true</c> when the cleaned-up text matches <c>RollBandLabelRegex</c> or has an entry in
/// <c>OcrDamagedStandardRollBandLabels</c>; otherwise <c>false</c>.
/// </returns>
private static bool TryNormalizeRollBandLabel(string label, out string normalized)
{
    var cleaned = Regex.Replace(CollapseWhitespace(label), @"\s*-\s*", "-");
    normalized = cleaned;

    if (!RollBandLabelRegex.IsMatch(cleaned))
    {
        // Not a well-formed band: fall back to the table of known damaged labels.
        if (!OcrDamagedStandardRollBandLabels.TryGetValue(cleaned, out var repaired))
        {
            return false;
        }

        normalized = repaired;
    }

    return true;
}
|
||||||
|
|
||||||
private static bool TryMergeSplitRollBand(IReadOnlyList<PositionedTextFragment> candidates, int index, out PositionedTextFragment mergedCandidate)
|
private static bool TryMergeSplitRollBand(IReadOnlyList<PositionedTextFragment> candidates, int index, out PositionedTextFragment mergedCandidate)
|
||||||
{
|
{
|
||||||
var current = candidates[index];
|
var current = candidates[index];
|
||||||
@@ -708,11 +532,7 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
|
|
||||||
var next = candidates[index + 1];
|
var next = candidates[index + 1];
|
||||||
if (current.PageNumber != next.PageNumber ||
|
if (current.PageNumber != next.PageNumber || !Regex.IsMatch(next.Text.Trim(), @"^\d{2,3}$") || next.Top <= current.Top || next.Top - current.Top > RowLabelDuplicateTolerance + 5 || Math.Abs(next.Left - current.Left) > 20)
|
||||||
!Regex.IsMatch(next.Text.Trim(), @"^\d{2,3}$") ||
|
|
||||||
next.Top <= current.Top ||
|
|
||||||
next.Top - current.Top > RowLabelDuplicateTolerance + 5 ||
|
|
||||||
Math.Abs(next.Left - current.Left) > 20)
|
|
||||||
{
|
{
|
||||||
mergedCandidate = null!;
|
mergedCandidate = null!;
|
||||||
return false;
|
return false;
|
||||||
@@ -722,28 +542,18 @@ internal static class CriticalTableParserSupport
|
|||||||
var mergedLabel = $"{startDigits}-{next.Text.Trim()}";
|
var mergedLabel = $"{startDigits}-{next.Text.Trim()}";
|
||||||
var right = Math.Max(current.Left + current.Width, next.Left + next.Width);
|
var right = Math.Max(current.Left + current.Width, next.Left + next.Width);
|
||||||
|
|
||||||
mergedCandidate = new PositionedTextFragment(
|
mergedCandidate = new PositionedTextFragment(current.PageNumber, current.Top, Math.Min(current.Left, next.Left), right - Math.Min(current.Left, next.Left), Math.Max(current.Height, next.Height), mergedLabel);
|
||||||
current.PageNumber,
|
|
||||||
current.Top,
|
|
||||||
Math.Min(current.Left, next.Left),
|
|
||||||
right - Math.Min(current.Left, next.Left),
|
|
||||||
Math.Max(current.Height, next.Height),
|
|
||||||
mergedLabel);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IReadOnlyList<PositionedTextFragment> SplitBoundaryCrossingFragment(
|
private static IReadOnlyList<PositionedTextFragment> SplitBoundaryCrossingFragment(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters, IReadOnlySet<string> affixLegendSymbols)
|
||||||
PositionedTextFragment fragment,
|
|
||||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
|
||||||
IReadOnlySet<string> affixLegendSymbols)
|
|
||||||
{
|
{
|
||||||
if (!CrossesColumnBoundary(fragment, columnCenters))
|
if (!CrossesColumnBoundary(fragment, columnCenters))
|
||||||
{
|
{
|
||||||
return [fragment];
|
return [fragment];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsAffixLikeLine(fragment.Text, affixLegendSymbols) &&
|
if (IsAffixLikeLine(fragment.Text, affixLegendSymbols) && fragment.Text.Contains(" ", StringComparison.Ordinal))
|
||||||
fragment.Text.Contains(" ", StringComparison.Ordinal))
|
|
||||||
{
|
{
|
||||||
return BuildSplitFragmentsFromMatches(fragment, MultiFragmentSplitRegex.Matches(fragment.Text), columnCenters);
|
return BuildSplitFragmentsFromMatches(fragment, MultiFragmentSplitRegex.Matches(fragment.Text), columnCenters);
|
||||||
}
|
}
|
||||||
@@ -756,10 +566,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return [fragment];
|
return [fragment];
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IReadOnlyList<PositionedTextFragment> BuildSplitFragmentsFromMatches(
|
private static IReadOnlyList<PositionedTextFragment> BuildSplitFragmentsFromMatches(PositionedTextFragment fragment, MatchCollection matches, IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||||
PositionedTextFragment fragment,
|
|
||||||
MatchCollection matches,
|
|
||||||
IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
|
||||||
{
|
{
|
||||||
if (matches.Count < 2)
|
if (matches.Count < 2)
|
||||||
{
|
{
|
||||||
@@ -780,13 +587,7 @@ internal static class CriticalTableParserSupport
|
|||||||
var segmentLeft = fragment.Left + (int)Math.Round(characterWidth * match.Index);
|
var segmentLeft = fragment.Left + (int)Math.Round(characterWidth * match.Index);
|
||||||
var segmentWidth = Math.Max(1, (int)Math.Round(characterWidth * match.Length));
|
var segmentWidth = Math.Max(1, (int)Math.Round(characterWidth * match.Length));
|
||||||
|
|
||||||
splitFragments.Add(new PositionedTextFragment(
|
splitFragments.Add(new PositionedTextFragment(fragment.PageNumber, fragment.Top, segmentLeft, segmentWidth, fragment.Height, segmentText));
|
||||||
fragment.PageNumber,
|
|
||||||
fragment.Top,
|
|
||||||
segmentLeft,
|
|
||||||
segmentWidth,
|
|
||||||
fragment.Height,
|
|
||||||
segmentText));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (splitFragments.Count < 2)
|
if (splitFragments.Count < 2)
|
||||||
@@ -795,20 +596,12 @@ internal static class CriticalTableParserSupport
|
|||||||
}
|
}
|
||||||
|
|
||||||
var originalColumn = ResolveColumn(fragment.CenterX, columnCenters);
|
var originalColumn = ResolveColumn(fragment.CenterX, columnCenters);
|
||||||
var distinctColumns = splitFragments
|
var distinctColumns = splitFragments.Select(item => ResolveColumn(item.CenterX, columnCenters)).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
|
||||||
.Select(item => ResolveColumn(item.CenterX, columnCenters))
|
|
||||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
return distinctColumns.Count > 1 || distinctColumns.Any(item => !string.Equals(item, originalColumn, StringComparison.OrdinalIgnoreCase))
|
return distinctColumns.Count > 1 || distinctColumns.Any(item => !string.Equals(item, originalColumn, StringComparison.OrdinalIgnoreCase)) ? splitFragments : [fragment];
|
||||||
? splitFragments
|
|
||||||
: [fragment];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool TrySplitProseFragmentAtBoundaries(
|
private static bool TrySplitProseFragmentAtBoundaries(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters, out IReadOnlyList<PositionedTextFragment> splitFragments)
|
||||||
PositionedTextFragment fragment,
|
|
||||||
IReadOnlyList<(string Key, double CenterX)> columnCenters,
|
|
||||||
out IReadOnlyList<PositionedTextFragment> splitFragments)
|
|
||||||
{
|
{
|
||||||
splitFragments = null!;
|
splitFragments = null!;
|
||||||
|
|
||||||
@@ -848,9 +641,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<int> FindBoundarySplitIndexes(
|
private static List<int> FindBoundarySplitIndexes(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||||
PositionedTextFragment fragment,
|
|
||||||
IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
|
||||||
{
|
{
|
||||||
var characterWidth = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);
|
var characterWidth = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);
|
||||||
var fragmentRight = fragment.Left + fragment.Width;
|
var fragmentRight = fragment.Left + fragment.Width;
|
||||||
@@ -917,11 +708,7 @@ internal static class CriticalTableParserSupport
|
|||||||
return bestIndex;
|
return bestIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static PositionedTextFragment? CreateFragmentSegment(
|
private static PositionedTextFragment? CreateFragmentSegment(PositionedTextFragment fragment, int startIndex, int length, double characterWidth)
|
||||||
PositionedTextFragment fragment,
|
|
||||||
int startIndex,
|
|
||||||
int length,
|
|
||||||
double characterWidth)
|
|
||||||
{
|
{
|
||||||
if (length <= 0)
|
if (length <= 0)
|
||||||
{
|
{
|
||||||
@@ -950,18 +737,10 @@ internal static class CriticalTableParserSupport
|
|||||||
var actualLength = trimmedEnd - trimmedStart + 1;
|
var actualLength = trimmedEnd - trimmedStart + 1;
|
||||||
var segmentText = CollapseWhitespace(fragment.Text.Substring(actualStart, actualLength));
|
var segmentText = CollapseWhitespace(fragment.Text.Substring(actualStart, actualLength));
|
||||||
|
|
||||||
return new PositionedTextFragment(
|
return new PositionedTextFragment(fragment.PageNumber, fragment.Top, fragment.Left + (int)Math.Round(characterWidth * actualStart), Math.Max(1, (int)Math.Round(characterWidth * actualLength)), fragment.Height, segmentText);
|
||||||
fragment.PageNumber,
|
|
||||||
fragment.Top,
|
|
||||||
fragment.Left + (int)Math.Round(characterWidth * actualStart),
|
|
||||||
Math.Max(1, (int)Math.Round(characterWidth * actualLength)),
|
|
||||||
fragment.Height,
|
|
||||||
segmentText);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool CrossesColumnBoundary(
|
private static bool CrossesColumnBoundary(PositionedTextFragment fragment, IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
||||||
PositionedTextFragment fragment,
|
|
||||||
IReadOnlyList<(string Key, double CenterX)> columnCenters)
|
|
||||||
{
|
{
|
||||||
var fragmentRight = fragment.Left + fragment.Width;
|
var fragmentRight = fragment.Left + fragment.Width;
|
||||||
|
|
||||||
@@ -980,11 +759,7 @@ internal static class CriticalTableParserSupport
|
|||||||
/// <summary>
/// Tests whether the text, with surrounding whitespace removed, matches <c>BoundaryBonusLineRegex</c>.
/// </summary>
/// <param name="text">Line text to test.</param>
/// <returns><c>true</c> when the trimmed text matches; otherwise <c>false</c>.</returns>
private static bool IsBoundaryBonusLine(string text)
{
    var trimmed = text.Trim();
    return BoundaryBonusLineRegex.IsMatch(trimmed);
}
|
||||||
|
|
||||||
private static void AddLegendMatch(
|
private static void AddLegendMatch(IDictionary<string, string> symbolEffects, string value, string effectCode, string pattern)
|
||||||
IDictionary<string, string> symbolEffects,
|
|
||||||
string value,
|
|
||||||
string effectCode,
|
|
||||||
string pattern)
|
|
||||||
{
|
{
|
||||||
foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase))
|
foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase))
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user