Implement phase 4 critical table imports
This commit is contained in:
Binary file not shown.
@@ -4,7 +4,7 @@ namespace RolemasterDb.ImportTool.Tests;
|
||||
|
||||
public sealed class StandardCriticalTableParserIntegrationTests
|
||||
{
|
||||
private static readonly string[] ExpectedPhase3Slugs =
|
||||
private static readonly string[] ExpectedEnabledSlugs =
|
||||
[
|
||||
"arcane-aether",
|
||||
"arcane-nether",
|
||||
@@ -16,20 +16,25 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
"heat",
|
||||
"impact",
|
||||
"krush",
|
||||
"large_creature_magic",
|
||||
"large_creature_weapon",
|
||||
"ma-strikes",
|
||||
"ma-sweeps",
|
||||
"mana",
|
||||
"puncture",
|
||||
"slash",
|
||||
"subdual",
|
||||
"super_large_creature_weapon",
|
||||
"tiny",
|
||||
"unbalance"
|
||||
];
|
||||
|
||||
private static readonly PdfXmlExtractor Extractor = new();
|
||||
private static readonly StandardCriticalTableParser Parser = new();
|
||||
private static readonly StandardCriticalTableParser StandardParser = new();
|
||||
private static readonly VariantColumnCriticalTableParser VariantColumnParser = new();
|
||||
private static readonly GroupedVariantCriticalTableParser GroupedVariantParser = new();
|
||||
|
||||
public static IEnumerable<object[]> EnabledStandardTables() =>
|
||||
public static IEnumerable<object[]> EnabledTables() =>
|
||||
LoadManifest().Tables
|
||||
.Where(item => item.Enabled)
|
||||
.OrderBy(item => item.Slug, StringComparer.Ordinal)
|
||||
@@ -37,18 +42,22 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
|
||||
public static IEnumerable<object[]> RepresentativeCells()
|
||||
{
|
||||
yield return ["slash", "71-75", "A", "Blow falls on lower leg"];
|
||||
yield return ["puncture", "66", "C", "Strike shatters foe's knee"];
|
||||
yield return ["ballistic-shrapnel", "86-90", "E", "destroy his heart"];
|
||||
yield return ["arcane-aether", "96-99", "E", "smoking pulp"];
|
||||
yield return ["ma-strikes", "96-99", "E", "drives bone into brain"];
|
||||
yield return ["mana", "96-99", "E", "momentarily transformed"];
|
||||
yield return ["mana", "100", "E", "Mana consumes everything"];
|
||||
yield return ["tiny", "100", "E", "Vein and artery severed"];
|
||||
yield return new object[] { "slash", null!, "71-75", "A", "Blow falls on lower leg" };
|
||||
yield return new object[] { "puncture", null!, "66", "C", "Strike shatters foe's knee" };
|
||||
yield return new object[] { "ballistic-shrapnel", null!, "86-90", "E", "destroy his heart" };
|
||||
yield return new object[] { "arcane-aether", null!, "96-99", "E", "smoking pulp" };
|
||||
yield return new object[] { "ma-strikes", null!, "96-99", "E", "drives bone into brain" };
|
||||
yield return new object[] { "mana", null!, "96-99", "E", "momentarily transformed" };
|
||||
yield return new object[] { "mana", null!, "100", "E", "Mana consumes everything" };
|
||||
yield return new object[] { "tiny", null!, "100", "E", "Vein and artery severed" };
|
||||
yield return new object[] { "large_creature_weapon", null!, "01-05", "NORMAL", "Weapon shatters" };
|
||||
yield return new object[] { "super_large_creature_weapon", null!, "31-40", "SLAYING", "Boom! Solid without question" };
|
||||
yield return new object[] { "large_creature_magic", "large", "251+", "NORMAL", "Foe lowers his eyes within your reach" };
|
||||
yield return new object[] { "large_creature_magic", "super_large", "251+", "SLAYING", "Blast goes in through foe's eye" };
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Manifest_enables_the_phase_3_standard_table_set()
|
||||
public void Manifest_enables_the_phase_4_table_set()
|
||||
{
|
||||
var manifest = LoadManifest();
|
||||
var enabledTables = manifest.Tables
|
||||
@@ -56,25 +65,29 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
.OrderBy(item => item.Slug, StringComparer.Ordinal)
|
||||
.ToList();
|
||||
|
||||
Assert.Equal(ExpectedPhase3Slugs, enabledTables.Select(item => item.Slug));
|
||||
Assert.Equal(ExpectedEnabledSlugs, enabledTables.Select(item => item.Slug));
|
||||
Assert.All(enabledTables, entry =>
|
||||
{
|
||||
Assert.Equal("standard", entry.Family);
|
||||
Assert.Equal("xml", entry.ExtractionMethod);
|
||||
Assert.True(File.Exists(Path.Combine(GetRepositoryRoot(), entry.PdfPath)), $"Missing source PDF for '{entry.Slug}'.");
|
||||
});
|
||||
|
||||
Assert.Equal("variant_column", enabledTables.Single(item => item.Slug == "large_creature_weapon").Family);
|
||||
Assert.Equal("variant_column", enabledTables.Single(item => item.Slug == "super_large_creature_weapon").Family);
|
||||
Assert.Equal("grouped_variant", enabledTables.Single(item => item.Slug == "large_creature_magic").Family);
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[MemberData(nameof(EnabledStandardTables))]
|
||||
public async Task Enabled_standard_tables_extract_and_parse_successfully(CriticalImportManifestEntry entry)
|
||||
[MemberData(nameof(EnabledTables))]
|
||||
public async Task Enabled_tables_extract_and_parse_successfully(CriticalImportManifestEntry entry)
|
||||
{
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var expectedGroupCount = Math.Max(parseResult.Table.Groups.Count, 1);
|
||||
|
||||
Assert.True(parseResult.ValidationReport.IsValid, string.Join(Environment.NewLine, parseResult.ValidationReport.Errors));
|
||||
Assert.Equal(5, parseResult.Table.Columns.Count);
|
||||
Assert.NotEmpty(parseResult.Table.Columns);
|
||||
Assert.NotEmpty(parseResult.Table.RollBands);
|
||||
Assert.Equal(parseResult.ValidationReport.RowCount * 5, parseResult.ValidationReport.CellCount);
|
||||
Assert.Equal(parseResult.ValidationReport.RowCount * parseResult.Table.Columns.Count * expectedGroupCount, parseResult.ValidationReport.CellCount);
|
||||
Assert.Equal(parseResult.ValidationReport.CellCount, parseResult.Table.Results.Count);
|
||||
}
|
||||
|
||||
@@ -82,6 +95,7 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
[MemberData(nameof(RepresentativeCells))]
|
||||
public async Task Representative_cells_keep_expected_descriptions(
|
||||
string slug,
|
||||
string? groupKey,
|
||||
string rollBandLabel,
|
||||
string columnKey,
|
||||
string expectedSnippet)
|
||||
@@ -89,6 +103,7 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, slug, StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var result = parseResult.Table.Results.Single(item =>
|
||||
string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
|
||||
string.Equals(item.RollBandLabel, rollBandLabel, StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));
|
||||
|
||||
@@ -101,6 +116,7 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "slash", StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var result = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "56-60", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "A", StringComparison.Ordinal));
|
||||
|
||||
@@ -113,9 +129,11 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var row96E = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "96-99", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "E", StringComparison.Ordinal));
|
||||
var row100E = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "100", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "E", StringComparison.Ordinal));
|
||||
|
||||
@@ -130,6 +148,7 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var row100C = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "100", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "C", StringComparison.Ordinal));
|
||||
|
||||
@@ -143,9 +162,11 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var row71A = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "71-75", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "A", StringComparison.Ordinal));
|
||||
var row71B = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "71-75", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "B", StringComparison.Ordinal));
|
||||
|
||||
@@ -159,9 +180,11 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var row71D = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "71-75", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "D", StringComparison.Ordinal));
|
||||
var row71E = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "71-75", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "E", StringComparison.Ordinal));
|
||||
|
||||
@@ -175,9 +198,11 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var row91B = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "91-95", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "B", StringComparison.Ordinal));
|
||||
var row91C = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "91-95", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "C", StringComparison.Ordinal));
|
||||
|
||||
@@ -191,9 +216,11 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "mana", StringComparison.Ordinal));
|
||||
var parseResult = await LoadParseResultAsync(entry);
|
||||
var row86B = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "86-90", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "B", StringComparison.Ordinal));
|
||||
var row86C = parseResult.Table.Results.Single(item =>
|
||||
item.GroupKey is null &&
|
||||
string.Equals(item.RollBandLabel, "86-90", StringComparison.Ordinal) &&
|
||||
string.Equals(item.ColumnKey, "C", StringComparison.Ordinal));
|
||||
|
||||
@@ -201,7 +228,28 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
Assert.Contains("+16H - 8", row86C.RawAffixText, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
private static async Task<StandardCriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
|
||||
[Fact]
public async Task Grouped_magic_table_keeps_large_and_super_large_groups_distinct()
{
    // Snippet that must appear only in the super_large / SLAYING cell of the 251+ band.
    const string superLargeOnlySnippet = "Blast goes in through foe's eye";

    var manifest = LoadManifest();
    var entry = manifest.Tables.Single(item => string.Equals(item.Slug, "large_creature_magic", StringComparison.Ordinal));
    var parseResult = await LoadParseResultAsync(entry);

    Assert.Equal(["large", "super_large"], parseResult.Table.Groups.Select(item => item.GroupKey));

    // Looks up the single 251+ result for the given group and column.
    var findTopBandCell = (string groupKey, string columnKey) => parseResult.Table.Results.Single(item =>
        string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
        string.Equals(item.RollBandLabel, "251+", StringComparison.Ordinal) &&
        string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));

    var largeNormal = findTopBandCell("large", "NORMAL");
    var superSlaying = findTopBandCell("super_large", "SLAYING");

    Assert.DoesNotContain(superLargeOnlySnippet, largeNormal.DescriptionText, StringComparison.OrdinalIgnoreCase);
    Assert.Contains(superLargeOnlySnippet, superSlaying.DescriptionText, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
private static async Task<CriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
|
||||
{
|
||||
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");
|
||||
|
||||
@@ -211,7 +259,13 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
||||
}
|
||||
|
||||
var xmlContent = await File.ReadAllTextAsync(xmlPath);
|
||||
return Parser.Parse(entry, xmlContent);
|
||||
return entry.Family switch
|
||||
{
|
||||
"standard" => StandardParser.Parse(entry, xmlContent),
|
||||
"variant_column" => VariantColumnParser.Parse(entry, xmlContent),
|
||||
"grouped_variant" => GroupedVariantParser.Parse(entry, xmlContent),
|
||||
_ => throw new InvalidOperationException($"Unsupported manifest family '{entry.Family}'.")
|
||||
};
|
||||
}
|
||||
|
||||
private static CriticalImportManifest LoadManifest() =>
|
||||
|
||||
@@ -8,6 +8,8 @@ public sealed class CriticalImportCommandRunner
|
||||
private readonly ImportArtifactWriter artifactWriter = new();
|
||||
private readonly PdfXmlExtractor pdfXmlExtractor = new();
|
||||
private readonly StandardCriticalTableParser standardParser = new();
|
||||
private readonly VariantColumnCriticalTableParser variantColumnParser = new();
|
||||
private readonly GroupedVariantCriticalTableParser groupedVariantParser = new();
|
||||
|
||||
public async Task<int> RunAsync(ResetOptions options)
|
||||
{
|
||||
@@ -96,14 +98,24 @@ public sealed class CriticalImportCommandRunner
|
||||
?? throw new InvalidOperationException($"No enabled manifest entry was found for '{tableSlug}'.");
|
||||
}
|
||||
|
||||
private StandardCriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||
private CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||
{
|
||||
if (!string.Equals(entry.Family, "standard", StringComparison.OrdinalIgnoreCase))
|
||||
if (string.Equals(entry.Family, "standard", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
throw new InvalidOperationException($"Family '{entry.Family}' is not supported by phase 2.");
|
||||
return standardParser.Parse(entry, xmlContent);
|
||||
}
|
||||
|
||||
return standardParser.Parse(entry, xmlContent);
|
||||
if (string.Equals(entry.Family, "variant_column", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return variantColumnParser.Parse(entry, xmlContent);
|
||||
}
|
||||
|
||||
if (string.Equals(entry.Family, "grouped_variant", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return groupedVariantParser.Parse(entry, xmlContent);
|
||||
}
|
||||
|
||||
throw new InvalidOperationException($"Family '{entry.Family}' is not supported by the importer.");
|
||||
}
|
||||
|
||||
private static ImportArtifactPaths CreateArtifactPaths(string slug) =>
|
||||
|
||||
@@ -43,6 +43,15 @@ public sealed class CriticalImportLoader(string databasePath)
|
||||
Notes = table.Notes
|
||||
};
|
||||
|
||||
entity.Groups = table.Groups
|
||||
.Select(item => new CriticalGroup
|
||||
{
|
||||
GroupKey = item.GroupKey,
|
||||
Label = item.Label,
|
||||
SortOrder = item.SortOrder
|
||||
})
|
||||
.ToList();
|
||||
|
||||
entity.Columns = table.Columns
|
||||
.Select(item => new CriticalColumn
|
||||
{
|
||||
@@ -63,12 +72,14 @@ public sealed class CriticalImportLoader(string databasePath)
|
||||
})
|
||||
.ToList();
|
||||
|
||||
var groupsByKey = entity.Groups.ToDictionary(item => item.GroupKey, StringComparer.OrdinalIgnoreCase);
|
||||
var columnsByKey = entity.Columns.ToDictionary(item => item.ColumnKey, StringComparer.OrdinalIgnoreCase);
|
||||
var rollBandsByLabel = entity.RollBands.ToDictionary(item => item.Label, StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
entity.Results = table.Results
|
||||
.Select(item => new CriticalResult
|
||||
{
|
||||
CriticalGroup = item.GroupKey is null ? null : groupsByKey[item.GroupKey],
|
||||
CriticalColumn = columnsByKey[item.ColumnKey],
|
||||
CriticalRollBand = rollBandsByLabel[item.RollBandLabel],
|
||||
RawCellText = item.RawCellText,
|
||||
|
||||
@@ -11,7 +11,7 @@ public sealed class ImportArtifactWriter
|
||||
WriteIndented = true
|
||||
};
|
||||
|
||||
public async Task WriteAsync(ImportArtifactPaths artifactPaths, StandardCriticalTableParseResult parseResult, CancellationToken cancellationToken = default)
|
||||
public async Task WriteAsync(ImportArtifactPaths artifactPaths, CriticalTableParseResult parseResult, CancellationToken cancellationToken = default)
|
||||
{
|
||||
Directory.CreateDirectory(artifactPaths.DirectoryPath);
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
/// <summary>
/// Read-only holder for the outputs of a critical table parse: the parsed table,
/// the XML text fragments, the per-cell artifacts and the validation report.
/// </summary>
public sealed class CriticalTableParseResult
{
    /// <summary>Stores the four parse outputs exactly as supplied.</summary>
    public CriticalTableParseResult(
        ParsedCriticalTable table,
        IReadOnlyList<XmlTextFragment> fragments,
        IReadOnlyList<ParsedCriticalCellArtifact> cells,
        ImportValidationReport validationReport)
    {
        Table = table;
        Fragments = fragments;
        Cells = cells;
        ValidationReport = validationReport;
    }

    /// <summary>The parsed table.</summary>
    public ParsedCriticalTable Table { get; }

    /// <summary>The XML text fragments associated with this result.</summary>
    public IReadOnlyList<XmlTextFragment> Fragments { get; }

    /// <summary>The per-cell artifacts associated with this result.</summary>
    public IReadOnlyList<ParsedCriticalCellArtifact> Cells { get; }

    /// <summary>The validation report associated with this result.</summary>
    public ImportValidationReport ValidationReport { get; }
}
|
||||
@@ -0,0 +1,477 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Xml;
|
||||
using System.Xml.Linq;
|
||||
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
internal static class CriticalTableParserSupport
|
||||
{
|
||||
internal const int HeaderToBodyMinimumGap = 20;
|
||||
internal const int FooterLabelExclusionGap = 15;
|
||||
internal const int FooterPageNumberExclusionGap = 80;
|
||||
internal const int RowLabelDuplicateTolerance = 15;
|
||||
internal const int TopGroupingTolerance = 2;
|
||||
|
||||
private static readonly Regex MultiFragmentSplitRegex = new(@"\S(?:.*?\S)?(?=(?:\s{2,}|$))", RegexOptions.Compiled);
|
||||
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[–-])", RegexOptions.Compiled);
|
||||
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-|–)\d+\)$", RegexOptions.Compiled);
|
||||
|
||||
/// <summary>
/// Parses <paramref name="xmlContent"/> and returns one fragment for every <c>text</c> element
/// directly under each <c>page</c> element whose normalized text is not blank.
/// </summary>
/// <param name="xmlContent">The XML document text. DTD declarations are ignored, not processed.</param>
/// <returns>The fragments in document order (pages first, then text elements within a page).</returns>
/// <exception cref="InvalidOperationException">
/// A <c>text</c> element is missing its <c>top</c>, <c>left</c>, <c>width</c> or <c>height</c> attribute.
/// </exception>
/// <exception cref="FormatException">A numeric attribute is not a valid integer.</exception>
internal static List<XmlTextFragment> LoadFragments(string xmlContent)
{
    // The attribute values are machine-generated, so they are parsed with the invariant
    // culture; the host's current culture must not influence how they are read.
    static int ParseInteger(string value) =>
        int.Parse(value, System.Globalization.CultureInfo.InvariantCulture);

    static int ReadRequiredInteger(XElement element, string attributeName) =>
        ParseInteger(
            element.Attribute(attributeName)?.Value
            ?? throw new InvalidOperationException($"Missing text {attributeName} attribute."));

    using var stringReader = new StringReader(xmlContent);
    using var xmlReader = XmlReader.Create(
        stringReader,
        new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Ignore
        });

    var document = XDocument.Load(xmlReader);

    return document.Descendants("page")
        .SelectMany(page =>
        {
            // A page without a "number" attribute is treated as page 1.
            var pageNumber = ParseInteger(page.Attribute("number")?.Value ?? "1");
            return page.Elements("text")
                .Select(item => new XmlTextFragment(
                    pageNumber,
                    ReadRequiredInteger(item, "top"),
                    ReadRequiredInteger(item, "left"),
                    ReadRequiredInteger(item, "width"),
                    ReadRequiredInteger(item, "height"),
                    // Concatenate every descendant text node so nested markup does not drop text.
                    NormalizeText(string.Concat(item.DescendantNodes().OfType<XText>().Select(node => node.Value)))))
                .Where(item => !string.IsNullOrWhiteSpace(item.Text));
        })
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Collects the roll-band label fragments of a table body.
/// </summary>
/// <remarks>
/// A fragment is a candidate when it lies left of <paramref name="leftCutoff"/>, at or below
/// <paramref name="bodyStartTop"/>, more than <see cref="FooterLabelExclusionGap"/> above
/// <paramref name="keyTop"/>, and its text is a roll-band label or the first half of a split one.
/// Split labels are merged with the following candidate, and a label that repeats the previously
/// kept label within <see cref="RowLabelDuplicateTolerance"/> vertical units is dropped.
/// </remarks>
/// <returns>The kept label fragments ordered by top, then left.</returns>
internal static List<XmlTextFragment> FindRowLabelFragments(
    IReadOnlyList<XmlTextFragment> fragments,
    int leftCutoff,
    int bodyStartTop,
    int keyTop)
{
    var footerLimit = keyTop - FooterLabelExclusionGap;

    var candidates = (
        from fragment in fragments
        where fragment.Left < leftCutoff
            && fragment.Top >= bodyStartTop
            && fragment.Top < footerLimit
            && (IsRollBandLabel(fragment.Text) || LooksLikeSplitRollBandStart(fragment.Text))
        orderby fragment.Top, fragment.Left
        select fragment).ToList();

    var labels = new List<XmlTextFragment>();
    var position = 0;

    while (position < candidates.Count)
    {
        XmlTextFragment label;

        if (TryMergeSplitRollBand(candidates, position, out var combined))
        {
            // The merge consumed this candidate and the one after it.
            label = combined;
            position += 2;
        }
        else
        {
            label = candidates[position];
            position++;

            // An unmerged split start ("71-") is not a usable label on its own.
            if (!IsRollBandLabel(label.Text))
            {
                continue;
            }
        }

        if (labels.Count > 0)
        {
            var lastKept = labels[^1];
            var sameLabel = string.Equals(
                NormalizeRollBandLabel(lastKept.Text),
                NormalizeRollBandLabel(label.Text),
                StringComparison.OrdinalIgnoreCase);

            if (sameLabel && Math.Abs(lastKept.Top - label.Top) <= RowLabelDuplicateTolerance)
            {
                continue;
            }
        }

        labels.Add(label);
    }

    return labels;
}
|
||||
|
||||
/// <summary>
/// Returns whether the trimmed <paramref name="value"/> is a roll-band label: a 2-3 digit number,
/// a range of two such numbers joined by a hyphen, or a 2-3 digit number followed by '+'.
/// </summary>
internal static bool IsRollBandLabel(string value)
{
    var trimmed = value.Trim();
    return Regex.IsMatch(trimmed, @"^\d{2,3}(?:\s*-\s*\d{2,3})?$|^\d{2,3}\+$");
}
|
||||
|
||||
/// <summary>
/// Returns whether <paramref name="fragment"/> lies left of <paramref name="leftCutoff"/> and its
/// text is either a roll-band label or the first half of a split roll-band label.
/// </summary>
internal static bool IsPotentialRowLabelFragment(XmlTextFragment fragment, int leftCutoff)
{
    if (fragment.Left >= leftCutoff)
    {
        return false;
    }

    return IsRollBandLabel(fragment.Text) || LooksLikeSplitRollBandStart(fragment.Text);
}
|
||||
|
||||
/// <summary>
/// Collapses whitespace in <paramref name="label"/> and removes any whitespace around hyphens,
/// so that "01 - 05" becomes "01-05".
/// </summary>
internal static string NormalizeRollBandLabel(string label)
{
    var collapsed = CollapseWhitespace(label);
    return Regex.Replace(collapsed, @"\s*-\s*", "-");
}
|
||||
|
||||
/// <summary>
/// Builds a roll band from a label.
/// </summary>
/// <remarks>
/// "N+" yields a band starting at N with a null upper value; "N" yields a band from N to N;
/// "N-M" yields a band from N to M. The label is normalized before it is interpreted.
/// </remarks>
/// <param name="label">The raw roll-band label.</param>
/// <param name="sortOrder">Passed through unchanged to the created band.</param>
internal static ParsedCriticalRollBand CreateRollBand(string label, int sortOrder)
{
    var normalizedLabel = NormalizeRollBandLabel(label);

    if (normalizedLabel.EndsWith('+'))
    {
        var openEndedStart = normalizedLabel.Substring(0, normalizedLabel.Length - 1);
        return new ParsedCriticalRollBand(normalizedLabel, int.Parse(openEndedStart), null, sortOrder);
    }

    var bounds = normalizedLabel.Split('-', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    var minimum = int.Parse(bounds[0]);

    // A single value is both the lower and the upper bound.
    var maximum = bounds.Length == 1 ? minimum : int.Parse(bounds[1]);

    return new ParsedCriticalRollBand(normalizedLabel, minimum, maximum, sortOrder);
}
|
||||
|
||||
/// <summary>
/// Maps a horizontal position to a column key. The boundary between two neighbouring columns is
/// the midpoint of their centers; a position exactly on a boundary belongs to the right-hand column.
/// </summary>
/// <param name="centerX">The horizontal position to classify.</param>
/// <param name="columns">Columns in left-to-right order; must not be empty.</param>
/// <returns>The key of the first column whose right boundary lies beyond <paramref name="centerX"/>, otherwise the last column's key.</returns>
internal static string ResolveColumn(double centerX, IReadOnlyList<(string Key, double CenterX)> columns)
{
    var lastIndex = columns.Count - 1;
    var position = 0;

    while (position < lastIndex)
    {
        var midpoint = (columns[position].CenterX + columns[position + 1].CenterX) / 2.0;
        if (centerX < midpoint)
        {
            return columns[position].Key;
        }

        position++;
    }

    return columns[^1].Key;
}
|
||||
|
||||
/// <summary>
/// Groups fragments into visual lines and returns each line's text.
/// </summary>
/// <remarks>
/// Fragments are visited by top, then left. A fragment starts a new line when its top differs from
/// the top of the current line's first fragment by more than <see cref="TopGroupingTolerance"/>.
/// Each line's texts are joined left-to-right with single spaces, whitespace is collapsed, and
/// blank lines are dropped.
/// </remarks>
internal static IReadOnlyList<string> BuildLines(IReadOnlyList<XmlTextFragment> fragments)
{
    var rows = new List<List<XmlTextFragment>>();
    List<XmlTextFragment>? currentRow = null;

    foreach (var fragment in fragments.OrderBy(item => item.Top).ThenBy(item => item.Left))
    {
        if (currentRow is null || Math.Abs(currentRow[0].Top - fragment.Top) > TopGroupingTolerance)
        {
            currentRow = [fragment];
            rows.Add(currentRow);
            continue;
        }

        currentRow.Add(fragment);
    }

    var texts = new List<string>(rows.Count);

    foreach (var row in rows)
    {
        var joined = string.Join(' ', row.OrderBy(item => item.Left).Select(item => item.Text));
        var collapsed = CollapseWhitespace(joined);
        if (!string.IsNullOrWhiteSpace(collapsed))
        {
            texts.Add(collapsed);
        }
    }

    return texts;
}
|
||||
|
||||
/// <summary>
/// Heuristically decides whether a line of cell text is an affix line rather than description text.
/// </summary>
/// <remarks>
/// Checks run in this order on the trimmed line:
/// an empty line is not an affix; a lone hyphen, en dash or em dash is;
/// a line starting with a conditional word ("with", "w/o", "without", "if", "while", "until",
/// "unless") is an affix only when it contains a colon;
/// a line containing a legend symbol is an affix when it also contains a digit, or when nothing
/// but whitespace remains after removing the symbols and the characters + - – ( ) /;
/// otherwise the line is an affix when it starts with '+' or one of the characters U+2211, U+220F,
/// U+03C0 or U+222B, matches one of the affix regexes, or contains a spaced hyphen or en dash.
/// </remarks>
/// <param name="line">The line to classify.</param>
/// <param name="affixLegendSymbols">Legend symbols known for the table; may be empty.</param>
internal static bool IsAffixLikeLine(string line, ISet<string> affixLegendSymbols)
{
    var text = line.Trim();
    if (text.Length == 0)
    {
        return false;
    }

    if (text is "-" or "\u2013" or "\u2014")
    {
        return true;
    }

    string[] conditionalPrefixes = ["with ", "w/o ", "without ", "if ", "while ", "until ", "unless "];
    if (conditionalPrefixes.Any(prefix => text.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)))
    {
        return text.Contains(':', StringComparison.Ordinal);
    }

    var mentionsLegendSymbol =
        affixLegendSymbols.Count > 0 &&
        affixLegendSymbols.Any(symbol => text.Contains(symbol, StringComparison.Ordinal));

    if (mentionsLegendSymbol)
    {
        if (text.Any(char.IsDigit))
        {
            return true;
        }

        // Strip longer symbols first so a shorter symbol cannot break up a longer one.
        var leftover = affixLegendSymbols
            .OrderByDescending(symbol => symbol.Length)
            .Aggregate(text, (current, symbol) => current.Replace(symbol, string.Empty, StringComparison.Ordinal));

        foreach (var punctuation in new[] { "+", "-", "–", "(", ")", "/" })
        {
            leftover = leftover.Replace(punctuation, string.Empty, StringComparison.Ordinal);
        }

        if (string.IsNullOrWhiteSpace(leftover))
        {
            return true;
        }
    }

    string[] leadingMarkers = ["+", "\u2211", "\u220F", "\u03C0", "\u222B"];
    if (leadingMarkers.Any(marker => text.StartsWith(marker, StringComparison.Ordinal)))
    {
        return true;
    }

    if (StandaloneModifierAffixLineRegex.IsMatch(text) || NumericAffixLineRegex.IsMatch(text))
    {
        return true;
    }

    return text.Contains(" - ", StringComparison.Ordinal) ||
           text.Contains(" – ", StringComparison.Ordinal);
}
|
||||
|
||||
/// <summary>
/// Counts the runs of consecutive lines that share the same classification (affix-like or not).
/// </summary>
/// <returns>0 for no lines; otherwise 1 plus the number of classification changes between neighbouring lines.</returns>
internal static int CountLineTypeSegments(IReadOnlyList<string> lines, ISet<string> affixLegendSymbols)
{
    var segments = 0;
    bool? lastKind = null;

    foreach (var line in lines)
    {
        var kind = IsAffixLikeLine(line, affixLegendSymbols);

        // The first line always differs from the initial null and opens the first segment.
        if (lastKind != kind)
        {
            segments++;
            lastKind = kind;
        }
    }

    return segments;
}
|
||||
|
||||
/// <summary>
/// Trims <paramref name="value"/> and replaces every run of whitespace inside it with one space.
/// </summary>
internal static string CollapseWhitespace(string value)
{
    var trimmed = value.Trim();
    return Regex.Replace(trimmed, @"\s+", " ");
}
|
||||
|
||||
/// <summary>
/// Replaces non-breaking spaces, carriage returns and line feeds with plain spaces, replaces the
/// right single quotation mark with an apostrophe, and trims the result.
/// </summary>
internal static string NormalizeText(string value)
{
    var normalized = value.Replace('\u00a0', ' ');
    normalized = normalized.Replace('\r', ' ');
    normalized = normalized.Replace('\n', ' ');
    normalized = normalized.Replace('’', '\'');
    return normalized.Trim();
}
|
||||
|
||||
/// <summary>
/// Scans the footer lines (fragments at or below <paramref name="keyTop"/>, allowing
/// <see cref="TopGroupingTolerance"/> of slack) for legend entries and collects their symbols.
/// </summary>
/// <param name="fragments">All fragments to search.</param>
/// <param name="keyTop">Top of the footer; <see cref="int.MaxValue"/> is treated as "no footer".</param>
/// <returns>The detected symbols, or an empty set when <paramref name="keyTop"/> is <see cref="int.MaxValue"/>.</returns>
internal static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
{
    if (keyTop == int.MaxValue)
    {
        return new HashSet<string>();
    }

    // Each pattern captures the single non-whitespace character after a legend entry's '='.
    string[] legendPatterns =
    [
        @"must parry\s*=\s*(\S)",
        @"no parry\s*=\s*(\S)",
        @"stun(?:ned)?\s*=\s*(\S)",
        @"bleed\s*=\s*(\S)",
        @"powerpoint modification.*=\s*(\S)"
    ];

    var footerFragments = fragments
        .Where(item => item.Top >= keyTop - TopGroupingTolerance)
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    var footerLines = GroupByTop(footerFragments)
        .Select(row => CollapseWhitespace(string.Join(' ', row.OrderBy(item => item.Left).Select(item => item.Text))))
        .ToList();

    var detected = new HashSet<string>(StringComparer.Ordinal);

    foreach (var footerLine in footerLines)
    {
        foreach (var pattern in legendPatterns)
        {
            AddLegendMatch(detected, footerLine, pattern);
        }
    }

    return detected;
}
|
||||
|
||||
/// <summary>
/// Runs every body fragment through <c>SplitBoundaryCrossingAffixFragment</c> and returns the
/// concatenated results in the original fragment order.
/// </summary>
internal static List<XmlTextFragment> SplitBoundaryCrossingAffixFragments(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<(string Key, double CenterX)> columnCenters,
    ISet<string> affixLegendSymbols)
{
    return bodyFragments
        .SelectMany(fragment => SplitBoundaryCrossingAffixFragment(fragment, columnCenters, affixLegendSymbols))
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Groups body fragments into visual lines and classifies each line.
/// </summary>
/// <remarks>
/// A line is affix-like when it has at least one non-blank per-column text and every such text
/// satisfies <see cref="IsAffixLikeLine"/>. Per-column texts are built by assigning each fragment
/// to a column through <see cref="ResolveColumn"/> and joining the texts left-to-right.
/// </remarks>
/// <returns>One entry per line: the top of the line's first fragment and its classification.</returns>
internal static List<(int Top, bool IsAffixLike)> BuildBodyLines(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<(string Key, double CenterX)> columnCenters,
    ISet<string> affixLegendSymbols)
{
    var orderedFragments = bodyFragments
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    var classifiedLines = new List<(int Top, bool IsAffixLike)>();

    foreach (var row in GroupByTop(orderedFragments))
    {
        var fragmentsByColumn = row.GroupBy(
            item => ResolveColumn(item.CenterX, columnCenters),
            StringComparer.OrdinalIgnoreCase);

        var columnTexts = new List<string>();
        foreach (var columnFragments in fragmentsByColumn)
        {
            var columnText = CollapseWhitespace(
                string.Join(' ', columnFragments.OrderBy(item => item.Left).Select(item => item.Text)));

            if (!string.IsNullOrWhiteSpace(columnText))
            {
                columnTexts.Add(columnText);
            }
        }

        // A line with no text at all is never affix-like.
        var everyColumnIsAffix =
            columnTexts.Count != 0 &&
            !columnTexts.Any(text => !IsAffixLikeLine(text, affixLegendSymbols));

        classifiedLines.Add((row[0].Top, everyColumnIsAffix));
    }

    return classifiedLines;
}
|
||||
|
||||
/// <summary>
/// Returns whether <paramref name="fragment"/> is a bare 2-3 digit number located no more than
/// <see cref="FooterPageNumberExclusionGap"/> above <paramref name="keyTop"/> (or anywhere below it).
/// Always false when <paramref name="keyTop"/> is <see cref="int.MaxValue"/>.
/// </summary>
internal static bool IsFooterPageNumberFragment(XmlTextFragment fragment, int keyTop) =>
    keyTop != int.MaxValue &&
    fragment.Top >= keyTop - FooterPageNumberExclusionGap &&
    Regex.IsMatch(fragment.Text, @"^\d{2,3}$");
|
||||
|
||||
/// <summary>
/// Splits fragments, in the order given, into consecutive groups that share a vertical position.
/// </summary>
/// <remarks>
/// A fragment joins the current group when its top is within <see cref="TopGroupingTolerance"/> of
/// the top of that group's first fragment; otherwise it starts a new group. The input is not
/// sorted here, so callers decide the visiting order.
/// </remarks>
internal static IEnumerable<List<XmlTextFragment>> GroupByTop(IReadOnlyList<XmlTextFragment> fragments)
{
    var groups = new List<List<XmlTextFragment>>();
    List<XmlTextFragment>? currentGroup = null;

    foreach (var fragment in fragments)
    {
        if (currentGroup is not null && Math.Abs(currentGroup[0].Top - fragment.Top) <= TopGroupingTolerance)
        {
            currentGroup.Add(fragment);
            continue;
        }

        currentGroup = [fragment];
        groups.Add(currentGroup);
    }

    return groups;
}
|
||||
|
||||
/// <summary>
/// Returns whether the trimmed <paramref name="value"/> is a 2-3 digit number followed by a
/// trailing hyphen, i.e. the first half of a roll-band range whose end is in another fragment.
/// </summary>
private static bool LooksLikeSplitRollBandStart(string value)
{
    var trimmed = value.Trim();
    return Regex.IsMatch(trimmed, @"^\d{2,3}\s*-$");
}
|
||||
|
||||
/// <summary>
/// Tries to join a split roll-band start at <paramref name="index"/> (for example "71-") with the
/// next candidate (for example "75") into a single label fragment.
/// </summary>
/// <remarks>
/// The merge happens only when the next candidate is on the same page, is a bare 2-3 digit number,
/// sits strictly below the current one by at most <see cref="RowLabelDuplicateTolerance"/> + 5, and
/// is horizontally within 20 of it. The merged fragment keeps the current fragment's page and top
/// and spans the horizontal extent of both parts.
/// </remarks>
/// <param name="candidates">Candidate label fragments.</param>
/// <param name="index">Index of the possible split start.</param>
/// <param name="mergedCandidate">The merged fragment on success; null (suppressed) on failure.</param>
/// <returns>True when a merged fragment was produced.</returns>
private static bool TryMergeSplitRollBand(IReadOnlyList<XmlTextFragment> candidates, int index, out XmlTextFragment mergedCandidate)
{
    mergedCandidate = null!;

    var current = candidates[index];
    var hasFollower = index + 1 < candidates.Count;
    if (!LooksLikeSplitRollBandStart(current.Text) || !hasFollower)
    {
        return false;
    }

    var next = candidates[index + 1];
    var continuesLabel =
        current.PageNumber == next.PageNumber &&
        Regex.IsMatch(next.Text.Trim(), @"^\d{2,3}$") &&
        next.Top > current.Top &&
        next.Top - current.Top <= RowLabelDuplicateTolerance + 5 &&
        Math.Abs(next.Left - current.Left) <= 20;

    if (!continuesLabel)
    {
        return false;
    }

    var startDigits = Regex.Match(current.Text, @"\d{2,3}").Value;
    var mergedLabel = $"{startDigits}-{next.Text.Trim()}";

    // Bounding box that covers both parts horizontally.
    var mergedLeft = Math.Min(current.Left, next.Left);
    var mergedRight = Math.Max(current.Left + current.Width, next.Left + next.Width);

    mergedCandidate = new XmlTextFragment(
        current.PageNumber,
        current.Top,
        mergedLeft,
        mergedRight - mergedLeft,
        Math.Max(current.Height, next.Height),
        mergedLabel);
    return true;
}
|
||||
|
||||
/// <summary>
/// Splits one affix fragment that straddles a column boundary into one fragment per text run
/// matched by <c>MultiFragmentSplitRegex</c>.
/// </summary>
/// <param name="fragment">Fragment to examine.</param>
/// <param name="columnCenters">Column keys with their centre X coordinates, left to right.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to the affix-line check.</param>
/// <returns>
/// The split pieces when at least two non-empty pieces exist and they land in more than one
/// column (or in a column other than the original one); otherwise a single-element list holding
/// the untouched <paramref name="fragment"/>.
/// </returns>
private static IReadOnlyList<XmlTextFragment> SplitBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<(string Key, double CenterX)> columnCenters,
    ISet<string> affixLegendSymbols)
{
    IReadOnlyList<XmlTextFragment> unchanged = [fragment];

    if (!LooksLikeBoundaryCrossingAffixFragment(fragment, columnCenters, affixLegendSymbols))
    {
        return unchanged;
    }

    var runs = MultiFragmentSplitRegex.Matches(fragment.Text);
    if (runs.Count < 2)
    {
        return unchanged;
    }

    // Horizontal positions are estimated by spreading the fragment width evenly over its characters.
    var widthPerCharacter = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);
    var pieces = new List<XmlTextFragment>(runs.Count);

    foreach (Match run in runs)
    {
        var pieceText = CollapseWhitespace(run.Value);
        if (pieceText.Length != 0)
        {
            var pieceLeft = fragment.Left + (int)Math.Round(widthPerCharacter * run.Index);
            var pieceWidth = Math.Max(1, (int)Math.Round(widthPerCharacter * run.Length));
            pieces.Add(new XmlTextFragment(
                fragment.PageNumber,
                fragment.Top,
                pieceLeft,
                pieceWidth,
                fragment.Height,
                pieceText));
        }
    }

    if (pieces.Count < 2)
    {
        return unchanged;
    }

    var originalColumn = ResolveColumn(fragment.CenterX, columnCenters);
    var landingColumns = new HashSet<string>(
        pieces.Select(piece => ResolveColumn(piece.CenterX, columnCenters)),
        StringComparer.OrdinalIgnoreCase);

    // Splitting only pays off when it actually moves text into a different column.
    var everythingStaysPut =
        landingColumns.Count <= 1 &&
        landingColumns.All(column => string.Equals(column, originalColumn, StringComparison.OrdinalIgnoreCase));

    return everythingStaysPut ? unchanged : pieces;
}
|
||||
|
||||
/// <summary>
/// Reports whether an affix-like fragment containing a space extends across the midpoint
/// between two neighbouring column centres.
/// </summary>
/// <param name="fragment">Fragment whose text and horizontal extent are examined.</param>
/// <param name="columnCenters">Column keys with their centre X coordinates, left to right.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to the affix-line check.</param>
/// <returns><c>true</c> when the fragment starts left of a boundary and ends right of it.</returns>
private static bool LooksLikeBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<(string Key, double CenterX)> columnCenters,
    ISet<string> affixLegendSymbols)
{
    if (!IsAffixLikeLine(fragment.Text, affixLegendSymbols))
    {
        return false;
    }

    if (!fragment.Text.Contains(" ", StringComparison.Ordinal))
    {
        return false;
    }

    var leftEdge = fragment.Left;
    var rightEdge = fragment.Left + fragment.Width;

    // Walk every pair of adjacent columns; the boundary is halfway between their centres.
    for (var right = 1; right < columnCenters.Count; right++)
    {
        var midpoint = (columnCenters[right - 1].CenterX + columnCenters[right].CenterX) / 2.0;
        var straddles = leftEdge < midpoint && rightEdge > midpoint;
        if (straddles)
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Adds the first capture group of every case-insensitive match of <paramref name="pattern"/>
/// in <paramref name="value"/> to <paramref name="symbols"/>.
/// </summary>
/// <param name="symbols">Set that receives the captured legend symbols.</param>
/// <param name="value">Text to search.</param>
/// <param name="pattern">Regular expression whose group 1 captures the symbol.</param>
/// <remarks>
/// Captures that did not participate in the match, or that matched the empty string, are skipped.
/// An empty symbol would make every later <c>Contains(symbol)</c> legend check succeed.
/// Patterns without a capture group add nothing.
/// </remarks>
private static void AddLegendMatch(HashSet<string> symbols, string value, string pattern)
{
    foreach (Match match in Regex.Matches(value, pattern, RegexOptions.IgnoreCase))
    {
        if (match.Groups.Count <= 1)
        {
            continue;
        }

        var capture = match.Groups[1];
        if (capture.Success && capture.Length > 0)
        {
            symbols.Add(capture.Value);
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,306 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
public sealed class GroupedVariantCriticalTableParser
|
||||
{
|
||||
// Groups in the order their headers are expected from left to right.
private static readonly ParsedCriticalGroup[] ExpectedGroups = new ParsedCriticalGroup[]
{
    new ParsedCriticalGroup("large", "Large Creatures", 1),
    new ParsedCriticalGroup("super_large", "Super Large Creatures", 2),
};

// Variant columns that repeat inside every group, again left to right.
private static readonly ParsedCriticalColumn[] ExpectedColumns = new ParsedCriticalColumn[]
{
    new ParsedCriticalColumn("NORMAL", "Normal", "variant", 1),
    new ParsedCriticalColumn("SLAYING", "Slaying", "variant", 2),
};
|
||||
|
||||
/// <summary>
/// Parses a grouped-variant critical table (two groups, each with a Normal and a Slaying column)
/// from the PDF-to-XML text of one table.
/// </summary>
/// <param name="entry">Manifest entry supplying slug, display name, family and PDF path.</param>
/// <param name="xmlContent">XML text handed to <c>CriticalTableParserSupport.LoadFragments</c>.</param>
/// <returns>The parsed table, the raw fragments, per-cell artifacts and a validation report.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by the header lookups when the group or column header row cannot be found.
/// </exception>
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
{
    var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
    var groupHeaders = FindGroupHeaders(fragments);
    var columnHeaders = FindColumnHeaders(fragments);
    var errors = new List<string>();
    var warnings = new List<string>();

    // Column headers are read left to right; position N belongs to group N / columnsPerGroup
    // and to variant column N % columnsPerGroup.
    var columnsPerGroup = ExpectedColumns.Length;
    var combinedColumnAnchors = columnHeaders
        .OrderBy(header => header.Left)
        .Select((header, position) =>
        {
            var groupKey = ExpectedGroups[position / columnsPerGroup].GroupKey;
            var columnKey = ExpectedColumns[position % columnsPerGroup].ColumnKey;
            return (GroupKey: groupKey, ColumnKey: columnKey, CompositeKey: $"{groupKey}:{columnKey}", CenterX: header.CenterX);
        })
        .ToList();

    var lowestHeaderTop = Math.Max(
        groupHeaders.Max(header => header.Top),
        columnHeaders.Max(header => header.Top));
    var bodyStartTop = lowestHeaderTop + CriticalTableParserSupport.HeaderToBodyMinimumGap;

    static bool StartsLegend(XmlTextFragment item) =>
        string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
        item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
        item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase);

    // int.MaxValue stands for "no legend found".
    var keyTop = fragments
        .Where(item => StartsLegend(item))
        .Select(item => item.Top)
        .DefaultIfEmpty(int.MaxValue)
        .Min();

    var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
    var leftCutoff = columnHeaders.Min(item => item.Left) - 10;

    var rowAnchors = CriticalTableParserSupport
        .FindRowLabelFragments(fragments, leftCutoff, bodyStartTop, keyTop)
        .OrderBy(label => label.Top)
        .Select((label, position) => new RowAnchor(
            CriticalTableParserSupport.NormalizeRollBandLabel(label.Text),
            label.Top,
            position + 1))
        .ToList();

    if (rowAnchors.Count == 0)
    {
        errors.Add("No roll-band labels were found in the XML artifact.");
    }

    var columnCenters = combinedColumnAnchors
        .Select(anchor => (anchor.CompositeKey, anchor.CenterX))
        .ToList();

    // Body text is everything between the headers and the legend that is not a page number,
    // a row label, or one of the header fragments themselves.
    bool IsBodyFragment(XmlTextFragment item)
    {
        if (item.Top < bodyStartTop || item.Top >= keyTop - CriticalTableParserSupport.TopGroupingTolerance)
        {
            return false;
        }

        if (CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) ||
            CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff))
        {
            return false;
        }

        var isAnchoredLabel = rowAnchors.Any(anchor =>
            anchor.Top == item.Top &&
            string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase));
        if (isAnchoredLabel)
        {
            return false;
        }

        return !groupHeaders.Contains(item) && !columnHeaders.Contains(item);
    }

    List<XmlTextFragment> bodyFragments = fragments.Where(item => IsBodyFragment(item)).ToList();
    bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
    var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);

    var parsedRollBands = rowAnchors
        .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
        .ToList();

    // rowBoundaries[i] .. rowBoundaries[i + 1] is the half-open vertical range of row i.
    var rowCount = rowAnchors.Count;
    var rowBoundaries = new int[rowCount + 1];
    rowBoundaries[0] = bodyStartTop;
    for (var boundary = 1; boundary < rowCount; boundary++)
    {
        rowBoundaries[boundary] = ResolveRowBoundaryTop(rowAnchors[boundary - 1], rowAnchors[boundary], bodyLines);
    }

    rowBoundaries[rowCount] = keyTop - 1;

    var cellEntries = new List<CellEntry>();

    for (var rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        var rowLabel = rowAnchors[rowIndex].Label;
        var rowTop = rowBoundaries[rowIndex];
        var rowBottom = rowBoundaries[rowIndex + 1];

        var rowFragments = bodyFragments
            .Where(item => item.Top >= rowTop && item.Top < rowBottom)
            .ToList();

        foreach (var anchor in combinedColumnAnchors)
        {
            var cellFragments = rowFragments
                .Where(item => CriticalTableParserSupport.ResolveColumn(item.CenterX, columnCenters) == anchor.CompositeKey)
                .OrderBy(item => item.Top)
                .ThenBy(item => item.Left)
                .ToList();

            if (cellFragments.Count > 0)
            {
                cellEntries.Add(new CellEntry(
                    anchor.GroupKey,
                    rowLabel,
                    rowIndex,
                    anchor.ColumnKey,
                    CriticalTableParserSupport.BuildLines(cellFragments).ToList()));
            }
            else
            {
                errors.Add($"Missing content for roll band '{rowLabel}', group '{anchor.GroupKey}', column '{anchor.ColumnKey}'.");
            }
        }
    }

    RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);

    var parsedCells = new List<ParsedCriticalCellArtifact>();
    var parsedResults = new List<ParsedCriticalResult>();

    var orderedCells = cellEntries
        .OrderBy(item => item.RowIndex)
        .ThenBy(item => item.GroupKey, StringComparer.Ordinal)
        .ThenBy(item => item.ColumnKey, StringComparer.Ordinal);

    foreach (var cell in orderedCells)
    {
        if (CriticalTableParserSupport.CountLineTypeSegments(cell.Lines, affixLegendSymbols) > 2)
        {
            errors.Add($"Cell '{cell.RollBandLabel}/{cell.GroupKey}/{cell.ColumnKey}' interleaves prose and affix lines.");
        }

        // Partition the cell's lines into affix lines and descriptive prose, keeping their order.
        var affixLines = new List<string>();
        var proseLines = new List<string>();
        foreach (var line in cell.Lines)
        {
            if (CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols))
            {
                affixLines.Add(line);
            }
            else
            {
                proseLines.Add(line);
            }
        }

        var rawCellText = string.Join(Environment.NewLine, cell.Lines);
        var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', proseLines));
        string? rawAffixText = null;
        if (affixLines.Count != 0)
        {
            rawAffixText = string.Join(Environment.NewLine, affixLines);
        }

        parsedCells.Add(new ParsedCriticalCellArtifact(
            cell.GroupKey,
            cell.RollBandLabel,
            cell.ColumnKey,
            cell.Lines,
            rawCellText,
            descriptionText,
            rawAffixText));

        parsedResults.Add(new ParsedCriticalResult(
            cell.GroupKey,
            cell.ColumnKey,
            cell.RollBandLabel,
            rawCellText,
            descriptionText,
            rawAffixText));
    }

    var expectedCellCount = rowCount * ExpectedGroups.Length * ExpectedColumns.Length;
    if (parsedCells.Count != expectedCellCount)
    {
        errors.Add($"Expected {expectedCellCount} parsed cells but produced {parsedCells.Count}.");
    }

    var validationReport = new ImportValidationReport(
        errors.Count == 0,
        errors,
        warnings,
        rowCount,
        parsedCells.Count);

    var table = new ParsedCriticalTable(
        entry.Slug,
        entry.DisplayName,
        entry.Family,
        Path.GetFileName(entry.PdfPath),
        "Imported from PDF XML extraction.",
        ExpectedGroups,
        ExpectedColumns,
        parsedRollBands,
        parsedResults);

    return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
}
|
||||
|
||||
/// <summary>
/// Locates the row of group headers: fragments on (approximately) one line whose trimmed texts,
/// read left to right, equal the labels of <c>ExpectedGroups</c> ignoring case.
/// </summary>
/// <param name="fragments">All fragments of the table.</param>
/// <returns>The header fragments ordered by <c>Left</c>.</returns>
/// <exception cref="InvalidOperationException">No line carries the expected label sequence.</exception>
private static List<XmlTextFragment> FindGroupHeaders(IReadOnlyList<XmlTextFragment> fragments)
{
    var expectedLabels = ExpectedGroups.Select(group => group.Label).ToList();
    var knownLabels = new HashSet<string>(expectedLabels, StringComparer.OrdinalIgnoreCase);

    var candidates = fragments
        .Where(item => knownLabels.Contains(item.Text.Trim()))
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    // The first line (top to bottom) with exactly the expected sequence wins.
    var headerRow = CriticalTableParserSupport.GroupByTop(candidates)
        .Select(line => line.OrderBy(item => item.Left).ToList())
        .FirstOrDefault(line => line
            .Select(item => item.Text.Trim())
            .SequenceEqual(expectedLabels, StringComparer.OrdinalIgnoreCase));

    return headerRow
        ?? throw new InvalidOperationException("Could not find the grouped-variant section headers in the XML artifact.");
}
|
||||
|
||||
/// <summary>
/// Locates the column header row: fragments on (approximately) one line whose trimmed,
/// lower-cased texts read "normal", "slaying", "normal", "slaying" from left to right.
/// </summary>
/// <param name="fragments">All fragments of the table.</param>
/// <returns>The four header fragments ordered by <c>Left</c>.</returns>
/// <exception cref="InvalidOperationException">No line carries the expected header sequence.</exception>
private static List<XmlTextFragment> FindColumnHeaders(IReadOnlyList<XmlTextFragment> fragments)
{
    string[] expectedSequence = ["normal", "slaying", "normal", "slaying"];

    static string Canonical(XmlTextFragment item) => item.Text.Trim().ToLowerInvariant();

    var candidates = fragments
        .Where(item => Canonical(item) is "normal" or "slaying")
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    // The first line (top to bottom) with exactly the expected sequence wins.
    var headerRow = CriticalTableParserSupport.GroupByTop(candidates)
        .Select(line => line.OrderBy(item => item.Left).ToList())
        .FirstOrDefault(line => line
            .Select(item => Canonical(item))
            .SequenceEqual(expectedSequence));

    return headerRow
        ?? throw new InvalidOperationException("Could not find the grouped-variant column header row in the XML artifact.");
}
|
||||
|
||||
/// <summary>
/// Moves affix lines that ended up at the top of a cell back to the bottom of the cell directly
/// above it (same group and column, previous row).
/// </summary>
/// <param name="cellEntries">Cells to repair in place; their <c>Lines</c> lists are mutated.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to the affix-line check.</param>
/// <remarks>
/// Rows are processed top to bottom, so a cell has already given up its own leading affix lines
/// before it is used as the cell above. A cell consisting only of affix lines is left alone.
/// Cells are indexed once by (row, group, column) instead of scanning the whole list for every
/// lookup; two entries at the same position make the index build throw <see cref="ArgumentException"/>.
/// </remarks>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    if (cellEntries.Count == 0)
    {
        return;
    }

    var cellsByPosition = cellEntries.ToDictionary(item => (item.RowIndex, item.GroupKey, item.ColumnKey));
    var maxRowIndex = cellEntries.Max(item => item.RowIndex);
    var axes = cellEntries
        .Select(item => (item.GroupKey, item.ColumnKey))
        .Distinct()
        .ToList();

    for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
    {
        foreach (var (groupKey, columnKey) in axes)
        {
            // Cells can be missing (Parse skips empty ones); only repair when both neighbours exist.
            if (!cellsByPosition.TryGetValue((rowIndex, groupKey, columnKey), out var current) ||
                !cellsByPosition.TryGetValue((rowIndex + 1, groupKey, columnKey), out var next))
            {
                continue;
            }

            var leadingAffixCount = next.Lines
                .TakeWhile(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols))
                .Count();

            if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
            {
                continue;
            }

            current.Lines.AddRange(next.Lines.GetRange(0, leadingAffixCount));
            next.Lines.RemoveRange(0, leadingAffixCount);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Chooses the top coordinate at which the row of <paramref name="current"/> ends and the row of
/// <paramref name="next"/> begins.
/// </summary>
/// <param name="current">Anchor of the upper row.</param>
/// <param name="next">Anchor of the lower row.</param>
/// <param name="bodyLines">Body lines with their top coordinate and affix classification.</param>
/// <returns>
/// One more than the floored midpoint between the last affix line that is directly followed by a
/// non-affix line (both lying between the two labels), or, when no such pair exists, one more
/// than the floored midpoint between the two label tops.
/// </returns>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int JustBelowMidpoint(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;

    var between = bodyLines
        .Where(line => line.Top >= current.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToArray();

    // Scan upwards so the lowest affix-to-prose transition wins.
    for (var lower = between.Length - 1; lower >= 1; lower--)
    {
        var upper = lower - 1;
        if (between[upper].IsAffixLike && !between[lower].IsAffixLike)
        {
            return JustBelowMidpoint(between[upper].Top, between[lower].Top);
        }
    }

    return JustBelowMidpoint(current.Top, next.Top);
}
|
||||
|
||||
/// <summary>Normalized row label, its top coordinate, and its sort order.</summary>
private sealed record RowAnchor(string Label, int Top, int SortOrder);

/// <summary>
/// Working copy of one table cell. <see cref="Lines"/> is the list instance passed to the
/// constructor and is edited in place by the affix-leakage repair.
/// </summary>
private sealed class CellEntry
{
    public CellEntry(string groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        GroupKey = groupKey;
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    public string GroupKey { get; }

    public string RollBandLabel { get; }

    public int RowIndex { get; }

    public string ColumnKey { get; }

    public List<string> Lines { get; }
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
public sealed class ParsedCriticalCellArtifact(
|
||||
string? groupKey,
|
||||
string rollBandLabel,
|
||||
string columnKey,
|
||||
IReadOnlyList<string> lines,
|
||||
@@ -8,6 +9,7 @@ public sealed class ParsedCriticalCellArtifact(
|
||||
string descriptionText,
|
||||
string? rawAffixText)
|
||||
{
|
||||
public string? GroupKey { get; } = groupKey;
|
||||
public string RollBandLabel { get; } = rollBandLabel;
|
||||
public string ColumnKey { get; } = columnKey;
|
||||
public IReadOnlyList<string> Lines { get; } = lines;
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
/// <summary>Immutable description of one group of a critical table: key, label and sort order.</summary>
public sealed class ParsedCriticalGroup
{
    public ParsedCriticalGroup(string groupKey, string label, int sortOrder)
    {
        GroupKey = groupKey;
        Label = label;
        SortOrder = sortOrder;
    }

    /// <summary>Key passed as the first constructor argument.</summary>
    public string GroupKey { get; }

    /// <summary>Label passed as the second constructor argument.</summary>
    public string Label { get; }

    /// <summary>Sort order passed as the third constructor argument.</summary>
    public int SortOrder { get; }
}
|
||||
@@ -1,12 +1,14 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
public sealed class ParsedCriticalResult(
|
||||
string? groupKey,
|
||||
string columnKey,
|
||||
string rollBandLabel,
|
||||
string rawCellText,
|
||||
string descriptionText,
|
||||
string? rawAffixText)
|
||||
{
|
||||
public string? GroupKey { get; } = groupKey;
|
||||
public string ColumnKey { get; } = columnKey;
|
||||
public string RollBandLabel { get; } = rollBandLabel;
|
||||
public string RawCellText { get; } = rawCellText;
|
||||
|
||||
@@ -6,6 +6,7 @@ public sealed class ParsedCriticalTable(
|
||||
string family,
|
||||
string sourceDocument,
|
||||
string? notes,
|
||||
IReadOnlyList<ParsedCriticalGroup> groups,
|
||||
IReadOnlyList<ParsedCriticalColumn> columns,
|
||||
IReadOnlyList<ParsedCriticalRollBand> rollBands,
|
||||
IReadOnlyList<ParsedCriticalResult> results)
|
||||
@@ -15,6 +16,7 @@ public sealed class ParsedCriticalTable(
|
||||
public string Family { get; } = family;
|
||||
public string SourceDocument { get; } = sourceDocument;
|
||||
public string? Notes { get; } = notes;
|
||||
public IReadOnlyList<ParsedCriticalGroup> Groups { get; } = groups;
|
||||
public IReadOnlyList<ParsedCriticalColumn> Columns { get; } = columns;
|
||||
public IReadOnlyList<ParsedCriticalRollBand> RollBands { get; } = rollBands;
|
||||
public IReadOnlyList<ParsedCriticalResult> Results { get; } = results;
|
||||
|
||||
@@ -1,33 +1,20 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Xml;
|
||||
using System.Xml.Linq;
|
||||
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
public sealed class StandardCriticalTableParser
|
||||
{
|
||||
private const int HeaderToBodyMinimumGap = 20;
|
||||
private const int FooterLabelExclusionGap = 15;
|
||||
private const int FooterPageNumberExclusionGap = 80;
|
||||
private const int RowLabelDuplicateTolerance = 15;
|
||||
private const int TopGroupingTolerance = 2;
|
||||
private static readonly Regex MultiFragmentSplitRegex = new(@"\S(?:.*?\S)?(?=(?:\s{2,}|$))", RegexOptions.Compiled);
|
||||
private static readonly Regex NumericAffixLineRegex = new(@"^\d+(?:H|∑|∏|π|∫|\s*[–-])", RegexOptions.Compiled);
|
||||
private static readonly Regex StandaloneModifierAffixLineRegex = new(@"^(?:\d+)?\((?:\+|-)\d+\)$", RegexOptions.Compiled);
|
||||
|
||||
public StandardCriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
|
||||
{
|
||||
var fragments = LoadFragments(xmlContent);
|
||||
var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
|
||||
var headerFragments = FindHeaderFragments(fragments);
|
||||
var validationErrors = new List<string>();
|
||||
var validationWarnings = new List<string>();
|
||||
|
||||
var columnCenters = headerFragments
|
||||
.OrderBy(item => item.Left)
|
||||
.Select(item => new ColumnAnchor(item.Text.ToUpperInvariant(), item.CenterX))
|
||||
.Select(item => (Key: item.Text.ToUpperInvariant(), CenterX: item.CenterX))
|
||||
.ToList();
|
||||
|
||||
var bodyStartTop = headerFragments.Max(item => item.Top) + HeaderToBodyMinimumGap;
|
||||
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
|
||||
var keyTop = fragments
|
||||
.Where(item =>
|
||||
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
|
||||
@@ -35,12 +22,17 @@ public sealed class StandardCriticalTableParser
|
||||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
|
||||
.Select(item => (int?)item.Top)
|
||||
.Min() ?? int.MaxValue;
|
||||
var affixLegendSymbols = DetectAffixLegendSymbols(fragments, keyTop);
|
||||
var rowLabelFragments = FindRowLabelFragments(fragments, headerFragments, keyTop);
|
||||
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
|
||||
var leftCutoff = headerFragments.Min(item => item.Left) - 10;
|
||||
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
|
||||
fragments,
|
||||
leftCutoff,
|
||||
bodyStartTop,
|
||||
keyTop);
|
||||
|
||||
var rowAnchors = rowLabelFragments
|
||||
.OrderBy(item => item.Top)
|
||||
.Select((item, index) => new RowAnchor(item.Text, item.Top, index + 1))
|
||||
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
|
||||
.ToList();
|
||||
|
||||
if (rowAnchors.Count == 0)
|
||||
@@ -51,16 +43,17 @@ public sealed class StandardCriticalTableParser
|
||||
var bodyFragments = fragments
|
||||
.Where(item =>
|
||||
item.Top >= bodyStartTop &&
|
||||
item.Top < keyTop - TopGroupingTolerance &&
|
||||
!IsFooterPageNumberFragment(item, keyTop) &&
|
||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, item.Text, StringComparison.OrdinalIgnoreCase)) &&
|
||||
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
|
||||
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
|
||||
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
|
||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
|
||||
!headerFragments.Contains(item))
|
||||
.ToList();
|
||||
bodyFragments = SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
var bodyLines = BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||
|
||||
var parsedRollBands = rowAnchors
|
||||
.Select(anchor => CreateRollBand(anchor.Label, anchor.SortOrder))
|
||||
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
|
||||
.ToList();
|
||||
|
||||
var cellEntries = new List<CellEntry>();
|
||||
@@ -82,7 +75,7 @@ public sealed class StandardCriticalTableParser
|
||||
foreach (var columnAnchor in columnCenters)
|
||||
{
|
||||
var cellFragments = rowFragments
|
||||
.Where(item => ResolveColumn(item.CenterX, columnCenters) == columnAnchor.Key)
|
||||
.Where(item => CriticalTableParserSupport.ResolveColumn(item.CenterX, columnCenters) == columnAnchor.Key)
|
||||
.OrderBy(item => item.Top)
|
||||
.ThenBy(item => item.Left)
|
||||
.ToList();
|
||||
@@ -97,7 +90,7 @@ public sealed class StandardCriticalTableParser
|
||||
rowAnchors[rowIndex].Label,
|
||||
rowIndex,
|
||||
columnAnchor.Key,
|
||||
BuildLines(cellFragments).ToList()));
|
||||
CriticalTableParserSupport.BuildLines(cellFragments).ToList()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,7 +101,7 @@ public sealed class StandardCriticalTableParser
|
||||
|
||||
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey))
|
||||
{
|
||||
var segmentCount = CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
|
||||
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
|
||||
|
||||
if (segmentCount > 2)
|
||||
{
|
||||
@@ -116,13 +109,14 @@ public sealed class StandardCriticalTableParser
|
||||
$"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
|
||||
}
|
||||
|
||||
var rawAffixLines = cellEntry.Lines.Where(line => IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
||||
var descriptionLines = cellEntry.Lines.Where(line => !IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
||||
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
||||
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
||||
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
|
||||
var descriptionText = CollapseWhitespace(string.Join(' ', descriptionLines));
|
||||
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
|
||||
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
|
||||
|
||||
parsedCells.Add(new ParsedCriticalCellArtifact(
|
||||
null,
|
||||
cellEntry.RollBandLabel,
|
||||
cellEntry.ColumnKey,
|
||||
cellEntry.Lines,
|
||||
@@ -131,6 +125,7 @@ public sealed class StandardCriticalTableParser
|
||||
rawAffixText));
|
||||
|
||||
parsedResults.Add(new ParsedCriticalResult(
|
||||
null,
|
||||
cellEntry.ColumnKey,
|
||||
cellEntry.RollBandLabel,
|
||||
rawCellText,
|
||||
@@ -162,40 +157,12 @@ public sealed class StandardCriticalTableParser
|
||||
entry.Family,
|
||||
Path.GetFileName(entry.PdfPath),
|
||||
"Imported from PDF XML extraction.",
|
||||
[],
|
||||
columnCenters.Select((item, index) => new ParsedCriticalColumn(item.Key, item.Key, "severity", index + 1)).ToList(),
|
||||
parsedRollBands,
|
||||
parsedResults);
|
||||
|
||||
return new StandardCriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
||||
}
|
||||
|
||||
private static List<XmlTextFragment> LoadFragments(string xmlContent)
|
||||
{
|
||||
using var stringReader = new StringReader(xmlContent);
|
||||
using var xmlReader = XmlReader.Create(
|
||||
stringReader,
|
||||
new XmlReaderSettings
|
||||
{
|
||||
DtdProcessing = DtdProcessing.Ignore
|
||||
});
|
||||
|
||||
var document = XDocument.Load(xmlReader);
|
||||
|
||||
return document.Descendants("page")
|
||||
.SelectMany(page =>
|
||||
{
|
||||
var pageNumber = int.Parse(page.Attribute("number")?.Value ?? "1");
|
||||
return page.Elements("text")
|
||||
.Select(item => new XmlTextFragment(
|
||||
pageNumber,
|
||||
int.Parse(item.Attribute("top")?.Value ?? throw new InvalidOperationException("Missing text top attribute.")),
|
||||
int.Parse(item.Attribute("left")?.Value ?? throw new InvalidOperationException("Missing text left attribute.")),
|
||||
int.Parse(item.Attribute("width")?.Value ?? throw new InvalidOperationException("Missing text width attribute.")),
|
||||
int.Parse(item.Attribute("height")?.Value ?? throw new InvalidOperationException("Missing text height attribute.")),
|
||||
NormalizeText(string.Concat(item.DescendantNodes().OfType<XText>().Select(node => node.Value)))))
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item.Text));
|
||||
})
|
||||
.ToList();
|
||||
return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
|
||||
}
|
||||
|
||||
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
|
||||
@@ -206,7 +173,7 @@ public sealed class StandardCriticalTableParser
|
||||
.ThenBy(item => item.Left)
|
||||
.ToList();
|
||||
|
||||
foreach (var group in GroupByTop(headerCandidates))
|
||||
foreach (var group in CriticalTableParserSupport.GroupByTop(headerCandidates))
|
||||
{
|
||||
var ordered = group.OrderBy(item => item.Left).ToList();
|
||||
var labels = ordered.Select(item => item.Text.ToUpperInvariant()).ToList();
|
||||
@@ -219,156 +186,6 @@ public sealed class StandardCriticalTableParser
|
||||
throw new InvalidOperationException("Could not find the standard-table A-E header row in the XML artifact.");
|
||||
}
|
||||
|
||||
/// <summary>
/// Collects the roll-band label fragments: those left of the header columns, below the header
/// row and above the footer exclusion zone, dropping near-duplicates.
/// </summary>
/// <param name="fragments">All fragments of the table.</param>
/// <param name="headerFragments">Header fragments; they define the left cutoff and the body start.</param>
/// <param name="keyTop">Top coordinate the footer exclusion gap is measured from.</param>
/// <returns>
/// Label fragments ordered by <c>Top</c>. A label is skipped when it repeats the text of the
/// previously kept label (ignoring case) within <c>RowLabelDuplicateTolerance</c> vertically.
/// </returns>
private static List<XmlTextFragment> FindRowLabelFragments(
    IReadOnlyList<XmlTextFragment> fragments,
    IReadOnlyList<XmlTextFragment> headerFragments,
    int keyTop)
{
    var leftCutoff = headerFragments.Min(item => item.Left) - 10;
    var bodyStartTop = headerFragments.Max(item => item.Top) + HeaderToBodyMinimumGap;

    var labelsTopDown =
        from item in fragments
        where item.Left < leftCutoff
            && item.Top >= bodyStartTop
            && item.Top < keyTop - FooterLabelExclusionGap
            && IsRollBandLabel(item.Text)
        orderby item.Top
        select item;

    var kept = new List<XmlTextFragment>();

    foreach (var label in labelsTopDown.ToList())
    {
        var repeatsPrevious =
            kept.Count > 0 &&
            string.Equals(kept[^1].Text, label.Text, StringComparison.OrdinalIgnoreCase) &&
            Math.Abs(kept[^1].Top - label.Top) <= RowLabelDuplicateTolerance;

        if (!repeatsPrevious)
        {
            kept.Add(label);
        }
    }

    return kept;
}
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="value"/>, once trimmed, is a roll-band label: a two- or
/// three-digit number, a range of two such numbers joined by a hyphen, or such a number
/// followed by <c>+</c>.
/// </summary>
private static bool IsRollBandLabel(string value)
{
    var trimmed = value.Trim();
    return Regex.IsMatch(trimmed, @"^\d{2,3}(?:-\d{2,3})?$|^\d{2,3}\+$");
}
|
||||
|
||||
/// <summary>
/// Builds a roll band from its label: <c>"NN+"</c> yields an open-ended band (no maximum),
/// <c>"NN"</c> a single-value band, and <c>"NN-MM"</c> a range.
/// </summary>
/// <param name="label">Roll-band label text.</param>
/// <param name="sortOrder">Sort order stored on the band.</param>
private static ParsedCriticalRollBand CreateRollBand(string label, int sortOrder)
{
    var isOpenEnded = label.EndsWith('+');
    if (isOpenEnded)
    {
        var minimumOnly = int.Parse(label[..^1]);
        return new ParsedCriticalRollBand(label, minimumOnly, null, sortOrder);
    }

    var bounds = label.Split('-', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    var minimum = int.Parse(bounds[0]);

    // A label without a second number covers exactly one roll value.
    var maximum = bounds.Length == 1 ? minimum : int.Parse(bounds[1]);

    return new ParsedCriticalRollBand(label, minimum, maximum, sortOrder);
}
|
||||
|
||||
/// <summary>
/// Maps a horizontal centre coordinate to a column key. Boundaries are the midpoints between
/// neighbouring column centres; a coordinate belongs to the first column whose right boundary it
/// is left of, and to the last column otherwise.
/// </summary>
/// <param name="centerX">Centre X coordinate to classify.</param>
/// <param name="columns">Column anchors ordered left to right.</param>
private static string ResolveColumn(double centerX, IReadOnlyList<ColumnAnchor> columns)
{
    var lastSlot = columns.Count - 1;
    var slot = 0;

    // Advance while the coordinate is not left of the boundary to the next column.
    while (slot < lastSlot)
    {
        var boundary = (columns[slot].CenterX + columns[slot + 1].CenterX) / 2.0;
        if (centerX < boundary)
        {
            break;
        }

        slot++;
    }

    return slot < lastSlot ? columns[slot].Key : columns[^1].Key;
}
|
||||
|
||||
/// <summary>
/// Turns the fragments of one cell into text lines: fragments are sorted by top then left,
/// grouped into lines by top coordinate, joined left to right with single spaces, and
/// whitespace-collapsed.
/// </summary>
/// <param name="fragments">Fragments belonging to one cell.</param>
/// <returns>The non-blank lines from top to bottom.</returns>
private static IReadOnlyList<string> BuildLines(IReadOnlyList<XmlTextFragment> fragments)
{
    var rows = new List<List<XmlTextFragment>>();
    List<XmlTextFragment>? openRow = null;

    foreach (var fragment in fragments.OrderBy(item => item.Top).ThenBy(item => item.Left))
    {
        // A fragment stays on the open line while it is within tolerance of that line's first fragment.
        if (openRow is not null && Math.Abs(openRow[0].Top - fragment.Top) <= TopGroupingTolerance)
        {
            openRow.Add(fragment);
            continue;
        }

        openRow = [fragment];
        rows.Add(openRow);
    }

    var texts = new List<string>(rows.Count);
    foreach (var row in rows)
    {
        var joined = string.Join(' ', row.OrderBy(item => item.Left).Select(item => item.Text));
        var text = CollapseWhitespace(joined);
        if (!string.IsNullOrWhiteSpace(text))
        {
            texts.Add(text);
        }
    }

    return texts;
}
|
||||
|
||||
/// <summary>
/// Decides whether a line of cell text looks like an affix line rather than descriptive prose.
/// </summary>
/// <remarks>
/// Rules are applied in this order to the trimmed line:
/// an empty line is not an affix; a lone hyphen, en dash or em dash is;
/// a line starting with a conditional word ("with", "w/o", "without", "if", "while", "until", "unless")
/// is an affix only when it contains a colon;
/// a line containing a legend symbol is an affix when it also contains a digit, or when nothing but
/// whitespace remains after removing legend symbols and the characters + - ( ) /;
/// otherwise the line is an affix when it starts with '+', ∑, ∏, π or ∫, matches one of the two
/// affix-line regexes, or contains " - ".
/// </remarks>
/// <param name="line">Line to classify.</param>
/// <param name="affixLegendSymbols">Legend symbols detected for the table; may be empty.</param>
/// <returns><c>true</c> when the line is classified as an affix line.</returns>
private static bool IsAffixLikeLine(string line, ISet<string> affixLegendSymbols)
{
    var value = line.Trim();

    if (value.Length == 0)
    {
        return false;
    }

    if (value is "-" or "\u2013" or "\u2014")
    {
        return true;
    }

    string[] conditionalPrefixes = ["with ", "w/o ", "without ", "if ", "while ", "until ", "unless "];
    if (conditionalPrefixes.Any(prefix => value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)))
    {
        // Conditional clauses count as affixes only in their "condition: effect" form.
        return value.Contains(':', StringComparison.Ordinal);
    }

    var mentionsLegendSymbol = affixLegendSymbols.Count > 0 &&
        affixLegendSymbols.Any(symbol => value.Contains(symbol, StringComparison.Ordinal));

    if (mentionsLegendSymbol)
    {
        if (value.Any(char.IsDigit))
        {
            return true;
        }

        // Remove longer symbols first so a short symbol cannot break up a longer one.
        var remainder = affixLegendSymbols
            .OrderByDescending(symbol => symbol.Length)
            .Aggregate(value, (text, symbol) => text.Replace(symbol, string.Empty, StringComparison.Ordinal));

        string[] punctuation = ["+", "-", "(", ")", "/"];
        foreach (var mark in punctuation)
        {
            remainder = remainder.Replace(mark, string.Empty, StringComparison.Ordinal);
        }

        if (string.IsNullOrWhiteSpace(remainder))
        {
            return true;
        }
    }

    string[] leadingMarkers = ["+", "\u2211", "\u220F", "\u03C0", "\u222B"];
    return leadingMarkers.Any(marker => value.StartsWith(marker, StringComparison.Ordinal)) ||
        StandaloneModifierAffixLineRegex.IsMatch(value) ||
        NumericAffixLineRegex.IsMatch(value) ||
        value.Contains(" - ", StringComparison.Ordinal);
}
|
||||
|
||||
/// <summary>
/// Runs the leading-affix repair with an empty set of legend symbols.
/// </summary>
/// <param name="cellEntries">Cell entries passed on to the two-argument overload.</param>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries)
{
    var noLegendSymbols = new HashSet<string>(StringComparer.Ordinal);
    RepairLeadingAffixLeakage(cellEntries, noLegendSymbols);
}
|
||||
|
||||
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
|
||||
{
|
||||
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
||||
@@ -380,14 +197,13 @@ public sealed class StandardCriticalTableParser
|
||||
{
|
||||
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey);
|
||||
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey);
|
||||
|
||||
if (current is null || next is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var leadingAffixCount = 0;
|
||||
while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
||||
while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
||||
{
|
||||
leadingAffixCount++;
|
||||
}
|
||||
@@ -403,199 +219,10 @@ public sealed class StandardCriticalTableParser
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Trims <paramref name="value"/> and replaces every run of whitespace inside it with a single space.
/// </summary>
/// <param name="value">Text to normalise.</param>
/// <returns>The trimmed text with internal whitespace runs collapsed.</returns>
private static string CollapseWhitespace(string value)
{
    var trimmed = value.Trim();
    return Regex.Replace(trimmed, @"\s+", " ");
}
|
||||
|
||||
/// <summary>
/// Replaces each non-breaking space, carriage return and line feed with an ordinary space, then trims the result.
/// Runs of spaces are not collapsed.
/// </summary>
/// <param name="value">Text to normalise.</param>
/// <returns>The normalised, trimmed text.</returns>
private static string NormalizeText(string value)
{
    var mapped = value
        .Select(ch => ch is '\u00a0' or '\r' or '\n' ? ' ' : ch)
        .ToArray();

    return new string(mapped).Trim();
}
|
||||
|
||||
/// <summary>
/// Counts the runs of consecutive lines that share the same affix/non-affix classification.
/// For example prose, prose, affix, affix yields 2, and prose, affix, prose yields 3.
/// </summary>
/// <param name="lines">Cell lines in order.</param>
/// <param name="affixLegendSymbols">Legend symbols passed to <c>IsAffixLikeLine</c>.</param>
/// <returns>The number of classification runs; 0 for an empty list.</returns>
private static int CountLineTypeSegments(IReadOnlyList<string> lines, ISet<string> affixLegendSymbols)
{
    var flags = lines
        .Select(line => IsAffixLikeLine(line, affixLegendSymbols))
        .ToList();

    // A new segment starts at the first line and wherever the classification flips.
    return flags
        .Where((flag, index) => index == 0 || flag != flags[index - 1])
        .Count();
}
|
||||
|
||||
/// <summary>
/// Reads the legend at the bottom of the table and collects the single-character symbols it assigns to
/// "must parry", "no parry", "stun"/"stunned", "bleed" and "powerpoint modification".
/// </summary>
/// <param name="fragments">All fragments; only those at or below <paramref name="keyTop"/> minus
/// <c>TopGroupingTolerance</c> are inspected.</param>
/// <param name="keyTop">Top of the legend area, or <see cref="int.MaxValue"/> when no legend was found.</param>
/// <returns>The detected symbols; empty when <paramref name="keyTop"/> is <see cref="int.MaxValue"/> or nothing matched.</returns>
private static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
{
    if (keyTop == int.MaxValue)
    {
        return new HashSet<string>();
    }

    string[] legendPatterns =
    [
        @"must parry\s*=\s*(\S)",
        @"no parry\s*=\s*(\S)",
        @"stun(?:ned)?\s*=\s*(\S)",
        @"bleed\s*=\s*(\S)",
        @"powerpoint modification.*=\s*(\S)"
    ];

    var footerFragments = fragments
        .Where(item => item.Top >= keyTop - TopGroupingTolerance)
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    var symbols = new HashSet<string>(StringComparer.Ordinal);

    foreach (var footerGroup in GroupByTop(footerFragments))
    {
        var footerLine = CollapseWhitespace(
            string.Join(' ', footerGroup.OrderBy(item => item.Left).Select(item => item.Text)));

        foreach (var pattern in legendPatterns)
        {
            AddLegendMatch(symbols, footerLine, pattern);
        }
    }

    return symbols;
}
|
||||
|
||||
/// <summary>
/// Applies <c>SplitBoundaryCrossingAffixFragment</c> to every body fragment and concatenates the results in order.
/// </summary>
/// <param name="bodyFragments">Fragments to examine.</param>
/// <param name="columnCenters">Column anchors used to locate column boundaries.</param>
/// <param name="affixLegendSymbols">Legend symbols used for affix detection.</param>
/// <returns>A new list in which each input fragment is either kept or replaced by its split pieces.</returns>
private static List<XmlTextFragment> SplitBoundaryCrossingAffixFragments(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols) =>
    bodyFragments
        .SelectMany(fragment => SplitBoundaryCrossingAffixFragment(fragment, columnCenters, affixLegendSymbols))
        .ToList();
|
||||
|
||||
/// <summary>
/// Splits one affix fragment that straddles a column boundary into separate fragments, one per match of
/// <c>MultiFragmentSplitRegex</c>. Each piece's left edge and width are estimated from the average character width
/// of the original fragment.
/// </summary>
/// <param name="fragment">Fragment to examine.</param>
/// <param name="columnCenters">Column anchors used to locate column boundaries.</param>
/// <param name="affixLegendSymbols">Legend symbols used for affix detection.</param>
/// <returns>
/// The pieces when there are at least two and they land in more than one column or in a column other than the
/// original fragment's; otherwise a single-element list holding the original fragment.
/// </returns>
private static IReadOnlyList<XmlTextFragment> SplitBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    if (!LooksLikeBoundaryCrossingAffixFragment(fragment, columnCenters, affixLegendSymbols))
    {
        return [fragment];
    }

    var matches = MultiFragmentSplitRegex.Matches(fragment.Text);
    if (matches.Count < 2)
    {
        return [fragment];
    }

    // Average width of one character; the divisor is clamped so empty text cannot divide by zero.
    var characterWidth = fragment.Width / (double)Math.Max(fragment.Text.Length, 1);

    var pieces = matches
        .Select(match => (Match: match, Text: CollapseWhitespace(match.Value)))
        .Where(piece => piece.Text.Length != 0)
        .Select(piece => new XmlTextFragment(
            fragment.PageNumber,
            fragment.Top,
            fragment.Left + (int)Math.Round(characterWidth * piece.Match.Index),
            Math.Max(1, (int)Math.Round(characterWidth * piece.Match.Length)),
            fragment.Height,
            piece.Text))
        .ToList();

    if (pieces.Count < 2)
    {
        return [fragment];
    }

    var originalColumn = ResolveColumn(fragment.CenterX, columnCenters);
    var touchedColumns = pieces
        .Select(piece => ResolveColumn(piece.CenterX, columnCenters))
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .ToList();

    var staysInOriginalColumn = touchedColumns.Count <= 1 &&
        touchedColumns.All(column => string.Equals(column, originalColumn, StringComparison.OrdinalIgnoreCase));

    if (staysInOriginalColumn)
    {
        return [fragment];
    }

    return pieces;
}
|
||||
|
||||
/// <summary>
/// Reports whether a fragment is an affix-like line containing a space whose horizontal extent crosses the
/// midpoint between two neighbouring column centres.
/// </summary>
/// <param name="fragment">Fragment to examine.</param>
/// <param name="columnCenters">Column anchors, in list order, whose neighbouring midpoints form the boundaries.</param>
/// <param name="affixLegendSymbols">Legend symbols used for affix detection.</param>
/// <returns><c>true</c> when the fragment starts left of a boundary and ends right of it.</returns>
private static bool LooksLikeBoundaryCrossingAffixFragment(
    XmlTextFragment fragment,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    var text = fragment.Text;

    if (!IsAffixLikeLine(text, affixLegendSymbols))
    {
        return false;
    }

    if (!text.Contains(" ", StringComparison.Ordinal))
    {
        return false;
    }

    var leftEdge = fragment.Left;
    var rightEdge = fragment.Left + fragment.Width;

    // Pair every column with its right-hand neighbour; fewer than two columns gives no boundaries.
    return columnCenters
        .Zip(columnCenters.Skip(1), (left, right) => (left.CenterX + right.CenterX) / 2.0)
        .Any(boundary => leftEdge < boundary && rightEdge > boundary);
}
|
||||
|
||||
/// <summary>
/// Adds the first capture group of every case-insensitive match of <paramref name="pattern"/> in
/// <paramref name="value"/> to <paramref name="symbols"/>.
/// </summary>
/// <param name="symbols">Set that receives the captured text.</param>
/// <param name="value">Text to search.</param>
/// <param name="pattern">Regular expression whose first capture group is the symbol.</param>
private static void AddLegendMatch(HashSet<string> symbols, string value, string pattern)
{
    var captured = Regex.Matches(value, pattern, RegexOptions.IgnoreCase)
        .Where(match => match.Groups.Count > 1)
        .Select(match => match.Groups[1].Value);

    symbols.UnionWith(captured);
}
|
||||
|
||||
/// <summary>
/// Groups body fragments into visual lines and records, for each line, its top coordinate and whether every
/// column's text on that line is affix-like.
/// </summary>
/// <param name="bodyFragments">Fragments belonging to the table body.</param>
/// <param name="columnCenters">Column anchors used to assign fragments to columns.</param>
/// <param name="affixLegendSymbols">Legend symbols used for affix detection.</param>
/// <returns>One <c>BodyLine</c> per line group, in top-to-bottom order.</returns>
private static List<BodyLine> BuildBodyLines(
    IReadOnlyList<XmlTextFragment> bodyFragments,
    IReadOnlyList<ColumnAnchor> columnCenters,
    ISet<string> affixLegendSymbols)
{
    var orderedFragments = bodyFragments
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    return GroupByTop(orderedFragments)
        .Select(lineFragments =>
        {
            // Text of each column on this line, left to right, with blank columns dropped.
            var columnTexts = lineFragments
                .GroupBy(item => ResolveColumn(item.CenterX, columnCenters), StringComparer.OrdinalIgnoreCase)
                .Select(group => CollapseWhitespace(string.Join(' ', group.OrderBy(item => item.Left).Select(item => item.Text))))
                .Where(item => !string.IsNullOrWhiteSpace(item))
                .ToList();

            var everyColumnIsAffix = columnTexts.Count > 0 &&
                columnTexts.All(text => IsAffixLikeLine(text, affixLegendSymbols));

            return new BodyLine(lineFragments[0].Top, everyColumnIsAffix);
        })
        .ToList();
}
|
||||
|
||||
private static int ResolveRowBoundaryTop(
|
||||
RowAnchor current,
|
||||
RowAnchor next,
|
||||
IReadOnlyList<BodyLine> bodyLines)
|
||||
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
||||
{
|
||||
var linesBetweenLabels = bodyLines
|
||||
.Where(item => item.Top >= current.Top && item.Top < next.Top)
|
||||
@@ -613,41 +240,8 @@ public sealed class StandardCriticalTableParser
|
||||
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether a fragment is a bare two- or three-digit number located at or below
/// <paramref name="keyTop"/> minus <c>FooterPageNumberExclusionGap</c>.
/// </summary>
/// <param name="fragment">Fragment to examine.</param>
/// <param name="keyTop">Top of the legend area, or <see cref="int.MaxValue"/> when no legend was found.</param>
/// <returns><c>false</c> when <paramref name="keyTop"/> is <see cref="int.MaxValue"/>; otherwise the result of the position and digit checks.</returns>
private static bool IsFooterPageNumberFragment(XmlTextFragment fragment, int keyTop) =>
    keyTop != int.MaxValue &&
    fragment.Top >= keyTop - FooterPageNumberExclusionGap &&
    Regex.IsMatch(fragment.Text, @"^\d{2,3}$");
|
||||
|
||||
/// <summary>
/// Splits fragments, in the order given, into consecutive groups. A fragment joins the latest group when its top
/// is within <c>TopGroupingTolerance</c> of that group's first fragment; otherwise it starts a new group.
/// </summary>
/// <param name="fragments">Fragments to group; the input order is kept and no sorting is done here.</param>
/// <returns>The groups in the order they were started.</returns>
private static IEnumerable<List<XmlTextFragment>> GroupByTop(IReadOnlyList<XmlTextFragment> fragments)
{
    var groups = new List<List<XmlTextFragment>>();

    for (var index = 0; index < fragments.Count; index++)
    {
        var fragment = fragments[index];
        var joinsLatestGroup = groups.Count > 0 &&
            Math.Abs(groups[^1][0].Top - fragment.Top) <= TopGroupingTolerance;

        if (joinsLatestGroup)
        {
            groups[^1].Add(fragment);
        }
        else
        {
            groups.Add([fragment]);
        }
    }

    return groups;
}
|
||||
|
||||
/// <summary>
/// A column key paired with the column's horizontal centre. ResolveColumn places the boundary between two
/// neighbouring columns at the midpoint of their CenterX values and returns the Key of the chosen column.
/// </summary>
private sealed record ColumnAnchor(string Key, double CenterX);
|
||||
|
||||
/// <summary>
/// A row label with its top coordinate and sort order. ResolveRowBoundaryTop uses the Top values of two
/// consecutive anchors to bound the body lines it inspects.
/// </summary>
private sealed record RowAnchor(string Label, int Top, int SortOrder);
|
||||
|
||||
/// <summary>
/// One visual line of the table body as produced by BuildBodyLines: the top of the line's first fragment and
/// whether every column's text on that line was classified as affix-like.
/// </summary>
private sealed record BodyLine(int Top, bool IsAffixLike);
|
||||
|
||||
private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
|
||||
{
|
||||
public string RollBandLabel { get; } = rollBandLabel;
|
||||
|
||||
@@ -0,0 +1,276 @@
|
||||
namespace RolemasterDb.ImportTool.Parsing;
|
||||
|
||||
public sealed class VariantColumnCriticalTableParser
|
||||
{
|
||||
// Variant columns in the left-to-right order FindHeaderFragments requires of the header row.
// The same order is used for the columns of the parsed table.
private static readonly ColumnDefinition[] ExpectedColumns =
{
    new ColumnDefinition("NORMAL", "Normal"),
    new ColumnDefinition("MAGIC", "Magic"),
    new ColumnDefinition("MITHRIL", "Mithril"),
    new ColumnDefinition("HOLY_ARMS", "Holy Arms"),
    new ColumnDefinition("SLAYING", "Slaying"),
};
|
||||
|
||||
/// <summary>
/// Parses the extracted XML text of a variant-column critical table (Normal, Magic, Mithril, Holy Arms, Slaying)
/// into a table model, per-cell artifacts and a validation report.
/// </summary>
/// <param name="entry">Manifest entry supplying the slug, display name, family and PDF path recorded on the table.</param>
/// <param name="xmlContent">XML text handed to <c>CriticalTableParserSupport.LoadFragments</c>.</param>
/// <returns>
/// The parsed table together with the loaded fragments, the cell artifacts and the validation report. Structural
/// problems (no row labels, missing cells, interleaved prose/affix lines, unexpected column or cell counts) are
/// reported as validation errors, not thrown.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="entry"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">The variant-column header row cannot be found.</exception>
public CriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent)
{
    // Fail fast: entry is only dereferenced at the very end, after all the parsing work.
    ArgumentNullException.ThrowIfNull(entry);

    var fragments = CriticalTableParserSupport.LoadFragments(xmlContent);
    var headerFragments = FindHeaderFragments(fragments);
    var validationErrors = new List<string>();
    var validationWarnings = new List<string>();

    var columnAnchors = headerFragments
        .OrderBy(item => item.Left)
        .Select(item =>
        {
            var definition = ResolveColumnDefinition(item.Text);
            return (definition.Key, definition.Label, item.CenterX);
        })
        .ToList();

    var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
    var keyTop = FindKeyTop(fragments);
    var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
    var leftCutoff = headerFragments.Min(item => item.Left) - 10;
    var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
        fragments,
        leftCutoff,
        bodyStartTop,
        keyTop);

    var rowAnchors = rowLabelFragments
        .OrderBy(item => item.Top)
        .Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
        .ToList();

    if (rowAnchors.Count == 0)
    {
        validationErrors.Add("No roll-band labels were found in the XML artifact.");
    }

    var columnCenters = columnAnchors
        .Select(item => (item.Key, item.CenterX))
        .ToList();

    // Body = everything between header and legend that is not a row label, footer page number or header cell.
    var bodyFragments = fragments
        .Where(item =>
            item.Top >= bodyStartTop &&
            item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
            !CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
            !CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
            !rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
            !headerFragments.Contains(item))
        .ToList();
    bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
    var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);

    var parsedRollBands = rowAnchors
        .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
        .ToList();

    // rowBoundaries[i] is the top coordinate separating row i from row i + 1; each is computed once.
    var rowBoundaries = new List<int>();
    for (var rowIndex = 0; rowIndex < rowAnchors.Count - 1; rowIndex++)
    {
        rowBoundaries.Add(ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines));
    }

    var cellEntries = new List<CellEntry>();

    for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
    {
        var rowStart = rowIndex == 0 ? bodyStartTop : rowBoundaries[rowIndex - 1];
        var rowEnd = rowIndex == rowAnchors.Count - 1 ? keyTop - 1 : rowBoundaries[rowIndex];

        // Resolve each fragment's column once per row instead of once per column.
        var rowFragmentsByColumn = bodyFragments
            .Where(item => item.Top >= rowStart && item.Top < rowEnd)
            .ToLookup(item => CriticalTableParserSupport.ResolveColumn(item.CenterX, columnCenters));

        foreach (var columnAnchor in columnAnchors)
        {
            var cellFragments = rowFragmentsByColumn[columnAnchor.Key]
                .OrderBy(item => item.Top)
                .ThenBy(item => item.Left)
                .ToList();

            if (cellFragments.Count == 0)
            {
                validationErrors.Add($"Missing content for roll band '{rowAnchors[rowIndex].Label}', column '{columnAnchor.Key}'.");
                continue;
            }

            cellEntries.Add(new CellEntry(
                rowAnchors[rowIndex].Label,
                rowIndex,
                columnAnchor.Key,
                CriticalTableParserSupport.BuildLines(cellFragments).ToList()));
        }
    }

    RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);

    var parsedCells = new List<ParsedCriticalCellArtifact>();
    var parsedResults = new List<ParsedCriticalResult>();

    foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey, StringComparer.Ordinal))
    {
        // More than two runs means prose and affix lines alternate within the cell.
        var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
        if (segmentCount > 2)
        {
            validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
        }

        // Classify every line exactly once and route it to the matching bucket.
        var rawAffixLines = new List<string>();
        var descriptionLines = new List<string>();
        foreach (var line in cellEntry.Lines)
        {
            var bucket = CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)
                ? rawAffixLines
                : descriptionLines;
            bucket.Add(line);
        }

        var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
        var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
        var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);

        parsedCells.Add(new ParsedCriticalCellArtifact(
            null,
            cellEntry.RollBandLabel,
            cellEntry.ColumnKey,
            cellEntry.Lines,
            rawCellText,
            descriptionText,
            rawAffixText));

        parsedResults.Add(new ParsedCriticalResult(
            null,
            cellEntry.ColumnKey,
            cellEntry.RollBandLabel,
            rawCellText,
            descriptionText,
            rawAffixText));
    }

    if (columnAnchors.Count != ExpectedColumns.Length)
    {
        validationErrors.Add($"Expected {ExpectedColumns.Length} variant columns but found {columnAnchors.Count}.");
    }

    if (parsedCells.Count != rowAnchors.Count * columnAnchors.Count)
    {
        validationErrors.Add($"Expected {rowAnchors.Count * columnAnchors.Count} parsed cells but produced {parsedCells.Count}.");
    }

    var validationReport = new ImportValidationReport(
        validationErrors.Count == 0,
        validationErrors,
        validationWarnings,
        rowAnchors.Count,
        parsedCells.Count);

    var table = new ParsedCriticalTable(
        entry.Slug,
        entry.DisplayName,
        entry.Family,
        Path.GetFileName(entry.PdfPath),
        "Imported from PDF XML extraction.",
        [],
        ExpectedColumns.Select((item, index) => new ParsedCriticalColumn(item.Key, item.Label, "variant", index + 1)).ToList(),
        parsedRollBands,
        parsedResults);

    return new CriticalTableParseResult(table, fragments, parsedCells, validationReport);
}

/// <summary>
/// Finds the top coordinate of the legend area: the smallest top among fragments whose text is exactly "Key:"
/// or contains "must parry" or "attacker gets" (all compared case-insensitively).
/// </summary>
/// <param name="fragments">All fragments of the page.</param>
/// <returns>The smallest matching top, or <see cref="int.MaxValue"/> when no fragment matches.</returns>
private static int FindKeyTop(IReadOnlyList<XmlTextFragment> fragments) =>
    fragments
        .Where(item =>
            string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
            item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
            item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
        .Select(item => (int?)item.Top)
        .Min() ?? int.MaxValue;
|
||||
|
||||
/// <summary>
/// Locates the header row: the first top-aligned group of candidate fragments whose trimmed texts, read left to
/// right, equal the expected column labels in order.
/// </summary>
/// <remarks>
/// Labels are compared with <see cref="StringComparer.OrdinalIgnoreCase"/>, the same comparison
/// ResolveColumnDefinition applies, so a header accepted here is always resolvable there.
/// </remarks>
/// <param name="fragments">All fragments of the page.</param>
/// <returns>The header fragments ordered left to right.</returns>
/// <exception cref="InvalidOperationException">No group of fragments matches the expected label sequence.</exception>
private static List<XmlTextFragment> FindHeaderFragments(IReadOnlyList<XmlTextFragment> fragments)
{
    var expectedLabels = ExpectedColumns
        .Select(item => item.Label)
        .ToList();

    var headerCandidates = fragments
        .Where(item => expectedLabels.Contains(item.Text.Trim(), StringComparer.OrdinalIgnoreCase))
        .OrderBy(item => item.Top)
        .ThenBy(item => item.Left)
        .ToList();

    foreach (var group in CriticalTableParserSupport.GroupByTop(headerCandidates))
    {
        var ordered = group.OrderBy(item => item.Left).ToList();
        var labels = ordered.Select(item => item.Text.Trim());

        if (labels.SequenceEqual(expectedLabels, StringComparer.OrdinalIgnoreCase))
        {
            return ordered;
        }
    }

    throw new InvalidOperationException("Could not find the variant-column header row in the XML artifact.");
}
|
||||
|
||||
/// <summary>
/// Finds the expected column whose label equals the trimmed <paramref name="value"/>, ignoring case.
/// </summary>
/// <param name="value">Header text to resolve.</param>
/// <returns>The matching column definition.</returns>
/// <exception cref="InvalidOperationException">No expected column has that label.</exception>
private static ColumnDefinition ResolveColumnDefinition(string value)
{
    var label = value.Trim();
    var definition = ExpectedColumns.SingleOrDefault(
        item => string.Equals(item.Label, label, StringComparison.OrdinalIgnoreCase));

    if (definition is null)
    {
        throw new InvalidOperationException($"Unsupported variant column label '{value}'.");
    }

    return definition;
}
|
||||
|
||||
/// <summary>
/// Moves affix lines that appear at the start of a cell into the cell directly above it in the same column.
/// A cell is left alone when it has no leading affix lines or consists only of affix lines.
/// </summary>
/// <remarks>
/// Rows are processed top to bottom, so lines moved out of a cell are gone before that cell is considered as the
/// "current" cell for the row below. Cells are indexed once by (row, column) instead of scanning the whole list
/// for every pair. Column keys are de-duplicated with the same ordinal comparison used for the lookup.
/// </remarks>
/// <param name="cellEntries">Cells to repair; their <c>Lines</c> lists are modified in place.</param>
/// <param name="affixLegendSymbols">Legend symbols used for affix detection.</param>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    if (cellEntries.Count == 0)
    {
        return;
    }

    var maxRowIndex = cellEntries.Max(item => item.RowIndex);
    var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.Ordinal).ToList();

    // SingleOrDefault on a group keeps the original contract: at most one cell per (row, column).
    var cellsByPosition = cellEntries.ToLookup(item => (item.RowIndex, item.ColumnKey));

    for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
    {
        foreach (var columnKey in columnKeys)
        {
            var current = cellsByPosition[(rowIndex, columnKey)].SingleOrDefault();
            var next = cellsByPosition[(rowIndex + 1, columnKey)].SingleOrDefault();
            if (current is null || next is null)
            {
                continue;
            }

            var leadingAffixCount = 0;
            while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
            {
                leadingAffixCount++;
            }

            if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
            {
                continue;
            }

            current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
            next.Lines.RemoveRange(0, leadingAffixCount);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Picks the top coordinate at which the row of <paramref name="current"/> ends and the row of
/// <paramref name="next"/> begins.
/// </summary>
/// <remarks>
/// Among the body lines lying between the two labels, the search runs from the bottom up for an affix-like line
/// directly followed by a non-affix line, and the boundary is placed just past the midpoint of that pair.
/// When no such pair exists, the boundary is just past the midpoint of the two labels.
/// </remarks>
/// <param name="current">Anchor of the upper row.</param>
/// <param name="next">Anchor of the lower row.</param>
/// <param name="bodyLines">Body lines with their top coordinate and affix classification.</param>
/// <returns>The first top coordinate belonging to the lower row.</returns>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int JustPastMidpoint(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;

    var between = bodyLines
        .Where(item => item.Top >= current.Top && item.Top < next.Top)
        .OrderBy(item => item.Top)
        .ToList();

    // cursor indexes the lower line of each adjacent pair, starting with the bottom-most pair.
    var cursor = between.Count - 1;
    while (cursor > 0)
    {
        var upper = between[cursor - 1];
        var lower = between[cursor];

        if (upper.IsAffixLike && !lower.IsAffixLike)
        {
            return JustPastMidpoint(upper.Top, lower.Top);
        }

        cursor--;
    }

    return JustPastMidpoint(current.Top, next.Top);
}
|
||||
|
||||
/// <summary>
/// An expected variant column: the Key stored on parsed cells and columns, and the Label that header text is
/// matched against (case-insensitively) in ResolveColumnDefinition.
/// </summary>
private sealed record ColumnDefinition(string Key, string Label);
|
||||
|
||||
/// <summary>
/// A roll-band row found in the XML: its normalized label, the top coordinate of the label fragment, and a
/// 1-based sort order assigned in top-to-bottom order by Parse.
/// </summary>
private sealed record RowAnchor(string Label, int Top, int SortOrder);
|
||||
|
||||
/// <summary>
/// Working record for one table cell while parsing: which row and column it belongs to, plus its text lines.
/// The <see cref="Lines"/> list is the instance passed in and is edited in place by the affix repair step.
/// </summary>
private sealed class CellEntry
{
    public CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    /// <summary>Label of the roll band (row) the cell belongs to.</summary>
    public string RollBandLabel { get; }

    /// <summary>Zero-based index of the cell's row.</summary>
    public int RowIndex { get; }

    /// <summary>Key of the cell's column.</summary>
    public string ColumnKey { get; }

    /// <summary>Text lines of the cell, top to bottom.</summary>
    public List<string> Lines { get; }
}
|
||||
}
|
||||
Reference in New Issue
Block a user