diff --git a/docs/critical_import_tool.md b/docs/critical_import_tool.md index cb746c0..861c140 100644 --- a/docs/critical_import_tool.md +++ b/docs/critical_import_tool.md @@ -63,7 +63,6 @@ The current implementation supports: The current implementation does not yet support: - OCR/image-based PDFs such as `Void.pdf` -- normalized `critical_branch` population - normalized `critical_effect` population - automatic confidence scoring beyond validation errors @@ -210,10 +209,6 @@ The importer now explicitly rejects cells that still look structurally wrong aft This keeps the phase-2.1 safety goal in place while allowing broader standard-table layouts that render a single affix block either before or after the prose block. -## Planned Future Phases - -The current architecture is intended to support additional phases: - ### Phase 3: Broader Table Coverage Phase 3 expands the manifest and validates the shared `standard` parser across a broader set of `A-E` tables. @@ -494,11 +489,12 @@ Affix-like classification is intentionally conservative. Numeric prose lines suc The current implementation stores: -- `RawCellText` -- `DescriptionText` -- `RawAffixText` +- base `RawCellText` +- base `DescriptionText` +- base `RawAffixText` +- parsed conditional branches with condition text, branch prose, and branch affix text -It does not yet normalize branches or effects into separate tables. +It does not yet normalize effects into separate tables. ## Validation Rules diff --git a/src/RolemasterDb.App/Data/RolemasterDbContext.cs b/src/RolemasterDb.App/Data/RolemasterDbContext.cs index 6f0dd04..8452ee0 100644 --- a/src/RolemasterDb.App/Data/RolemasterDbContext.cs +++ b/src/RolemasterDb.App/Data/RolemasterDbContext.cs @@ -14,6 +14,7 @@ public sealed class RolemasterDbContext(DbContextOptions op public DbSet CriticalColumns => Set(); public DbSet CriticalRollBands => Set(); public DbSet CriticalResults => Set(); + public DbSet CriticalBranches => Set(); protected override void OnModelCreating(ModelBuilder modelBuilder) { @@ -78,5 +79,13 @@ public sealed class RolemasterDbContext(DbContextOptions op entity.HasIndex(item => new { item.CriticalTableId, item.CriticalGroupId, item.CriticalColumnId, item.CriticalRollBandId }).IsUnique(); entity.Property(item => item.ParseStatus).HasMaxLength(32); }); + + modelBuilder.Entity(entity => + { + entity.HasIndex(item => item.CriticalResultId); + entity.HasIndex(item => new { item.CriticalResultId, item.SortOrder }); + entity.Property(item => item.BranchKind).HasMaxLength(32); + entity.Property(item => item.ConditionKey).HasMaxLength(128); + }); } } diff --git a/src/RolemasterDb.App/Data/RolemasterDbInitializer.cs b/src/RolemasterDb.App/Data/RolemasterDbInitializer.cs index 63eb0d5..aa3ef6c 100644 --- a/src/RolemasterDb.App/Data/RolemasterDbInitializer.cs +++ b/src/RolemasterDb.App/Data/RolemasterDbInitializer.cs @@ -11,6 +11,7 @@ public static class RolemasterDbInitializer await using var dbContext = await dbFactory.CreateDbContextAsync(cancellationToken); await dbContext.Database.EnsureCreatedAsync(cancellationToken); + await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken); if (await dbContext.AttackTables.AnyAsync(cancellationToken)) { diff --git a/src/RolemasterDb.App/Data/RolemasterDbSchemaUpgrader.cs b/src/RolemasterDb.App/Data/RolemasterDbSchemaUpgrader.cs new file mode 100644 index 0000000..5b415df --- /dev/null +++ b/src/RolemasterDb.App/Data/RolemasterDbSchemaUpgrader.cs @@ -0,0 +1,43 @@ +using Microsoft.EntityFrameworkCore; + +namespace RolemasterDb.App.Data; + +public static class RolemasterDbSchemaUpgrader +{ + public static async Task EnsureLatestAsync(RolemasterDbContext dbContext, CancellationToken cancellationToken = default) + { + await dbContext.Database.ExecuteSqlRawAsync( + """ + CREATE TABLE IF NOT EXISTS "CriticalBranches" ( + "Id" INTEGER NOT NULL CONSTRAINT "PK_CriticalBranches" PRIMARY KEY AUTOINCREMENT, + "CriticalResultId" INTEGER NOT NULL, + "BranchKind" TEXT NOT NULL, + "ConditionKey" TEXT NULL, + "ConditionText" TEXT NOT NULL, + "ConditionJson" TEXT NOT NULL, + "RawText" TEXT NOT NULL, + "DescriptionText" TEXT NOT NULL, + "RawAffixText" TEXT NULL, + "ParsedJson" TEXT NOT NULL, + "SortOrder" INTEGER NOT NULL, + CONSTRAINT "FK_CriticalBranches_CriticalResults_CriticalResultId" + FOREIGN KEY ("CriticalResultId") REFERENCES "CriticalResults" ("Id") ON DELETE CASCADE + ); + """, + cancellationToken); + + await dbContext.Database.ExecuteSqlRawAsync( + """ + CREATE INDEX IF NOT EXISTS "IX_CriticalBranches_CriticalResultId" + ON "CriticalBranches" ("CriticalResultId"); + """, + cancellationToken); + + await dbContext.Database.ExecuteSqlRawAsync( + """ + CREATE INDEX IF NOT EXISTS "IX_CriticalBranches_CriticalResultId_SortOrder" + ON "CriticalBranches" ("CriticalResultId", "SortOrder"); + """, + cancellationToken); + } +} diff --git a/src/RolemasterDb.App/Domain/CriticalBranch.cs b/src/RolemasterDb.App/Domain/CriticalBranch.cs new file mode 100644 index 0000000..c202e5a --- /dev/null +++ b/src/RolemasterDb.App/Domain/CriticalBranch.cs @@ -0,0 +1,17 @@ +namespace RolemasterDb.App.Domain; + +public sealed class CriticalBranch +{ + public int Id { get; set; } + public int CriticalResultId { get; set; } + public string BranchKind { get; set; } = "conditional"; + public string? ConditionKey { get; set; } + public string ConditionText { get; set; } = string.Empty; + public string ConditionJson { get; set; } = "{}"; + public string RawText { get; set; } = string.Empty; + public string DescriptionText { get; set; } = string.Empty; + public string? RawAffixText { get; set; } + public string ParsedJson { get; set; } = "{}"; + public int SortOrder { get; set; } + public CriticalResult CriticalResult { get; set; } = null!; +} diff --git a/src/RolemasterDb.App/Domain/CriticalResult.cs b/src/RolemasterDb.App/Domain/CriticalResult.cs index 23eb595..1596425 100644 --- a/src/RolemasterDb.App/Domain/CriticalResult.cs +++ b/src/RolemasterDb.App/Domain/CriticalResult.cs @@ -16,4 +16,5 @@ public sealed class CriticalResult public CriticalGroup? CriticalGroup { get; set; } public CriticalColumn CriticalColumn { get; set; } = null!; public CriticalRollBand CriticalRollBand { get; set; } = null!; + public List Branches { get; set; } = []; } diff --git a/src/RolemasterDb.App/rolemaster.db b/src/RolemasterDb.App/rolemaster.db index 208d4c6..bba5078 100644 Binary files a/src/RolemasterDb.App/rolemaster.db and b/src/RolemasterDb.App/rolemaster.db differ diff --git a/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs b/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs index 7f598f9..13d807e 100644 --- a/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs +++ b/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs @@ -1,3 +1,6 @@ +using Microsoft.EntityFrameworkCore; + +using RolemasterDb.App.Data; using RolemasterDb.ImportTool.Parsing; namespace RolemasterDb.ImportTool.Tests; @@ -249,6 +252,83 @@ public sealed class StandardCriticalTableParserIntegrationTests Assert.Contains("Blast goes in through foe's eye", superSlaying.DescriptionText, StringComparison.OrdinalIgnoreCase); } + [Fact] + public async Task Slash_branch_cells_split_base_text_from_conditional_affix_branches() + { + var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "slash", StringComparison.Ordinal)); + var parseResult = await LoadParseResultAsync(entry); + var result = parseResult.Table.Results.Single(item => + item.GroupKey is null && + string.Equals(item.RollBandLabel, "36-45", StringComparison.Ordinal) && + string.Equals(item.ColumnKey, "B", StringComparison.Ordinal)); + + Assert.Equal("Strike foe in shin. If he doesn't have greaves, you slash open foe's shin.", result.DescriptionText); + Assert.Null(result.RawAffixText); + Assert.DoesNotContain("with leg greaves:", result.RawCellText, StringComparison.OrdinalIgnoreCase); + Assert.Equal(2, result.Branches.Count); + + var withGreaves = result.Branches.Single(item => string.Equals(item.ConditionText, "with leg greaves", StringComparison.OrdinalIgnoreCase)); + var withoutGreaves = result.Branches.Single(item => string.Equals(item.ConditionText, "w/o leg greaves", StringComparison.OrdinalIgnoreCase)); + + Assert.Equal("with_leg_greaves", withGreaves.ConditionKey); + Assert.Equal("+2H – π", withGreaves.RawAffixText); + Assert.Equal(string.Empty, withGreaves.DescriptionText); + Assert.Equal("without_leg_greaves", withoutGreaves.ConditionKey); + Assert.Equal("+2H – ∫", withoutGreaves.RawAffixText); + } + + [Fact] + public async Task Impact_branch_cells_keep_prose_branch_text_separate_from_affix_branch_text() + { + var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "impact", StringComparison.Ordinal)); + var parseResult = await LoadParseResultAsync(entry); + var result = parseResult.Table.Results.Single(item => + item.GroupKey is null && + string.Equals(item.RollBandLabel, "86-90", StringComparison.Ordinal) && + string.Equals(item.ColumnKey, "D", StringComparison.Ordinal)); + + Assert.Equal( + "Onslaught to foe's midsection. Organs are damaged and foe throws up blood. Foe's abdomen is seriously damaged. He falls and should not be moved.", + result.DescriptionText); + Assert.Null(result.RawAffixText); + Assert.Equal(2, result.Branches.Count); + + var withArmor = result.Branches.Single(item => string.Equals(item.ConditionText, "with abdominal armor", StringComparison.OrdinalIgnoreCase)); + var withoutArmor = result.Branches.Single(item => string.Equals(item.ConditionText, "w/o abdominal armor", StringComparison.OrdinalIgnoreCase)); + + Assert.Equal("12∑", withArmor.RawAffixText); + Assert.Equal(string.Empty, withArmor.DescriptionText); + Assert.Null(withoutArmor.RawAffixText); + Assert.Equal("dies in 6 rounds", withoutArmor.DescriptionText); + } + + [Fact] + public async Task Loader_upgrades_existing_sqlite_and_persists_branch_rows() + { + var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "slash", StringComparison.Ordinal)); + var parseResult = await LoadParseResultAsync(entry); + var databasePath = CreateTemporaryDatabaseCopy(); + var loader = new CriticalImportLoader(databasePath); + + await loader.LoadAsync(parseResult.Table); + + await using var dbContext = CreateDbContext(databasePath); + var result = await dbContext.CriticalResults + .Include(item => item.CriticalTable) + .Include(item => item.CriticalColumn) + .Include(item => item.CriticalRollBand) + .Include(item => item.Branches) + .SingleAsync(item => + item.CriticalTable.Slug == "slash" && + item.CriticalColumn.ColumnKey == "B" && + item.CriticalRollBand.Label == "36-45"); + + Assert.DoesNotContain("with leg greaves:", result.RawCellText, StringComparison.OrdinalIgnoreCase); + Assert.Equal(2, result.Branches.Count); + Assert.Contains(result.Branches, item => item.ConditionKey == "with_leg_greaves" && item.RawAffixText == "+2H – π"); + Assert.Contains(result.Branches, item => item.ConditionKey == "without_leg_greaves" && item.RawAffixText == "+2H – ∫"); + } + private static async Task LoadParseResultAsync(CriticalImportManifestEntry entry) { var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml"); @@ -278,6 +358,22 @@ public sealed class StandardCriticalTableParserIntegrationTests return cacheRoot; } + private static RolemasterDbContext CreateDbContext(string databasePath) + { + var options = new DbContextOptionsBuilder() + .UseSqlite($"Data Source={databasePath}") + .Options; + + return new RolemasterDbContext(options); + } + + private static string CreateTemporaryDatabaseCopy() + { + var databasePath = Path.Combine(GetArtifactCacheRoot(), $"rolemaster-{Guid.NewGuid():N}.db"); + File.Copy(Path.Combine(GetRepositoryRoot(), "src", "RolemasterDb.App", "rolemaster.db"), databasePath, true); + return databasePath; + } + private static string GetRepositoryRoot() { var probe = new DirectoryInfo(AppContext.BaseDirectory); diff --git a/src/RolemasterDb.ImportTool/CriticalImportLoader.cs b/src/RolemasterDb.ImportTool/CriticalImportLoader.cs index f7a0417..5a6133b 100644 --- a/src/RolemasterDb.ImportTool/CriticalImportLoader.cs +++ b/src/RolemasterDb.ImportTool/CriticalImportLoader.cs @@ -12,10 +12,12 @@ public sealed class CriticalImportLoader(string databasePath) { await using var dbContext = CreateDbContext(); await dbContext.Database.EnsureCreatedAsync(cancellationToken); + await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken); var removedTableCount = await dbContext.CriticalTables.CountAsync(cancellationToken); await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken); + await dbContext.CriticalBranches.ExecuteDeleteAsync(cancellationToken); await dbContext.CriticalResults.ExecuteDeleteAsync(cancellationToken); await dbContext.CriticalGroups.ExecuteDeleteAsync(cancellationToken); await dbContext.CriticalColumns.ExecuteDeleteAsync(cancellationToken); @@ -30,6 +32,7 @@ public sealed class CriticalImportLoader(string databasePath) { await using var dbContext = CreateDbContext(); await dbContext.Database.EnsureCreatedAsync(cancellationToken); + await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken); await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken); await DeleteTableAsync(dbContext, table.Slug, cancellationToken); @@ -86,7 +89,21 @@ public sealed class CriticalImportLoader(string databasePath) DescriptionText = item.DescriptionText, RawAffixText = item.RawAffixText, ParsedJson = "{}", - ParseStatus = "raw" + ParseStatus = "raw", + Branches = item.Branches + .Select(branch => new CriticalBranch + { + BranchKind = branch.BranchKind, + ConditionKey = branch.ConditionKey, + ConditionText = branch.ConditionText, + ConditionJson = "{}", + RawText = branch.RawText, + DescriptionText = branch.DescriptionText, + RawAffixText = branch.RawAffixText, + ParsedJson = "{}", + SortOrder = branch.SortOrder + }) + .ToList() }) .ToList(); @@ -121,6 +138,10 @@ public sealed class CriticalImportLoader(string databasePath) return; } + await dbContext.CriticalBranches + .Where(item => item.CriticalResult.CriticalTableId == tableId.Value) + .ExecuteDeleteAsync(cancellationToken); + await dbContext.CriticalResults .Where(item => item.CriticalTableId == tableId.Value) .ExecuteDeleteAsync(cancellationToken); diff --git a/src/RolemasterDb.ImportTool/Parsing/ColumnarCellEntry.cs b/src/RolemasterDb.ImportTool/Parsing/ColumnarCellEntry.cs new file mode 100644 index 0000000..a1823dd --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/ColumnarCellEntry.cs @@ -0,0 +1,10 @@ +namespace RolemasterDb.ImportTool.Parsing; + +internal sealed class ColumnarCellEntry(string? groupKey, string rollBandLabel, int rowIndex, string columnKey, List lines) +{ + public string? GroupKey { get; } = groupKey; + public string RollBandLabel { get; } = rollBandLabel; + public int RowIndex { get; } = rowIndex; + public string ColumnKey { get; } = columnKey; + public List Lines { get; } = lines; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/CriticalCellParseContent.cs b/src/RolemasterDb.ImportTool/Parsing/CriticalCellParseContent.cs new file mode 100644 index 0000000..e5ea315 --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/CriticalCellParseContent.cs @@ -0,0 +1,17 @@ +namespace RolemasterDb.ImportTool.Parsing; + +internal sealed class CriticalCellParseContent( + IReadOnlyList baseLines, + string rawCellText, + string descriptionText, + string? rawAffixText, + IReadOnlyList branches, + IReadOnlyList validationErrors) +{ + public IReadOnlyList BaseLines { get; } = baseLines; + public string RawCellText { get; } = rawCellText; + public string DescriptionText { get; } = descriptionText; + public string? RawAffixText { get; } = rawAffixText; + public IReadOnlyList Branches { get; } = branches; + public IReadOnlyList ValidationErrors { get; } = validationErrors; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/CriticalCellTextParser.cs b/src/RolemasterDb.ImportTool/Parsing/CriticalCellTextParser.cs new file mode 100644 index 0000000..0a561cf --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/CriticalCellTextParser.cs @@ -0,0 +1,114 @@ +namespace RolemasterDb.ImportTool.Parsing; + +internal static class CriticalCellTextParser +{ + internal static CriticalCellParseContent Parse(IReadOnlyList lines, ISet affixLegendSymbols) + { + var validationErrors = new List(); + var branchStartIndexes = FindBranchStartIndexes(lines); + var baseLineCount = branchStartIndexes.Count == 0 ? lines.Count : branchStartIndexes[0]; + var baseLines = lines.Take(baseLineCount).ToList(); + var branches = new List(); + + validationErrors.AddRange(ValidateSegmentCount(baseLines, affixLegendSymbols, "Base content")); + + for (var branchIndex = 0; branchIndex < branchStartIndexes.Count; branchIndex++) + { + var startIndex = branchStartIndexes[branchIndex]; + var endIndex = branchIndex == branchStartIndexes.Count - 1 + ? lines.Count + : branchStartIndexes[branchIndex + 1]; + + branches.Add(ParseBranch( + lines.Skip(startIndex).Take(endIndex - startIndex).ToList(), + branchIndex + 1, + affixLegendSymbols, + validationErrors)); + } + + var (rawCellText, descriptionText, rawAffixText) = BuildTextSections(baseLines, affixLegendSymbols); + return new CriticalCellParseContent(baseLines, rawCellText, descriptionText, rawAffixText, branches, validationErrors); + } + + private static ParsedCriticalBranch ParseBranch( + IReadOnlyList branchLines, + int sortOrder, + ISet affixLegendSymbols, + List validationErrors) + { + var firstLine = branchLines[0]; + var separatorIndex = firstLine.IndexOf(':', StringComparison.Ordinal); + var conditionText = CriticalTableParserSupport.CollapseWhitespace(firstLine[..separatorIndex]); + var firstPayloadLine = CriticalTableParserSupport.CollapseWhitespace(firstLine[(separatorIndex + 1)..]); + var payloadLines = new List(); + + if (!string.IsNullOrWhiteSpace(firstPayloadLine)) + { + payloadLines.Add(firstPayloadLine); + } + + foreach (var continuationLine in branchLines.Skip(1)) + { + var normalized = CriticalTableParserSupport.CollapseWhitespace(continuationLine); + if (!string.IsNullOrWhiteSpace(normalized)) + { + payloadLines.Add(normalized); + } + } + + validationErrors.AddRange(ValidateSegmentCount(payloadLines, affixLegendSymbols, $"Branch '{conditionText}'")); + + var (_, descriptionText, rawAffixText) = BuildTextSections(payloadLines, affixLegendSymbols); + return new ParsedCriticalBranch( + "conditional", + CriticalTableParserSupport.NormalizeConditionKey(conditionText), + conditionText, + string.Join(Environment.NewLine, branchLines), + descriptionText, + rawAffixText, + sortOrder); + } + + private static List FindBranchStartIndexes(IReadOnlyList lines) + { + var branchStartIndexes = new List(); + + for (var index = 0; index < lines.Count; index++) + { + if (CriticalTableParserSupport.IsConditionalBranchStartLine(lines[index])) + { + branchStartIndexes.Add(index); + } + } + + return branchStartIndexes; + } + + private static IReadOnlyList ValidateSegmentCount( + IReadOnlyList lines, + ISet affixLegendSymbols, + string scope) + { + if (lines.Count == 0) + { + return []; + } + + var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(lines, affixLegendSymbols); + return segmentCount > 2 + ? [$"{scope} interleaves prose and affix lines."] + : []; + } + + private static (string RawText, string DescriptionText, string? RawAffixText) BuildTextSections( + IReadOnlyList lines, + ISet affixLegendSymbols) + { + var rawText = string.Join(Environment.NewLine, lines); + var rawAffixLines = lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); + var descriptionLines = lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); + var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines)); + var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines); + return (rawText, descriptionText, rawAffixText); + } +} diff --git a/src/RolemasterDb.ImportTool/Parsing/CriticalTableParserSupport.cs b/src/RolemasterDb.ImportTool/Parsing/CriticalTableParserSupport.cs index 983ff2e..e7218e6 100644 --- a/src/RolemasterDb.ImportTool/Parsing/CriticalTableParserSupport.cs +++ b/src/RolemasterDb.ImportTool/Parsing/CriticalTableParserSupport.cs @@ -169,15 +169,9 @@ internal static class CriticalTableParserSupport return true; } - if (value.StartsWith("with ", StringComparison.OrdinalIgnoreCase) || - value.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) || - value.StartsWith("without ", StringComparison.OrdinalIgnoreCase) || - value.StartsWith("if ", StringComparison.OrdinalIgnoreCase) || - value.StartsWith("while ", StringComparison.OrdinalIgnoreCase) || - value.StartsWith("until ", StringComparison.OrdinalIgnoreCase) || - value.StartsWith("unless ", StringComparison.OrdinalIgnoreCase)) + if (IsConditionalBranchStartLine(value)) { - return value.Contains(':', StringComparison.Ordinal); + return true; } if (affixLegendSymbols.Count > 0 && @@ -242,6 +236,23 @@ internal static class CriticalTableParserSupport internal static string CollapseWhitespace(string value) => Regex.Replace(value.Trim(), @"\s+", " "); + internal static bool IsConditionalBranchStartLine(string value) + { + var normalized = value.Trim(); + if (!normalized.Contains(':', StringComparison.Ordinal)) + { + return false; + } + + return normalized.StartsWith("with ", StringComparison.OrdinalIgnoreCase) || + normalized.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) || + normalized.StartsWith("without ", StringComparison.OrdinalIgnoreCase) || + normalized.StartsWith("if ", StringComparison.OrdinalIgnoreCase) || + normalized.StartsWith("while ", StringComparison.OrdinalIgnoreCase) || + normalized.StartsWith("until ", StringComparison.OrdinalIgnoreCase) || + normalized.StartsWith("unless ", StringComparison.OrdinalIgnoreCase); + } + internal static string NormalizeText(string value) => value .Replace('\u00a0', ' ') @@ -250,6 +261,25 @@ internal static class CriticalTableParserSupport .Replace('’', '\'') .Trim(); + internal static string? NormalizeConditionKey(string conditionText) + { + var normalized = CollapseWhitespace(conditionText) + .ToLowerInvariant() + .Replace("w/o", "without", StringComparison.Ordinal); + normalized = Regex.Replace(normalized, @"[^a-z0-9]+", "_"); + normalized = normalized.Trim('_'); + return normalized.Length == 0 ? null : normalized; + } + + internal static int FindKeyTop(IReadOnlyList fragments) => + fragments + .Where(item => + string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) || + item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) || + item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase)) + .Select(item => (int?)item.Top) + .Min() ?? int.MaxValue; + internal static HashSet DetectAffixLegendSymbols(IReadOnlyList fragments, int keyTop) { if (keyTop == int.MaxValue) @@ -347,6 +377,138 @@ internal static class CriticalTableParserSupport return groups; } + internal static List CreateRowAnchors(IReadOnlyList rowLabelFragments) => + rowLabelFragments + .OrderBy(item => item.Top) + .Select((item, index) => new RowAnchor(NormalizeRollBandLabel(item.Text), item.Top, index + 1)) + .ToList(); + + internal static List BuildBodyFragments( + IReadOnlyList fragments, + int bodyStartTop, + int keyTop, + int leftCutoff, + IReadOnlyList rowAnchors, + IReadOnlyCollection excludedFragments, + IReadOnlyList<(string Key, double CenterX)> columnCenters, + ISet affixLegendSymbols) + { + var bodyFragments = fragments + .Where(item => + item.Top >= bodyStartTop && + item.Top < keyTop - TopGroupingTolerance && + !IsFooterPageNumberFragment(item, keyTop) && + !IsPotentialRowLabelFragment(item, leftCutoff) && + !rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) && + !excludedFragments.Contains(item)) + .ToList(); + + return SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols); + } + + internal static void RepairLeadingAffixLeakage(List cellEntries, ISet affixLegendSymbols) + { + var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex); + var axes = cellEntries + .Select(item => (item.GroupKey, item.ColumnKey)) + .Distinct() + .ToList(); + + for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++) + { + foreach (var (groupKey, columnKey) in axes) + { + var current = cellEntries.SingleOrDefault(item => + item.RowIndex == rowIndex && + string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) && + string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal)); + var next = cellEntries.SingleOrDefault(item => + item.RowIndex == rowIndex + 1 && + string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) && + string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal)); + if (current is null || next is null) + { + continue; + } + + var leadingAffixCount = 0; + while (leadingAffixCount < next.Lines.Count && IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols)) + { + leadingAffixCount++; + } + + if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count) + { + continue; + } + + current.Lines.AddRange(next.Lines.Take(leadingAffixCount)); + next.Lines.RemoveRange(0, leadingAffixCount); + } + } + } + + internal static int ResolveRowBoundaryTop( + RowAnchor current, + RowAnchor next, + IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines) + { + var linesBetweenLabels = bodyLines + .Where(item => item.Top >= current.Top && item.Top < next.Top) + .OrderBy(item => item.Top) + .ToList(); + + for (var index = linesBetweenLabels.Count - 2; index >= 0; index--) + { + if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike) + { + return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1; + } + } + + return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1; + } + + internal static void BuildParsedArtifacts( + IReadOnlyList cellEntries, + ISet affixLegendSymbols, + List parsedCells, + List parsedResults, + List validationErrors) + { + foreach (var cellEntry in cellEntries) + { + var content = CriticalCellTextParser.Parse(cellEntry.Lines, affixLegendSymbols); + validationErrors.AddRange(content.ValidationErrors.Select(error => + $"Cell '{BuildCellIdentifier(cellEntry)}': {error}")); + + parsedCells.Add(new ParsedCriticalCellArtifact( + cellEntry.GroupKey, + cellEntry.RollBandLabel, + cellEntry.ColumnKey, + cellEntry.Lines.ToList(), + content.BaseLines, + content.RawCellText, + content.DescriptionText, + content.RawAffixText, + content.Branches)); + + parsedResults.Add(new ParsedCriticalResult( + cellEntry.GroupKey, + cellEntry.ColumnKey, + cellEntry.RollBandLabel, + content.RawCellText, + content.DescriptionText, + content.RawAffixText, + content.Branches)); + } + } + + private static string BuildCellIdentifier(ColumnarCellEntry cellEntry) => + cellEntry.GroupKey is null + ? $"{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}" + : $"{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}"; + private static bool LooksLikeSplitRollBandStart(string value) => Regex.IsMatch(value.Trim(), @"^\d{2,3}\s*-$"); diff --git a/src/RolemasterDb.ImportTool/Parsing/GroupedVariantCriticalTableParser.cs b/src/RolemasterDb.ImportTool/Parsing/GroupedVariantCriticalTableParser.cs index ed6c3e4..b0fcf76 100644 --- a/src/RolemasterDb.ImportTool/Parsing/GroupedVariantCriticalTableParser.cs +++ b/src/RolemasterDb.ImportTool/Parsing/GroupedVariantCriticalTableParser.cs @@ -36,13 +36,7 @@ public sealed class GroupedVariantCriticalTableParser groupHeaders.Max(item => item.Top), columnHeaders.Max(item => item.Top)) + CriticalTableParserSupport.HeaderToBodyMinimumGap; - var keyTop = fragments - .Where(item => - string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) || - item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) || - item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase)) - .Select(item => (int?)item.Top) - .Min() ?? int.MaxValue; + var keyTop = CriticalTableParserSupport.FindKeyTop(fragments); var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop); var leftCutoff = columnHeaders.Min(item => item.Left) - 10; var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments( @@ -50,11 +44,7 @@ public sealed class GroupedVariantCriticalTableParser leftCutoff, bodyStartTop, keyTop); - - var rowAnchors = rowLabelFragments - .OrderBy(item => item.Top) - .Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1)) - .ToList(); + var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments); if (rowAnchors.Count == 0) { @@ -65,34 +55,33 @@ public sealed class GroupedVariantCriticalTableParser .Select(item => (item.CompositeKey, item.CenterX)) .ToList(); - var bodyFragments = fragments - .Where(item => - item.Top >= bodyStartTop && - item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance && - !CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) && - !CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) && - !rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) && - !groupHeaders.Contains(item) && - !columnHeaders.Contains(item)) - .ToList(); - bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols); + var excludedFragments = groupHeaders.Concat(columnHeaders).ToList(); + var bodyFragments = CriticalTableParserSupport.BuildBodyFragments( + fragments, + bodyStartTop, + keyTop, + leftCutoff, + rowAnchors, + excludedFragments, + columnCenters, + affixLegendSymbols); var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols); var parsedRollBands = rowAnchors .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder)) .ToList(); - var cellEntries = new List(); + var cellEntries = new List(); for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++) { var rowStart = rowIndex == 0 ? bodyStartTop - : ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); + : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); var rowEnd = rowIndex == rowAnchors.Count - 1 ? keyTop - 1 - : ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); + : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); var rowFragments = bodyFragments .Where(item => item.Top >= rowStart && item.Top < rowEnd) @@ -112,7 +101,7 @@ public sealed class GroupedVariantCriticalTableParser continue; } - cellEntries.Add(new CellEntry( + cellEntries.Add(new ColumnarCellEntry( anchor.GroupKey, rowAnchors[rowIndex].Label, rowIndex, @@ -121,45 +110,11 @@ public sealed class GroupedVariantCriticalTableParser } } - RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); + CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); var parsedCells = new List(); var parsedResults = new List(); - - foreach (var cellEntry in cellEntries - .OrderBy(item => item.RowIndex) - .ThenBy(item => item.GroupKey, StringComparer.Ordinal) - .ThenBy(item => item.ColumnKey, StringComparer.Ordinal)) - { - var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols); - if (segmentCount > 2) - { - validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}' interleaves prose and affix lines."); - } - - var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); - var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); - var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines); - var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines)); - var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines); - - parsedCells.Add(new ParsedCriticalCellArtifact( - cellEntry.GroupKey, - cellEntry.RollBandLabel, - cellEntry.ColumnKey, - cellEntry.Lines, - rawCellText, - descriptionText, - rawAffixText)); - - parsedResults.Add(new ParsedCriticalResult( - cellEntry.GroupKey, - cellEntry.ColumnKey, - cellEntry.RollBandLabel, - rawCellText, - descriptionText, - rawAffixText)); - } + CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors); var expectedCellCount = rowAnchors.Count * ExpectedGroups.Length * ExpectedColumns.Length; if (parsedCells.Count != expectedCellCount) @@ -235,72 +190,4 @@ public sealed class GroupedVariantCriticalTableParser throw new InvalidOperationException("Could not find the grouped-variant column header row in the XML artifact."); } - - private static void RepairLeadingAffixLeakage(List cellEntries, ISet affixLegendSymbols) - { - var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex); - var axes = cellEntries - .Select(item => (item.GroupKey, item.ColumnKey)) - .Distinct() - .ToList(); - - for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++) - { - foreach (var (groupKey, columnKey) in axes) - { - var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.GroupKey == groupKey && item.ColumnKey == columnKey); - var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.GroupKey == groupKey && item.ColumnKey == columnKey); - if (current is null || next is null) - { - continue; - } - - var leadingAffixCount = 0; - while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols)) - { - leadingAffixCount++; - } - - if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count) - { - continue; - } - - current.Lines.AddRange(next.Lines.Take(leadingAffixCount)); - next.Lines.RemoveRange(0, leadingAffixCount); - } - } - } - - private static int ResolveRowBoundaryTop( - RowAnchor current, - RowAnchor next, - IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines) - { - var linesBetweenLabels = bodyLines - .Where(item => item.Top >= current.Top && item.Top < next.Top) - .OrderBy(item => item.Top) - .ToList(); - - for (var index = linesBetweenLabels.Count - 2; index >= 0; index--) - { - if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike) - { - return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1; - } - } - - return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1; - } - - private sealed record RowAnchor(string Label, int Top, int SortOrder); - - private sealed class CellEntry(string groupKey, string rollBandLabel, int rowIndex, string columnKey, List lines) - { - public string GroupKey { get; } = groupKey; - public string RollBandLabel { get; } = rollBandLabel; - public int RowIndex { get; } = rowIndex; - public string ColumnKey { get; } = columnKey; - public List Lines { get; } = lines; - } } diff --git a/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalBranch.cs b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalBranch.cs new file mode 100644 index 0000000..869d4fd --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalBranch.cs @@ -0,0 +1,19 @@ +namespace RolemasterDb.ImportTool.Parsing; + +public sealed class ParsedCriticalBranch( + string branchKind, + string? conditionKey, + string conditionText, + string rawText, + string descriptionText, + string? rawAffixText, + int sortOrder) +{ + public string BranchKind { get; } = branchKind; + public string? ConditionKey { get; } = conditionKey; + public string ConditionText { get; } = conditionText; + public string RawText { get; } = rawText; + public string DescriptionText { get; } = descriptionText; + public string? RawAffixText { get; } = rawAffixText; + public int SortOrder { get; } = sortOrder; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs index ecfdc19..9377d7a 100644 --- a/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs +++ b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs @@ -5,15 +5,19 @@ public sealed class ParsedCriticalCellArtifact( string rollBandLabel, string columnKey, IReadOnlyList lines, + IReadOnlyList baseLines, string rawCellText, string descriptionText, - string? rawAffixText) + string? rawAffixText, + IReadOnlyList branches) { public string? GroupKey { get; } = groupKey; public string RollBandLabel { get; } = rollBandLabel; public string ColumnKey { get; } = columnKey; public IReadOnlyList Lines { get; } = lines; + public IReadOnlyList BaseLines { get; } = baseLines; public string RawCellText { get; } = rawCellText; public string DescriptionText { get; } = descriptionText; public string? RawAffixText { get; } = rawAffixText; + public IReadOnlyList Branches { get; } = branches; } diff --git a/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalResult.cs b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalResult.cs index 7d89774..0132f4d 100644 --- a/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalResult.cs +++ b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalResult.cs @@ -6,7 +6,8 @@ public sealed class ParsedCriticalResult( string rollBandLabel, string rawCellText, string descriptionText, - string? rawAffixText) + string? rawAffixText, + IReadOnlyList branches) { public string? GroupKey { get; } = groupKey; public string ColumnKey { get; } = columnKey; @@ -14,4 +15,5 @@ public sealed class ParsedCriticalResult( public string RawCellText { get; } = rawCellText; public string DescriptionText { get; } = descriptionText; public string? RawAffixText { get; } = rawAffixText; + public IReadOnlyList Branches { get; } = branches; } diff --git a/src/RolemasterDb.ImportTool/Parsing/RowAnchor.cs b/src/RolemasterDb.ImportTool/Parsing/RowAnchor.cs new file mode 100644 index 0000000..86ff1a3 --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/RowAnchor.cs @@ -0,0 +1,3 @@ +namespace RolemasterDb.ImportTool.Parsing; + +internal sealed record RowAnchor(string Label, int Top, int SortOrder); diff --git a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs index c250495..df9aa97 100644 --- a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs +++ b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs @@ -15,13 +15,7 @@ public sealed class StandardCriticalTableParser .ToList(); var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap; - var keyTop = fragments - .Where(item => - string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) || - item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) || - item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase)) - .Select(item => (int?)item.Top) - .Min() ?? int.MaxValue; + var keyTop = CriticalTableParserSupport.FindKeyTop(fragments); var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop); var leftCutoff = headerFragments.Min(item => item.Left) - 10; var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments( @@ -29,44 +23,39 @@ public sealed class StandardCriticalTableParser leftCutoff, bodyStartTop, keyTop); - - var rowAnchors = rowLabelFragments - .OrderBy(item => item.Top) - .Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1)) - .ToList(); + var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments); if (rowAnchors.Count == 0) { validationErrors.Add("No roll-band labels were found in the XML artifact."); } - var bodyFragments = fragments - .Where(item => - item.Top >= bodyStartTop && - item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance && - !CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) && - !CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) && - !rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) && - !headerFragments.Contains(item)) - .ToList(); - bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols); + var bodyFragments = CriticalTableParserSupport.BuildBodyFragments( + fragments, + bodyStartTop, + keyTop, + leftCutoff, + rowAnchors, + headerFragments, + columnCenters, + affixLegendSymbols); var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols); var parsedRollBands = rowAnchors .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder)) .ToList(); - var cellEntries = new List(); + var cellEntries = new List(); for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++) { var rowStart = rowIndex == 0 ? bodyStartTop - : ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); + : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); var rowEnd = rowIndex == rowAnchors.Count - 1 ? keyTop - 1 - : ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); + : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); var rowFragments = bodyFragments .Where(item => item.Top >= rowStart && item.Top < rowEnd) @@ -86,7 +75,8 @@ public sealed class StandardCriticalTableParser continue; } - cellEntries.Add(new CellEntry( + cellEntries.Add(new ColumnarCellEntry( + null, rowAnchors[rowIndex].Label, rowIndex, columnAnchor.Key, @@ -94,44 +84,11 @@ public sealed class StandardCriticalTableParser } } - RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); + CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); var parsedCells = new List(); var parsedResults = new List(); - - foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey)) - { - var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols); - - if (segmentCount > 2) - { - validationErrors.Add( - $"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines."); - } - - var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); - var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); - var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines); - var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines)); - var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines); - - parsedCells.Add(new ParsedCriticalCellArtifact( - null, - cellEntry.RollBandLabel, - cellEntry.ColumnKey, - cellEntry.Lines, - rawCellText, - descriptionText, - rawAffixText)); - - parsedResults.Add(new ParsedCriticalResult( - null, - cellEntry.ColumnKey, - cellEntry.RollBandLabel, - rawCellText, - descriptionText, - rawAffixText)); - } + CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors); if (columnCenters.Count != 5) { @@ -185,68 +142,4 @@ public sealed class StandardCriticalTableParser throw new InvalidOperationException("Could not find the standard-table A-E header row in the XML artifact."); } - - private static void RepairLeadingAffixLeakage(List cellEntries, ISet affixLegendSymbols) - { - var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex); - var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList(); - - for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++) - { - foreach (var columnKey in columnKeys) - { - var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey); - var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey); - if (current is null || next is null) - { - continue; - } - - var leadingAffixCount = 0; - while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols)) - { - leadingAffixCount++; - } - - if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count) - { - continue; - } - - current.Lines.AddRange(next.Lines.Take(leadingAffixCount)); - next.Lines.RemoveRange(0, leadingAffixCount); - } - } - } - - private static int ResolveRowBoundaryTop( - RowAnchor current, - RowAnchor next, - IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines) - { - var linesBetweenLabels = bodyLines - .Where(item => item.Top >= current.Top && item.Top < next.Top) - .OrderBy(item => item.Top) - .ToList(); - - for (var index = linesBetweenLabels.Count - 2; index >= 0; index--) - { - if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike) - { - return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1; - } - } - - return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1; - } - - private sealed record RowAnchor(string Label, int Top, int SortOrder); - - private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List lines) - { - public string RollBandLabel { get; } = rollBandLabel; - public int RowIndex { get; } = rowIndex; - public string ColumnKey { get; } = columnKey; - public List Lines { get; } = lines; - } } diff --git a/src/RolemasterDb.ImportTool/Parsing/VariantColumnCriticalTableParser.cs b/src/RolemasterDb.ImportTool/Parsing/VariantColumnCriticalTableParser.cs index 5e1a716..91f8dff 100644 --- a/src/RolemasterDb.ImportTool/Parsing/VariantColumnCriticalTableParser.cs +++ b/src/RolemasterDb.ImportTool/Parsing/VariantColumnCriticalTableParser.cs @@ -28,13 +28,7 @@ public sealed class VariantColumnCriticalTableParser .ToList(); var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap; - var keyTop = fragments - .Where(item => - string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) || - item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) || - item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase)) - .Select(item => (int?)item.Top) - .Min() ?? int.MaxValue; + var keyTop = CriticalTableParserSupport.FindKeyTop(fragments); var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop); var leftCutoff = headerFragments.Min(item => item.Left) - 10; var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments( @@ -42,11 +36,7 @@ public sealed class VariantColumnCriticalTableParser leftCutoff, bodyStartTop, keyTop); - - var rowAnchors = rowLabelFragments - .OrderBy(item => item.Top) - .Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1)) - .ToList(); + var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments); if (rowAnchors.Count == 0) { @@ -57,33 +47,32 @@ public sealed class VariantColumnCriticalTableParser .Select(item => (item.Key, item.CenterX)) .ToList(); - var bodyFragments = fragments - .Where(item => - item.Top >= bodyStartTop && - item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance && - !CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) && - !CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) && - !rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) && - !headerFragments.Contains(item)) - .ToList(); - bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols); + var bodyFragments = CriticalTableParserSupport.BuildBodyFragments( + fragments, + bodyStartTop, + keyTop, + leftCutoff, + rowAnchors, + headerFragments, + columnCenters, + affixLegendSymbols); var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols); var parsedRollBands = rowAnchors .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder)) .ToList(); - var cellEntries = new List(); + var cellEntries = new List(); for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++) { var rowStart = rowIndex == 0 ? bodyStartTop - : ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); + : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); var rowEnd = rowIndex == rowAnchors.Count - 1 ? keyTop - 1 - : ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); + : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); var rowFragments = bodyFragments .Where(item => item.Top >= rowStart && item.Top < rowEnd) @@ -103,7 +92,8 @@ public sealed class VariantColumnCriticalTableParser continue; } - cellEntries.Add(new CellEntry( + cellEntries.Add(new ColumnarCellEntry( + null, rowAnchors[rowIndex].Label, rowIndex, columnAnchor.Key, @@ -111,42 +101,11 @@ public sealed class VariantColumnCriticalTableParser } } - RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); + CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); var parsedCells = new List(); var parsedResults = new List(); - - foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey, StringComparer.Ordinal)) - { - var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols); - if (segmentCount > 2) - { - validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines."); - } - - var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); - var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList(); - var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines); - var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines)); - var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines); - - parsedCells.Add(new ParsedCriticalCellArtifact( - null, - cellEntry.RollBandLabel, - cellEntry.ColumnKey, - cellEntry.Lines, - rawCellText, - descriptionText, - rawAffixText)); - - parsedResults.Add(new ParsedCriticalResult( - null, - cellEntry.ColumnKey, - cellEntry.RollBandLabel, - rawCellText, - descriptionText, - rawAffixText)); - } + CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors); if (columnAnchors.Count != ExpectedColumns.Length) { @@ -208,69 +167,5 @@ public sealed class VariantColumnCriticalTableParser ExpectedColumns.SingleOrDefault(item => string.Equals(item.Label, value.Trim(), StringComparison.OrdinalIgnoreCase)) ?? throw new InvalidOperationException($"Unsupported variant column label '{value}'."); - private static void RepairLeadingAffixLeakage(List cellEntries, ISet affixLegendSymbols) - { - var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex); - var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList(); - - for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++) - { - foreach (var columnKey in columnKeys) - { - var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey); - var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey); - if (current is null || next is null) - { - continue; - } - - var leadingAffixCount = 0; - while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols)) - { - leadingAffixCount++; - } - - if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count) - { - continue; - } - - current.Lines.AddRange(next.Lines.Take(leadingAffixCount)); - next.Lines.RemoveRange(0, leadingAffixCount); - } - } - } - - private static int ResolveRowBoundaryTop( - RowAnchor current, - RowAnchor next, - IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines) - { - var linesBetweenLabels = bodyLines - .Where(item => item.Top >= current.Top && item.Top < next.Top) - .OrderBy(item => item.Top) - .ToList(); - - for (var index = linesBetweenLabels.Count - 2; index >= 0; index--) - { - if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike) - { - return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1; - } - } - - return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1; - } - private sealed record ColumnDefinition(string Key, string Label); - - private sealed record RowAnchor(string Label, int Top, int SortOrder); - - private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List lines) - { - public string RollBandLabel { get; } = rollBandLabel; - public int RowIndex { get; } = rowIndex; - public string ColumnKey { get; } = columnKey; - public List Lines { get; } = lines; - } }