Implement phase 5 critical branch extraction

This commit is contained in:
2026-03-14 10:21:26 +01:00
parent b2f61c3d73
commit 60c5d886a4
20 changed files with 589 additions and 399 deletions

View File

@@ -63,7 +63,6 @@ The current implementation supports:
The current implementation does not yet support: The current implementation does not yet support:
- OCR/image-based PDFs such as `Void.pdf` - OCR/image-based PDFs such as `Void.pdf`
- normalized `critical_branch` population
- normalized `critical_effect` population - normalized `critical_effect` population
- automatic confidence scoring beyond validation errors - automatic confidence scoring beyond validation errors
@@ -210,10 +209,6 @@ The importer now explicitly rejects cells that still look structurally wrong aft
This keeps the phase-2.1 safety goal in place while allowing broader standard-table layouts that render a single affix block either before or after the prose block. This keeps the phase-2.1 safety goal in place while allowing broader standard-table layouts that render a single affix block either before or after the prose block.
## Planned Future Phases
The current architecture is intended to support additional phases:
### Phase 3: Broader Table Coverage ### Phase 3: Broader Table Coverage
Phase 3 expands the manifest and validates the shared `standard` parser across a broader set of `A-E` tables. Phase 3 expands the manifest and validates the shared `standard` parser across a broader set of `A-E` tables.
@@ -494,11 +489,12 @@ Affix-like classification is intentionally conservative. Numeric prose lines suc
The current implementation stores: The current implementation stores:
- `RawCellText` - base `RawCellText`
- `DescriptionText` - base `DescriptionText`
- `RawAffixText` - base `RawAffixText`
- parsed conditional branches with condition text, branch prose, and branch affix text
It does not yet normalize branches or effects into separate tables. It does not yet normalize effects into separate tables.
## Validation Rules ## Validation Rules

View File

@@ -14,6 +14,7 @@ public sealed class RolemasterDbContext(DbContextOptions<RolemasterDbContext> op
public DbSet<CriticalColumn> CriticalColumns => Set<CriticalColumn>(); public DbSet<CriticalColumn> CriticalColumns => Set<CriticalColumn>();
public DbSet<CriticalRollBand> CriticalRollBands => Set<CriticalRollBand>(); public DbSet<CriticalRollBand> CriticalRollBands => Set<CriticalRollBand>();
public DbSet<CriticalResult> CriticalResults => Set<CriticalResult>(); public DbSet<CriticalResult> CriticalResults => Set<CriticalResult>();
public DbSet<CriticalBranch> CriticalBranches => Set<CriticalBranch>();
protected override void OnModelCreating(ModelBuilder modelBuilder) protected override void OnModelCreating(ModelBuilder modelBuilder)
{ {
@@ -78,5 +79,13 @@ public sealed class RolemasterDbContext(DbContextOptions<RolemasterDbContext> op
entity.HasIndex(item => new { item.CriticalTableId, item.CriticalGroupId, item.CriticalColumnId, item.CriticalRollBandId }).IsUnique(); entity.HasIndex(item => new { item.CriticalTableId, item.CriticalGroupId, item.CriticalColumnId, item.CriticalRollBandId }).IsUnique();
entity.Property(item => item.ParseStatus).HasMaxLength(32); entity.Property(item => item.ParseStatus).HasMaxLength(32);
}); });
modelBuilder.Entity<CriticalBranch>(entity =>
{
entity.HasIndex(item => item.CriticalResultId);
entity.HasIndex(item => new { item.CriticalResultId, item.SortOrder });
entity.Property(item => item.BranchKind).HasMaxLength(32);
entity.Property(item => item.ConditionKey).HasMaxLength(128);
});
} }
} }

View File

@@ -11,6 +11,7 @@ public static class RolemasterDbInitializer
await using var dbContext = await dbFactory.CreateDbContextAsync(cancellationToken); await using var dbContext = await dbFactory.CreateDbContextAsync(cancellationToken);
await dbContext.Database.EnsureCreatedAsync(cancellationToken); await dbContext.Database.EnsureCreatedAsync(cancellationToken);
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
if (await dbContext.AttackTables.AnyAsync(cancellationToken)) if (await dbContext.AttackTables.AnyAsync(cancellationToken))
{ {

View File

@@ -0,0 +1,43 @@
using Microsoft.EntityFrameworkCore;
namespace RolemasterDb.App.Data;
/// <summary>
/// Applies raw-SQL schema additions to a database that may have been created before
/// the "CriticalBranches" table existed.
/// </summary>
public static class RolemasterDbSchemaUpgrader
{
/// <summary>
/// Creates the "CriticalBranches" table and its two indexes when they are not present yet.
/// </summary>
/// <param name="dbContext">Context whose database executes the statements.</param>
/// <param name="cancellationToken">Token forwarded to each SQL command.</param>
/// <returns>A task that completes once all three statements have run.</returns>
public static async Task EnsureLatestAsync(RolemasterDbContext dbContext, CancellationToken cancellationToken = default)
{
// Every statement uses IF NOT EXISTS, so running this against an up-to-date database changes nothing.
// The table is created first because both index statements reference it.
await dbContext.Database.ExecuteSqlRawAsync(
"""
CREATE TABLE IF NOT EXISTS "CriticalBranches" (
"Id" INTEGER NOT NULL CONSTRAINT "PK_CriticalBranches" PRIMARY KEY AUTOINCREMENT,
"CriticalResultId" INTEGER NOT NULL,
"BranchKind" TEXT NOT NULL,
"ConditionKey" TEXT NULL,
"ConditionText" TEXT NOT NULL,
"ConditionJson" TEXT NOT NULL,
"RawText" TEXT NOT NULL,
"DescriptionText" TEXT NOT NULL,
"RawAffixText" TEXT NULL,
"ParsedJson" TEXT NOT NULL,
"SortOrder" INTEGER NOT NULL,
CONSTRAINT "FK_CriticalBranches_CriticalResults_CriticalResultId"
FOREIGN KEY ("CriticalResultId") REFERENCES "CriticalResults" ("Id") ON DELETE CASCADE
);
""",
cancellationToken);
// Index on the parent result id alone.
await dbContext.Database.ExecuteSqlRawAsync(
"""
CREATE INDEX IF NOT EXISTS "IX_CriticalBranches_CriticalResultId"
ON "CriticalBranches" ("CriticalResultId");
""",
cancellationToken);
// Composite index on parent result id plus sort order.
// NOTE(review): its leading column duplicates the single-column index above - confirm both are wanted.
await dbContext.Database.ExecuteSqlRawAsync(
"""
CREATE INDEX IF NOT EXISTS "IX_CriticalBranches_CriticalResultId_SortOrder"
ON "CriticalBranches" ("CriticalResultId", "SortOrder");
""",
cancellationToken);
}
}

View File

@@ -0,0 +1,17 @@
namespace RolemasterDb.App.Domain;
/// <summary>
/// One conditional branch stored for a <see cref="CriticalResult"/>, holding the condition
/// text together with the branch's own prose and affix text.
/// </summary>
public sealed class CriticalBranch
{
// Primary key.
public int Id { get; set; }
// Foreign key to the owning critical result.
public int CriticalResultId { get; set; }
// Kind discriminator; defaults to "conditional".
public string BranchKind { get; set; } = "conditional";
// Normalized key derived from the condition text; nullable.
public string? ConditionKey { get; set; }
// Condition text as it appeared before the ':' separator in the source line.
public string ConditionText { get; set; } = string.Empty;
// JSON payload for the condition; defaults to an empty object.
public string ConditionJson { get; set; } = "{}";
// Unmodified branch lines joined together.
public string RawText { get; set; } = string.Empty;
// Prose portion of the branch payload; empty when the branch has no prose.
public string DescriptionText { get; set; } = string.Empty;
// Affix portion of the branch payload; null when the branch has no affix lines.
public string? RawAffixText { get; set; }
// JSON payload for parsed data; defaults to an empty object.
public string ParsedJson { get; set; } = "{}";
// Position of the branch within its result.
public int SortOrder { get; set; }
// Navigation to the owning result; populated by EF Core, hence the null-forgiving default.
public CriticalResult CriticalResult { get; set; } = null!;
}

View File

@@ -16,4 +16,5 @@ public sealed class CriticalResult
public CriticalGroup? CriticalGroup { get; set; } public CriticalGroup? CriticalGroup { get; set; }
public CriticalColumn CriticalColumn { get; set; } = null!; public CriticalColumn CriticalColumn { get; set; } = null!;
public CriticalRollBand CriticalRollBand { get; set; } = null!; public CriticalRollBand CriticalRollBand { get; set; } = null!;
public List<CriticalBranch> Branches { get; set; } = [];
} }

Binary file not shown.

View File

@@ -1,3 +1,6 @@
using Microsoft.EntityFrameworkCore;
using RolemasterDb.App.Data;
using RolemasterDb.ImportTool.Parsing; using RolemasterDb.ImportTool.Parsing;
namespace RolemasterDb.ImportTool.Tests; namespace RolemasterDb.ImportTool.Tests;
@@ -249,6 +252,83 @@ public sealed class StandardCriticalTableParserIntegrationTests
Assert.Contains("Blast goes in through foe's eye", superSlaying.DescriptionText, StringComparison.OrdinalIgnoreCase); Assert.Contains("Blast goes in through foe's eye", superSlaying.DescriptionText, StringComparison.OrdinalIgnoreCase);
} }
// Verifies that the slash table cell 36-45/B keeps only its prose as base content and
// exposes the two greaves conditions as separate branches carrying their own affix text.
[Fact]
public async Task Slash_branch_cells_split_base_text_from_conditional_affix_branches()
{
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "slash", StringComparison.Ordinal));
var parseResult = await LoadParseResultAsync(entry);
// Select the ungrouped cell at roll band 36-45, column B.
var result = parseResult.Table.Results.Single(item =>
item.GroupKey is null &&
string.Equals(item.RollBandLabel, "36-45", StringComparison.Ordinal) &&
string.Equals(item.ColumnKey, "B", StringComparison.Ordinal));
// Base content: prose only, no affix text, and no branch lines left in the raw cell text.
Assert.Equal("Strike foe in shin. If he doesn't have greaves, you slash open foe's shin.", result.DescriptionText);
Assert.Null(result.RawAffixText);
Assert.DoesNotContain("with leg greaves:", result.RawCellText, StringComparison.OrdinalIgnoreCase);
Assert.Equal(2, result.Branches.Count);
var withGreaves = result.Branches.Single(item => string.Equals(item.ConditionText, "with leg greaves", StringComparison.OrdinalIgnoreCase));
var withoutGreaves = result.Branches.Single(item => string.Equals(item.ConditionText, "w/o leg greaves", StringComparison.OrdinalIgnoreCase));
// The "w/o" condition text is expected to normalize to a "without_..." key.
Assert.Equal("with_leg_greaves", withGreaves.ConditionKey);
Assert.Equal("+2H π", withGreaves.RawAffixText);
Assert.Equal(string.Empty, withGreaves.DescriptionText);
Assert.Equal("without_leg_greaves", withoutGreaves.ConditionKey);
Assert.Equal("+2H ∫", withoutGreaves.RawAffixText);
}
// Verifies that the impact table cell 86-90/D yields one affix-only branch and one
// prose-only branch, with neither leaking into the base description or the other branch.
[Fact]
public async Task Impact_branch_cells_keep_prose_branch_text_separate_from_affix_branch_text()
{
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "impact", StringComparison.Ordinal));
var parseResult = await LoadParseResultAsync(entry);
// Select the ungrouped cell at roll band 86-90, column D.
var result = parseResult.Table.Results.Single(item =>
item.GroupKey is null &&
string.Equals(item.RollBandLabel, "86-90", StringComparison.Ordinal) &&
string.Equals(item.ColumnKey, "D", StringComparison.Ordinal));
Assert.Equal(
"Onslaught to foe's midsection. Organs are damaged and foe throws up blood. Foe's abdomen is seriously damaged. He falls and should not be moved.",
result.DescriptionText);
Assert.Null(result.RawAffixText);
Assert.Equal(2, result.Branches.Count);
var withArmor = result.Branches.Single(item => string.Equals(item.ConditionText, "with abdominal armor", StringComparison.OrdinalIgnoreCase));
var withoutArmor = result.Branches.Single(item => string.Equals(item.ConditionText, "w/o abdominal armor", StringComparison.OrdinalIgnoreCase));
// Armored branch: affix text only.
Assert.Equal("12∑", withArmor.RawAffixText);
Assert.Equal(string.Empty, withArmor.DescriptionText);
// Unarmored branch: prose only.
Assert.Null(withoutArmor.RawAffixText);
Assert.Equal("dies in 6 rounds", withoutArmor.DescriptionText);
}
// Loads the parsed slash table into a copy of the checked-in SQLite database and verifies
// that the branch rows for cell 36-45/B can be read back through EF Core.
[Fact]
public async Task Loader_upgrades_existing_sqlite_and_persists_branch_rows()
{
var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "slash", StringComparison.Ordinal));
var parseResult = await LoadParseResultAsync(entry);
// Work on a uniquely named copy so the repository database is not modified.
// NOTE(review): the copy is never deleted afterwards, so each run leaves a file in the artifact cache.
var databasePath = CreateTemporaryDatabaseCopy();
var loader = new CriticalImportLoader(databasePath);
await loader.LoadAsync(parseResult.Table);
// Read back through a separate context so the assertions see persisted rows.
await using var dbContext = CreateDbContext(databasePath);
var result = await dbContext.CriticalResults
.Include(item => item.CriticalTable)
.Include(item => item.CriticalColumn)
.Include(item => item.CriticalRollBand)
.Include(item => item.Branches)
.SingleAsync(item =>
item.CriticalTable.Slug == "slash" &&
item.CriticalColumn.ColumnKey == "B" &&
item.CriticalRollBand.Label == "36-45");
Assert.DoesNotContain("with leg greaves:", result.RawCellText, StringComparison.OrdinalIgnoreCase);
Assert.Equal(2, result.Branches.Count);
Assert.Contains(result.Branches, item => item.ConditionKey == "with_leg_greaves" && item.RawAffixText == "+2H π");
Assert.Contains(result.Branches, item => item.ConditionKey == "without_leg_greaves" && item.RawAffixText == "+2H ∫");
}
private static async Task<CriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry) private static async Task<CriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
{ {
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml"); var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");
@@ -278,6 +358,22 @@ public sealed class StandardCriticalTableParserIntegrationTests
return cacheRoot; return cacheRoot;
} }
/// <summary>
/// Opens a <see cref="RolemasterDbContext"/> over the SQLite file at <paramref name="databasePath"/>.
/// </summary>
private static RolemasterDbContext CreateDbContext(string databasePath)
{
    var builder = new DbContextOptionsBuilder<RolemasterDbContext>();
    builder.UseSqlite($"Data Source={databasePath}");

    return new RolemasterDbContext(builder.Options);
}
/// <summary>
/// Copies the checked-in rolemaster.db into the artifact cache under a unique file name.
/// </summary>
/// <returns>The full path of the copy.</returns>
private static string CreateTemporaryDatabaseCopy()
{
    var copyFileName = $"rolemaster-{Guid.NewGuid():N}.db";
    var copyPath = Path.Combine(GetArtifactCacheRoot(), copyFileName);
    var originalPath = Path.Combine(GetRepositoryRoot(), "src", "RolemasterDb.App", "rolemaster.db");

    File.Copy(originalPath, copyPath, overwrite: true);
    return copyPath;
}
private static string GetRepositoryRoot() private static string GetRepositoryRoot()
{ {
var probe = new DirectoryInfo(AppContext.BaseDirectory); var probe = new DirectoryInfo(AppContext.BaseDirectory);

View File

@@ -12,10 +12,12 @@ public sealed class CriticalImportLoader(string databasePath)
{ {
await using var dbContext = CreateDbContext(); await using var dbContext = CreateDbContext();
await dbContext.Database.EnsureCreatedAsync(cancellationToken); await dbContext.Database.EnsureCreatedAsync(cancellationToken);
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
var removedTableCount = await dbContext.CriticalTables.CountAsync(cancellationToken); var removedTableCount = await dbContext.CriticalTables.CountAsync(cancellationToken);
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken); await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
await dbContext.CriticalBranches.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalResults.ExecuteDeleteAsync(cancellationToken); await dbContext.CriticalResults.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalGroups.ExecuteDeleteAsync(cancellationToken); await dbContext.CriticalGroups.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalColumns.ExecuteDeleteAsync(cancellationToken); await dbContext.CriticalColumns.ExecuteDeleteAsync(cancellationToken);
@@ -30,6 +32,7 @@ public sealed class CriticalImportLoader(string databasePath)
{ {
await using var dbContext = CreateDbContext(); await using var dbContext = CreateDbContext();
await dbContext.Database.EnsureCreatedAsync(cancellationToken); await dbContext.Database.EnsureCreatedAsync(cancellationToken);
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken); await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
await DeleteTableAsync(dbContext, table.Slug, cancellationToken); await DeleteTableAsync(dbContext, table.Slug, cancellationToken);
@@ -86,7 +89,21 @@ public sealed class CriticalImportLoader(string databasePath)
DescriptionText = item.DescriptionText, DescriptionText = item.DescriptionText,
RawAffixText = item.RawAffixText, RawAffixText = item.RawAffixText,
ParsedJson = "{}", ParsedJson = "{}",
ParseStatus = "raw" ParseStatus = "raw",
Branches = item.Branches
.Select(branch => new CriticalBranch
{
BranchKind = branch.BranchKind,
ConditionKey = branch.ConditionKey,
ConditionText = branch.ConditionText,
ConditionJson = "{}",
RawText = branch.RawText,
DescriptionText = branch.DescriptionText,
RawAffixText = branch.RawAffixText,
ParsedJson = "{}",
SortOrder = branch.SortOrder
})
.ToList()
}) })
.ToList(); .ToList();
@@ -121,6 +138,10 @@ public sealed class CriticalImportLoader(string databasePath)
return; return;
} }
await dbContext.CriticalBranches
.Where(item => item.CriticalResult.CriticalTableId == tableId.Value)
.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalResults await dbContext.CriticalResults
.Where(item => item.CriticalTableId == tableId.Value) .Where(item => item.CriticalTableId == tableId.Value)
.ExecuteDeleteAsync(cancellationToken); .ExecuteDeleteAsync(cancellationToken);

View File

@@ -0,0 +1,10 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Text lines collected for a single table cell, identified by optional group, roll band and column.
/// </summary>
/// <param name="groupKey">Key of the column group, or null when the cell has no group.</param>
/// <param name="rollBandLabel">Label of the roll band (row) the cell belongs to.</param>
/// <param name="rowIndex">Index of the cell's row; used to pair a cell with the one in the following row.</param>
/// <param name="columnKey">Key of the column the cell belongs to.</param>
/// <param name="lines">Cell text lines; the list instance is stored as-is, not copied.</param>
internal sealed class ColumnarCellEntry(string? groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
{
public string? GroupKey { get; } = groupKey;
public string RollBandLabel { get; } = rollBandLabel;
public int RowIndex { get; } = rowIndex;
public string ColumnKey { get; } = columnKey;
// Mutable on purpose: affix-leakage repair moves lines between vertically adjacent cells in place.
public List<string> Lines { get; } = lines;
}

View File

@@ -0,0 +1,17 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Result of parsing one cell's lines: the base (non-branch) content, the conditional
/// branches found after it, and any validation errors raised while parsing.
/// </summary>
/// <param name="baseLines">Cell lines that precede the first conditional branch.</param>
/// <param name="rawCellText">Base lines joined with newlines.</param>
/// <param name="descriptionText">Prose portion of the base lines, whitespace-collapsed.</param>
/// <param name="rawAffixText">Affix portion of the base lines, or null when there is none.</param>
/// <param name="branches">Parsed conditional branches in source order.</param>
/// <param name="validationErrors">Messages for base content or branches that interleave prose and affix lines.</param>
internal sealed class CriticalCellParseContent(
IReadOnlyList<string> baseLines,
string rawCellText,
string descriptionText,
string? rawAffixText,
IReadOnlyList<ParsedCriticalBranch> branches,
IReadOnlyList<string> validationErrors)
{
public IReadOnlyList<string> BaseLines { get; } = baseLines;
public string RawCellText { get; } = rawCellText;
public string DescriptionText { get; } = descriptionText;
public string? RawAffixText { get; } = rawAffixText;
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
public IReadOnlyList<string> ValidationErrors { get; } = validationErrors;
}

View File

@@ -0,0 +1,114 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Splits the text lines of one critical-table cell into base content and conditional branches.
/// </summary>
internal static class CriticalCellTextParser
{
    /// <summary>
    /// Parses a cell: every line before the first conditional-branch start is base content,
    /// and each branch runs from its start line up to the next branch start (or the end of the cell).
    /// </summary>
    /// <param name="lines">Cell lines in reading order.</param>
    /// <param name="affixLegendSymbols">Legend symbols used to classify lines as affix-like.</param>
    /// <returns>Base text sections, parsed branches and collected validation errors.</returns>
    internal static CriticalCellParseContent Parse(IReadOnlyList<string> lines, ISet<string> affixLegendSymbols)
    {
        var branchStarts = FindBranchStartIndexes(lines);
        var firstBranchStart = branchStarts.Count > 0 ? branchStarts[0] : lines.Count;
        var baseLines = lines.Take(firstBranchStart).ToList();

        var errors = new List<string>();
        errors.AddRange(ValidateSegmentCount(baseLines, affixLegendSymbols, "Base content"));

        var branches = new List<ParsedCriticalBranch>();
        for (var position = 0; position < branchStarts.Count; position++)
        {
            var from = branchStarts[position];
            var until = position + 1 < branchStarts.Count
                ? branchStarts[position + 1]
                : lines.Count;
            var branchLines = lines.Skip(from).Take(until - from).ToList();

            // Sort order is 1-based and follows the order of appearance in the cell.
            branches.Add(ParseBranch(branchLines, position + 1, affixLegendSymbols, errors));
        }

        var (rawCellText, descriptionText, rawAffixText) = BuildTextSections(baseLines, affixLegendSymbols);
        return new CriticalCellParseContent(baseLines, rawCellText, descriptionText, rawAffixText, branches, errors);
    }

    /// <summary>
    /// Parses one branch whose first line has the form "condition: payload".
    /// </summary>
    /// <remarks>
    /// The first line must contain ':'; callers only pass lines recognised as branch starts.
    /// </remarks>
    private static ParsedCriticalBranch ParseBranch(
        IReadOnlyList<string> branchLines,
        int sortOrder,
        ISet<string> affixLegendSymbols,
        List<string> validationErrors)
    {
        var headerLine = branchLines[0];
        var colonIndex = headerLine.IndexOf(':', StringComparison.Ordinal);
        var conditionText = CriticalTableParserSupport.CollapseWhitespace(headerLine[..colonIndex]);
        var inlinePayload = headerLine[(colonIndex + 1)..];

        // Payload = text after the colon followed by the continuation lines, blanks dropped.
        var payloadLines = branchLines
            .Skip(1)
            .Prepend(inlinePayload)
            .Select(line => CriticalTableParserSupport.CollapseWhitespace(line))
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToList();

        validationErrors.AddRange(ValidateSegmentCount(payloadLines, affixLegendSymbols, $"Branch '{conditionText}'"));

        var sections = BuildTextSections(payloadLines, affixLegendSymbols);
        var conditionKey = CriticalTableParserSupport.NormalizeConditionKey(conditionText);
        var rawBranchText = string.Join(Environment.NewLine, branchLines);

        return new ParsedCriticalBranch(
            "conditional",
            conditionKey,
            conditionText,
            rawBranchText,
            sections.DescriptionText,
            sections.RawAffixText,
            sortOrder);
    }

    /// <summary>
    /// Returns, in ascending order, the indexes of all lines that start a conditional branch.
    /// </summary>
    private static List<int> FindBranchStartIndexes(IReadOnlyList<string> lines) =>
        Enumerable.Range(0, lines.Count)
            .Where(index => CriticalTableParserSupport.IsConditionalBranchStartLine(lines[index]))
            .ToList();

    /// <summary>
    /// Reports an error when the lines form more than two prose/affix segments.
    /// </summary>
    private static IReadOnlyList<string> ValidateSegmentCount(
        IReadOnlyList<string> lines,
        ISet<string> affixLegendSymbols,
        string scope)
    {
        if (lines.Count > 0 && CriticalTableParserSupport.CountLineTypeSegments(lines, affixLegendSymbols) > 2)
        {
            return [$"{scope} interleaves prose and affix lines."];
        }

        return [];
    }

    /// <summary>
    /// Builds the raw text, the whitespace-collapsed prose text and the newline-joined affix text
    /// (null when there are no affix lines) for a set of lines.
    /// </summary>
    private static (string RawText, string DescriptionText, string? RawAffixText) BuildTextSections(
        IReadOnlyList<string> lines,
        ISet<string> affixLegendSymbols)
    {
        var affixLines = new List<string>();
        var proseLines = new List<string>();
        foreach (var line in lines)
        {
            var target = CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)
                ? affixLines
                : proseLines;
            target.Add(line);
        }

        var rawText = string.Join(Environment.NewLine, lines);
        var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', proseLines));
        string? rawAffixText = null;
        if (affixLines.Count > 0)
        {
            rawAffixText = string.Join(Environment.NewLine, affixLines);
        }

        return (rawText, descriptionText, rawAffixText);
    }
}

View File

@@ -169,15 +169,9 @@ internal static class CriticalTableParserSupport
return true; return true;
} }
if (value.StartsWith("with ", StringComparison.OrdinalIgnoreCase) || if (IsConditionalBranchStartLine(value))
value.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("without ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("if ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("while ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("until ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("unless ", StringComparison.OrdinalIgnoreCase))
{ {
return value.Contains(':', StringComparison.Ordinal); return true;
} }
if (affixLegendSymbols.Count > 0 && if (affixLegendSymbols.Count > 0 &&
@@ -242,6 +236,23 @@ internal static class CriticalTableParserSupport
internal static string CollapseWhitespace(string value) => internal static string CollapseWhitespace(string value) =>
Regex.Replace(value.Trim(), @"\s+", " "); Regex.Replace(value.Trim(), @"\s+", " ");
/// <summary>
/// Tells whether a line opens a conditional branch: after trimming it must contain ':'
/// and begin (case-insensitively) with one of the condition keywords followed by a space.
/// </summary>
/// <param name="value">Line text to classify.</param>
/// <returns>True when the line is a conditional branch start; otherwise false.</returns>
internal static bool IsConditionalBranchStartLine(string value)
{
    string[] conditionPrefixes = ["with ", "w/o ", "without ", "if ", "while ", "until ", "unless "];
    var candidate = value.Trim();

    return candidate.Contains(':', StringComparison.Ordinal)
        && conditionPrefixes.Any(prefix => candidate.StartsWith(prefix, StringComparison.OrdinalIgnoreCase));
}
internal static string NormalizeText(string value) => internal static string NormalizeText(string value) =>
value value
.Replace('\u00a0', ' ') .Replace('\u00a0', ' ')
@@ -250,6 +261,25 @@ internal static class CriticalTableParserSupport
.Replace('', '\'') .Replace('', '\'')
.Trim(); .Trim();
/// <summary>
/// Turns condition text into a lowercase key made of ASCII letters and digits separated by underscores.
/// </summary>
/// <remarks>
/// The stand-alone abbreviation "w/o" is expanded to "without" first, so "w/o leg greaves"
/// becomes "without_leg_greaves".
/// </remarks>
/// <param name="conditionText">Condition text taken from a branch start line.</param>
/// <returns>The normalized key, or null when no letter or digit remains.</returns>
internal static string? NormalizeConditionKey(string conditionText)
{
    var normalized = CollapseWhitespace(conditionText).ToLowerInvariant();

    // Expand "w/o" only where it stands alone as a token. A plain substring replace would also
    // rewrite text such as "follow/or" and produce a corrupted key.
    normalized = Regex.Replace(normalized, @"\bw/o\b", "without");

    // Collapse every run of other characters into one underscore, then drop edge underscores.
    normalized = Regex.Replace(normalized, @"[^a-z0-9]+", "_");
    normalized = normalized.Trim('_');
    return normalized.Length == 0 ? null : normalized;
}
/// <summary>
/// Finds the smallest Top among fragments that mark the legend area: a fragment whose text equals
/// "Key:" or contains "must parry" or "attacker gets" (all case-insensitive).
/// </summary>
/// <param name="fragments">Fragments to scan.</param>
/// <returns>The smallest matching Top, or <see cref="int.MaxValue"/> when nothing matches.</returns>
internal static int FindKeyTop(IReadOnlyList<XmlTextFragment> fragments)
{
    var keyTop = int.MaxValue;
    foreach (var fragment in fragments)
    {
        var marksKeyArea =
            string.Equals(fragment.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
            fragment.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
            fragment.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase);

        if (marksKeyArea)
        {
            keyTop = Math.Min(keyTop, fragment.Top);
        }
    }

    return keyTop;
}
internal static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop) internal static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
{ {
if (keyTop == int.MaxValue) if (keyTop == int.MaxValue)
@@ -347,6 +377,138 @@ internal static class CriticalTableParserSupport
return groups; return groups;
} }
/// <summary>
/// Converts row-label fragments into anchors ordered by Top, numbering them from 1.
/// </summary>
/// <param name="rowLabelFragments">Fragments holding roll-band labels.</param>
/// <returns>One anchor per fragment with a normalized label, its Top and a 1-based sort order.</returns>
internal static List<RowAnchor> CreateRowAnchors(IReadOnlyList<XmlTextFragment> rowLabelFragments)
{
    var anchors = new List<RowAnchor>();
    foreach (var fragment in rowLabelFragments.OrderBy(item => item.Top))
    {
        // anchors.Count is read before the new anchor is appended, so numbering starts at 1.
        anchors.Add(new RowAnchor(NormalizeRollBandLabel(fragment.Text), fragment.Top, anchors.Count + 1));
    }

    return anchors;
}
/// <summary>
/// Selects the fragments that belong to the table body and splits affix fragments that cross a column boundary.
/// </summary>
/// <param name="fragments">All fragments of the page.</param>
/// <param name="bodyStartTop">Inclusive Top at which the body starts.</param>
/// <param name="keyTop">Top of the legend area; the body ends a grouping tolerance above it.</param>
/// <param name="leftCutoff">Left position passed to the row-label check.</param>
/// <param name="rowAnchors">Row anchors; fragments matching an anchor's Top and label are dropped.</param>
/// <param name="excludedFragments">Fragments (such as headers) that must never be treated as body.</param>
/// <param name="columnCenters">Column keys with their horizontal centres.</param>
/// <param name="affixLegendSymbols">Legend symbols used for affix detection.</param>
/// <returns>The body fragments after boundary-crossing affix fragments have been split.</returns>
internal static List<XmlTextFragment> BuildBodyFragments(
    IReadOnlyList<XmlTextFragment> fragments,
    int bodyStartTop,
    int keyTop,
    int leftCutoff,
    IReadOnlyList<RowAnchor> rowAnchors,
    IReadOnlyCollection<XmlTextFragment> excludedFragments,
    IReadOnlyList<(string Key, double CenterX)> columnCenters,
    ISet<string> affixLegendSymbols)
{
    bool IsBodyFragment(XmlTextFragment fragment)
    {
        var insideBodyBand = fragment.Top >= bodyStartTop && fragment.Top < keyTop - TopGroupingTolerance;
        if (!insideBodyBand)
        {
            return false;
        }

        if (IsFooterPageNumberFragment(fragment, keyTop) || IsPotentialRowLabelFragment(fragment, leftCutoff))
        {
            return false;
        }

        var duplicatesRowAnchor = rowAnchors.Any(anchor =>
            anchor.Top == fragment.Top &&
            string.Equals(anchor.Label, NormalizeRollBandLabel(fragment.Text), StringComparison.OrdinalIgnoreCase));
        if (duplicatesRowAnchor)
        {
            return false;
        }

        return !excludedFragments.Contains(fragment);
    }

    var candidates = fragments.Where(IsBodyFragment).ToList();
    return SplitBoundaryCrossingAffixFragments(candidates, columnCenters, affixLegendSymbols);
}
/// <summary>
/// Moves affix-like lines found at the start of a cell to the end of the cell directly above it
/// (same group and column, previous row index), working from the top row downwards.
/// </summary>
/// <remarks>
/// A cell is left alone when it has no leading affix-like lines or when all of its lines are affix-like.
/// </remarks>
/// <param name="cellEntries">Cells to repair; their Lines lists are modified in place.</param>
/// <param name="affixLegendSymbols">Legend symbols used to classify lines as affix-like.</param>
internal static void RepairLeadingAffixLeakage(List<ColumnarCellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    if (cellEntries.Count == 0)
    {
        return;
    }

    var lastRowIndex = cellEntries.Max(item => item.RowIndex);
    var axes = cellEntries
        .Select(item => (item.GroupKey, item.ColumnKey))
        .Distinct()
        .ToList();

    ColumnarCellEntry? FindCell(int rowIndex, string? groupKey, string columnKey) =>
        cellEntries.SingleOrDefault(item =>
            item.RowIndex == rowIndex &&
            string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
            string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));

    for (var upperRow = 0; upperRow < lastRowIndex; upperRow++)
    {
        foreach (var (groupKey, columnKey) in axes)
        {
            var upper = FindCell(upperRow, groupKey, columnKey);
            var lower = FindCell(upperRow + 1, groupKey, columnKey);
            if (upper is null || lower is null)
            {
                continue;
            }

            var leakedCount = lower.Lines
                .TakeWhile(line => IsAffixLikeLine(line, affixLegendSymbols))
                .Count();
            if (leakedCount == 0 || leakedCount == lower.Lines.Count)
            {
                continue;
            }

            // Copy the leaked lines to the upper cell before removing them from the lower one.
            upper.Lines.AddRange(lower.Lines.GetRange(0, leakedCount));
            lower.Lines.RemoveRange(0, leakedCount);
        }
    }
}
/// <summary>
/// Chooses the Top at which the row of <paramref name="current"/> ends and the row of
/// <paramref name="next"/> begins.
/// </summary>
/// <remarks>
/// Among the body lines between the two anchors, the lowest place where an affix-like line is directly
/// followed by a non-affix line is used; otherwise the boundary falls midway between the anchors.
/// </remarks>
/// <param name="current">Anchor of the upper row.</param>
/// <param name="next">Anchor of the lower row.</param>
/// <param name="bodyLines">Body lines with their Top and affix classification.</param>
/// <returns>The floored midpoint of the chosen pair of Tops, plus one.</returns>
internal static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int BoundaryBetween(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;

    var candidates = bodyLines
        .Where(line => line.Top >= current.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToList();

    // Walk adjacent pairs from the bottom up so the lowest affix-to-prose transition wins.
    for (var lower = candidates.Count - 1; lower >= 1; lower--)
    {
        var above = candidates[lower - 1];
        var below = candidates[lower];
        if (above.IsAffixLike && !below.IsAffixLike)
        {
            return BoundaryBetween(above.Top, below.Top);
        }
    }

    return BoundaryBetween(current.Top, next.Top);
}
/// <summary>
/// Parses every cell entry and appends the resulting cell artifact, result and validation errors
/// to the supplied output lists, in the order of <paramref name="cellEntries"/>.
/// </summary>
/// <param name="cellEntries">Cells to parse.</param>
/// <param name="affixLegendSymbols">Legend symbols used to classify lines as affix-like.</param>
/// <param name="parsedCells">Receives one artifact per cell.</param>
/// <param name="parsedResults">Receives one result per cell.</param>
/// <param name="validationErrors">Receives parse errors, each prefixed with the cell identifier.</param>
internal static void BuildParsedArtifacts(
    IReadOnlyList<ColumnarCellEntry> cellEntries,
    ISet<string> affixLegendSymbols,
    List<ParsedCriticalCellArtifact> parsedCells,
    List<ParsedCriticalResult> parsedResults,
    List<string> validationErrors)
{
    foreach (var cellEntry in cellEntries)
    {
        var content = CriticalCellTextParser.Parse(cellEntry.Lines, affixLegendSymbols);

        foreach (var error in content.ValidationErrors)
        {
            validationErrors.Add($"Cell '{BuildCellIdentifier(cellEntry)}': {error}");
        }

        // The artifact gets its own copy of the lines so it does not share the entry's mutable list.
        var lineSnapshot = cellEntry.Lines.ToList();
        var artifact = new ParsedCriticalCellArtifact(
            cellEntry.GroupKey,
            cellEntry.RollBandLabel,
            cellEntry.ColumnKey,
            lineSnapshot,
            content.BaseLines,
            content.RawCellText,
            content.DescriptionText,
            content.RawAffixText,
            content.Branches);
        parsedCells.Add(artifact);

        var result = new ParsedCriticalResult(
            cellEntry.GroupKey,
            cellEntry.ColumnKey,
            cellEntry.RollBandLabel,
            content.RawCellText,
            content.DescriptionText,
            content.RawAffixText,
            content.Branches);
        parsedResults.Add(result);
    }
}
/// <summary>
/// Formats a cell as "rollBand/column", or "rollBand/group/column" when the cell has a group key.
/// </summary>
private static string BuildCellIdentifier(ColumnarCellEntry cellEntry)
{
    if (cellEntry.GroupKey is null)
    {
        return $"{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}";
    }

    return $"{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}";
}
private static bool LooksLikeSplitRollBandStart(string value) => private static bool LooksLikeSplitRollBandStart(string value) =>
Regex.IsMatch(value.Trim(), @"^\d{2,3}\s*-$"); Regex.IsMatch(value.Trim(), @"^\d{2,3}\s*-$");

View File

@@ -36,13 +36,7 @@ public sealed class GroupedVariantCriticalTableParser
groupHeaders.Max(item => item.Top), groupHeaders.Max(item => item.Top),
columnHeaders.Max(item => item.Top)) columnHeaders.Max(item => item.Top))
+ CriticalTableParserSupport.HeaderToBodyMinimumGap; + CriticalTableParserSupport.HeaderToBodyMinimumGap;
var keyTop = fragments var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
.Where(item =>
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
.Select(item => (int?)item.Top)
.Min() ?? int.MaxValue;
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop); var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
var leftCutoff = columnHeaders.Min(item => item.Left) - 10; var leftCutoff = columnHeaders.Min(item => item.Left) - 10;
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments( var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
@@ -50,11 +44,7 @@ public sealed class GroupedVariantCriticalTableParser
leftCutoff, leftCutoff,
bodyStartTop, bodyStartTop,
keyTop); keyTop);
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
var rowAnchors = rowLabelFragments
.OrderBy(item => item.Top)
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
.ToList();
if (rowAnchors.Count == 0) if (rowAnchors.Count == 0)
{ {
@@ -65,34 +55,33 @@ public sealed class GroupedVariantCriticalTableParser
.Select(item => (item.CompositeKey, item.CenterX)) .Select(item => (item.CompositeKey, item.CenterX))
.ToList(); .ToList();
var bodyFragments = fragments var excludedFragments = groupHeaders.Concat(columnHeaders).ToList();
.Where(item => var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
item.Top >= bodyStartTop && fragments,
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance && bodyStartTop,
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) && keyTop,
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) && leftCutoff,
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) && rowAnchors,
!groupHeaders.Contains(item) && excludedFragments,
!columnHeaders.Contains(item)) columnCenters,
.ToList(); affixLegendSymbols);
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols); var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors var parsedRollBands = rowAnchors
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder)) .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
.ToList(); .ToList();
var cellEntries = new List<CellEntry>(); var cellEntries = new List<ColumnarCellEntry>();
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++) for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
{ {
var rowStart = rowIndex == 0 var rowStart = rowIndex == 0
? bodyStartTop ? bodyStartTop
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
var rowEnd = rowIndex == rowAnchors.Count - 1 var rowEnd = rowIndex == rowAnchors.Count - 1
? keyTop - 1 ? keyTop - 1
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
var rowFragments = bodyFragments var rowFragments = bodyFragments
.Where(item => item.Top >= rowStart && item.Top < rowEnd) .Where(item => item.Top >= rowStart && item.Top < rowEnd)
@@ -112,7 +101,7 @@ public sealed class GroupedVariantCriticalTableParser
continue; continue;
} }
cellEntries.Add(new CellEntry( cellEntries.Add(new ColumnarCellEntry(
anchor.GroupKey, anchor.GroupKey,
rowAnchors[rowIndex].Label, rowAnchors[rowIndex].Label,
rowIndex, rowIndex,
@@ -121,45 +110,11 @@ public sealed class GroupedVariantCriticalTableParser
} }
} }
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
var parsedCells = new List<ParsedCriticalCellArtifact>(); var parsedCells = new List<ParsedCriticalCellArtifact>();
var parsedResults = new List<ParsedCriticalResult>(); var parsedResults = new List<ParsedCriticalResult>();
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
foreach (var cellEntry in cellEntries
.OrderBy(item => item.RowIndex)
.ThenBy(item => item.GroupKey, StringComparer.Ordinal)
.ThenBy(item => item.ColumnKey, StringComparer.Ordinal))
{
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
if (segmentCount > 2)
{
validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
}
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
parsedCells.Add(new ParsedCriticalCellArtifact(
cellEntry.GroupKey,
cellEntry.RollBandLabel,
cellEntry.ColumnKey,
cellEntry.Lines,
rawCellText,
descriptionText,
rawAffixText));
parsedResults.Add(new ParsedCriticalResult(
cellEntry.GroupKey,
cellEntry.ColumnKey,
cellEntry.RollBandLabel,
rawCellText,
descriptionText,
rawAffixText));
}
var expectedCellCount = rowAnchors.Count * ExpectedGroups.Length * ExpectedColumns.Length; var expectedCellCount = rowAnchors.Count * ExpectedGroups.Length * ExpectedColumns.Length;
if (parsedCells.Count != expectedCellCount) if (parsedCells.Count != expectedCellCount)
@@ -235,72 +190,4 @@ public sealed class GroupedVariantCriticalTableParser
throw new InvalidOperationException("Could not find the grouped-variant column header row in the XML artifact."); throw new InvalidOperationException("Could not find the grouped-variant column header row in the XML artifact.");
} }
/// <summary>
/// Walks vertically adjacent cell pairs (same group and column, consecutive row indexes) and, when the
/// lower cell starts with affix-like lines followed by at least one other line, moves those leading
/// affix-like lines to the end of the upper cell.
/// </summary>
/// <param name="cellEntries">Cells to repair; their <c>Lines</c> lists are modified in place.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to <c>CriticalTableParserSupport.IsAffixLikeLine</c>.</param>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    if (cellEntries.Count == 0)
    {
        return;
    }

    var lastRowIndex = cellEntries.Max(entry => entry.RowIndex);
    var groupColumnPairs = cellEntries
        .Select(entry => (entry.GroupKey, entry.ColumnKey))
        .Distinct()
        .ToList();

    // SingleOrDefault is kept on purpose: it yields null for a missing cell and throws on duplicates.
    CellEntry? FindCell(int row, string group, string column) =>
        cellEntries.SingleOrDefault(entry => entry.RowIndex == row && entry.GroupKey == group && entry.ColumnKey == column);

    // The last row has no row below it, so the scan stops one row early.
    for (var upperRow = 0; upperRow < lastRowIndex; upperRow++)
    {
        foreach (var pair in groupColumnPairs)
        {
            var upperCell = FindCell(upperRow, pair.GroupKey, pair.ColumnKey);
            var lowerCell = FindCell(upperRow + 1, pair.GroupKey, pair.ColumnKey);
            if (upperCell is null || lowerCell is null)
            {
                continue;
            }

            var leakedCount = lowerCell.Lines
                .TakeWhile(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols))
                .Count();

            // Nothing to move when the lower cell has no leading affix lines, and a cell that is
            // entirely affix-like is left untouched.
            var hasLeakedPrefix = leakedCount > 0 && leakedCount < lowerCell.Lines.Count;
            if (!hasLeakedPrefix)
            {
                continue;
            }

            upperCell.Lines.AddRange(lowerCell.Lines.Take(leakedCount));
            lowerCell.Lines.RemoveRange(0, leakedCount);
        }
    }
}
/// <summary>
/// Computes the vertical coordinate at which the row of <paramref name="current"/> ends and the row of
/// <paramref name="next"/> begins.
/// </summary>
/// <remarks>
/// Among the body lines whose <c>Top</c> lies in <c>[current.Top, next.Top)</c>, the last place where an
/// affix-like line is directly followed by a non-affix line decides the boundary. If no such transition
/// exists, the midpoint of the two anchors is used. In both cases the result is floor(midpoint) + 1.
/// </remarks>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int BoundaryAfterMidpoint(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;

    var candidates = bodyLines
        .Where(line => current.Top <= line.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToArray();

    // Scan adjacent pairs from the bottom up so the transition closest to the next label wins.
    for (var lower = candidates.Length - 1; lower >= 1; lower--)
    {
        var upper = lower - 1;
        if (!candidates[upper].IsAffixLike || candidates[lower].IsAffixLike)
        {
            continue;
        }

        return BoundaryAfterMidpoint(candidates[upper].Top, candidates[lower].Top);
    }

    return BoundaryAfterMidpoint(current.Top, next.Top);
}
/// <summary>
/// Anchor for one table row: the row's roll-band label, the vertical position (<c>Top</c>) used when
/// resolving row boundaries, and the sort order passed on when roll bands are created.
/// </summary>
private sealed record RowAnchor(string Label, int Top, int SortOrder);
/// <summary>
/// Working record for a single grouped-table cell. The identifying keys are read-only; the
/// <see cref="Lines"/> list itself is mutable so repair passes can move lines between cells.
/// </summary>
private sealed class CellEntry
{
    public CellEntry(string groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        GroupKey = groupKey;
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    /// <summary>Key of the group the cell belongs to.</summary>
    public string GroupKey { get; }

    /// <summary>Label of the roll band (row) the cell belongs to.</summary>
    public string RollBandLabel { get; }

    /// <summary>Zero-based index of the cell's row.</summary>
    public int RowIndex { get; }

    /// <summary>Key of the column the cell belongs to.</summary>
    public string ColumnKey { get; }

    /// <summary>Text lines of the cell; the same list instance that was passed to the constructor.</summary>
    public List<string> Lines { get; }
}
} }

View File

@@ -0,0 +1,19 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Immutable description of one branch parsed out of a critical-table cell: its kind, its condition,
/// and the raw, prose, and affix text that belong to it.
/// </summary>
public sealed class ParsedCriticalBranch
{
    public ParsedCriticalBranch(
        string branchKind,
        string? conditionKey,
        string conditionText,
        string rawText,
        string descriptionText,
        string? rawAffixText,
        int sortOrder)
    {
        BranchKind = branchKind;
        ConditionKey = conditionKey;
        ConditionText = conditionText;
        RawText = rawText;
        DescriptionText = descriptionText;
        RawAffixText = rawAffixText;
        SortOrder = sortOrder;
    }

    /// <summary>Kind of the branch, as supplied by the parser.</summary>
    public string BranchKind { get; }

    /// <summary>Optional key for the branch condition; may be <c>null</c>.</summary>
    public string? ConditionKey { get; }

    /// <summary>Condition text of the branch.</summary>
    public string ConditionText { get; }

    /// <summary>Raw text of the branch.</summary>
    public string RawText { get; }

    /// <summary>Description (prose) text of the branch.</summary>
    public string DescriptionText { get; }

    /// <summary>Raw affix text of the branch; may be <c>null</c>.</summary>
    public string? RawAffixText { get; }

    /// <summary>Sort order of the branch, as supplied by the parser.</summary>
    public int SortOrder { get; }
}

View File

@@ -5,15 +5,19 @@ public sealed class ParsedCriticalCellArtifact(
string rollBandLabel, string rollBandLabel,
string columnKey, string columnKey,
IReadOnlyList<string> lines, IReadOnlyList<string> lines,
IReadOnlyList<string> baseLines,
string rawCellText, string rawCellText,
string descriptionText, string descriptionText,
string? rawAffixText) string? rawAffixText,
IReadOnlyList<ParsedCriticalBranch> branches)
{ {
public string? GroupKey { get; } = groupKey; public string? GroupKey { get; } = groupKey;
public string RollBandLabel { get; } = rollBandLabel; public string RollBandLabel { get; } = rollBandLabel;
public string ColumnKey { get; } = columnKey; public string ColumnKey { get; } = columnKey;
public IReadOnlyList<string> Lines { get; } = lines; public IReadOnlyList<string> Lines { get; } = lines;
public IReadOnlyList<string> BaseLines { get; } = baseLines;
public string RawCellText { get; } = rawCellText; public string RawCellText { get; } = rawCellText;
public string DescriptionText { get; } = descriptionText; public string DescriptionText { get; } = descriptionText;
public string? RawAffixText { get; } = rawAffixText; public string? RawAffixText { get; } = rawAffixText;
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
} }

View File

@@ -6,7 +6,8 @@ public sealed class ParsedCriticalResult(
string rollBandLabel, string rollBandLabel,
string rawCellText, string rawCellText,
string descriptionText, string descriptionText,
string? rawAffixText) string? rawAffixText,
IReadOnlyList<ParsedCriticalBranch> branches)
{ {
public string? GroupKey { get; } = groupKey; public string? GroupKey { get; } = groupKey;
public string ColumnKey { get; } = columnKey; public string ColumnKey { get; } = columnKey;
@@ -14,4 +15,5 @@ public sealed class ParsedCriticalResult(
public string RawCellText { get; } = rawCellText; public string RawCellText { get; } = rawCellText;
public string DescriptionText { get; } = descriptionText; public string DescriptionText { get; } = descriptionText;
public string? RawAffixText { get; } = rawAffixText; public string? RawAffixText { get; } = rawAffixText;
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
} }

View File

@@ -0,0 +1,3 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Row anchor shared by the critical-table parsers: a row label, the vertical position (<c>Top</c>)
/// associated with it, and the row's sort order.
/// </summary>
internal sealed record RowAnchor(string Label, int Top, int SortOrder);

View File

@@ -15,13 +15,7 @@ public sealed class StandardCriticalTableParser
.ToList(); .ToList();
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap; var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
var keyTop = fragments var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
.Where(item =>
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
.Select(item => (int?)item.Top)
.Min() ?? int.MaxValue;
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop); var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
var leftCutoff = headerFragments.Min(item => item.Left) - 10; var leftCutoff = headerFragments.Min(item => item.Left) - 10;
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments( var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
@@ -29,44 +23,39 @@ public sealed class StandardCriticalTableParser
leftCutoff, leftCutoff,
bodyStartTop, bodyStartTop,
keyTop); keyTop);
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
var rowAnchors = rowLabelFragments
.OrderBy(item => item.Top)
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
.ToList();
if (rowAnchors.Count == 0) if (rowAnchors.Count == 0)
{ {
validationErrors.Add("No roll-band labels were found in the XML artifact."); validationErrors.Add("No roll-band labels were found in the XML artifact.");
} }
var bodyFragments = fragments var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
.Where(item => fragments,
item.Top >= bodyStartTop && bodyStartTop,
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance && keyTop,
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) && leftCutoff,
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) && rowAnchors,
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) && headerFragments,
!headerFragments.Contains(item)) columnCenters,
.ToList(); affixLegendSymbols);
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols); var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors var parsedRollBands = rowAnchors
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder)) .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
.ToList(); .ToList();
var cellEntries = new List<CellEntry>(); var cellEntries = new List<ColumnarCellEntry>();
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++) for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
{ {
var rowStart = rowIndex == 0 var rowStart = rowIndex == 0
? bodyStartTop ? bodyStartTop
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
var rowEnd = rowIndex == rowAnchors.Count - 1 var rowEnd = rowIndex == rowAnchors.Count - 1
? keyTop - 1 ? keyTop - 1
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
var rowFragments = bodyFragments var rowFragments = bodyFragments
.Where(item => item.Top >= rowStart && item.Top < rowEnd) .Where(item => item.Top >= rowStart && item.Top < rowEnd)
@@ -86,7 +75,8 @@ public sealed class StandardCriticalTableParser
continue; continue;
} }
cellEntries.Add(new CellEntry( cellEntries.Add(new ColumnarCellEntry(
null,
rowAnchors[rowIndex].Label, rowAnchors[rowIndex].Label,
rowIndex, rowIndex,
columnAnchor.Key, columnAnchor.Key,
@@ -94,44 +84,11 @@ public sealed class StandardCriticalTableParser
} }
} }
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
var parsedCells = new List<ParsedCriticalCellArtifact>(); var parsedCells = new List<ParsedCriticalCellArtifact>();
var parsedResults = new List<ParsedCriticalResult>(); var parsedResults = new List<ParsedCriticalResult>();
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey))
{
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
if (segmentCount > 2)
{
validationErrors.Add(
$"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
}
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
parsedCells.Add(new ParsedCriticalCellArtifact(
null,
cellEntry.RollBandLabel,
cellEntry.ColumnKey,
cellEntry.Lines,
rawCellText,
descriptionText,
rawAffixText));
parsedResults.Add(new ParsedCriticalResult(
null,
cellEntry.ColumnKey,
cellEntry.RollBandLabel,
rawCellText,
descriptionText,
rawAffixText));
}
if (columnCenters.Count != 5) if (columnCenters.Count != 5)
{ {
@@ -185,68 +142,4 @@ public sealed class StandardCriticalTableParser
throw new InvalidOperationException("Could not find the standard-table A-E header row in the XML artifact."); throw new InvalidOperationException("Could not find the standard-table A-E header row in the XML artifact.");
} }
/// <summary>
/// For every pair of vertically adjacent cells in the same column (row <c>n</c> and row <c>n + 1</c>),
/// moves affix-like lines that lead the lower cell to the end of the upper cell, provided the lower
/// cell also contains at least one line after them.
/// </summary>
/// <param name="cellEntries">Cells to repair; their <c>Lines</c> lists are modified in place.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to <c>CriticalTableParserSupport.IsAffixLikeLine</c>.</param>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);

    // The lookups below compare column keys with the ordinal '==' operator, so the key list must be
    // de-duplicated with the same ordinal comparison. A case-insensitive Distinct would keep only the
    // first spelling of a key, and cells using another casing would never be visited.
    var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.Ordinal).ToList();

    // The last row has no row below it, hence '<' rather than '<='.
    for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
    {
        foreach (var columnKey in columnKeys)
        {
            // SingleOrDefault: null when the cell is missing, throws when the cell is duplicated.
            var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey);
            var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey);
            if (current is null || next is null)
            {
                continue;
            }

            var leadingAffixCount = 0;
            while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
            {
                leadingAffixCount++;
            }

            // Skip cells with no leading affix lines and cells that consist only of affix lines.
            if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
            {
                continue;
            }

            current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
            next.Lines.RemoveRange(0, leadingAffixCount);
        }
    }
}
/// <summary>
/// Returns the vertical coordinate that separates the row anchored at <paramref name="current"/> from
/// the row anchored at <paramref name="next"/>.
/// </summary>
/// <remarks>
/// Only body lines with <c>Top</c> in <c>[current.Top, next.Top)</c> are considered. The boundary is
/// placed just past the midpoint of the last affix-like line that is immediately followed by a
/// non-affix line; without such a pair it is placed just past the midpoint of the two anchors.
/// </remarks>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    // Default to the anchors themselves; overwritten when an affix-to-prose transition is found.
    var upperTop = current.Top;
    var lowerTop = next.Top;

    var ordered = bodyLines
        .Where(line => line.Top >= current.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToList();

    // Forward scan: later matches overwrite earlier ones, so the last transition wins.
    for (var i = 0; i + 1 < ordered.Count; i++)
    {
        if (ordered[i].IsAffixLike && !ordered[i + 1].IsAffixLike)
        {
            upperTop = ordered[i].Top;
            lowerTop = ordered[i + 1].Top;
        }
    }

    return (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;
}
/// <summary>
/// Anchor for one table row: the normalized roll-band label, the <c>Top</c> coordinate of the label
/// fragment, and a 1-based sort order assigned by ascending <c>Top</c>.
/// </summary>
private sealed record RowAnchor(string Label, int Top, int SortOrder);
/// <summary>
/// Working record for a single standard-table cell. The identifying values are read-only; the
/// <see cref="Lines"/> list itself is mutable so repair passes can move lines between cells.
/// </summary>
private sealed class CellEntry
{
    public CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    /// <summary>Label of the roll band (row) the cell belongs to.</summary>
    public string RollBandLabel { get; }

    /// <summary>Zero-based index of the cell's row.</summary>
    public int RowIndex { get; }

    /// <summary>Key of the column the cell belongs to.</summary>
    public string ColumnKey { get; }

    /// <summary>Text lines of the cell; the same list instance that was passed to the constructor.</summary>
    public List<string> Lines { get; }
}
} }

View File

@@ -28,13 +28,7 @@ public sealed class VariantColumnCriticalTableParser
.ToList(); .ToList();
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap; var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
var keyTop = fragments var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
.Where(item =>
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
.Select(item => (int?)item.Top)
.Min() ?? int.MaxValue;
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop); var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
var leftCutoff = headerFragments.Min(item => item.Left) - 10; var leftCutoff = headerFragments.Min(item => item.Left) - 10;
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments( var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
@@ -42,11 +36,7 @@ public sealed class VariantColumnCriticalTableParser
leftCutoff, leftCutoff,
bodyStartTop, bodyStartTop,
keyTop); keyTop);
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
var rowAnchors = rowLabelFragments
.OrderBy(item => item.Top)
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
.ToList();
if (rowAnchors.Count == 0) if (rowAnchors.Count == 0)
{ {
@@ -57,33 +47,32 @@ public sealed class VariantColumnCriticalTableParser
.Select(item => (item.Key, item.CenterX)) .Select(item => (item.Key, item.CenterX))
.ToList(); .ToList();
var bodyFragments = fragments var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
.Where(item => fragments,
item.Top >= bodyStartTop && bodyStartTop,
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance && keyTop,
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) && leftCutoff,
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) && rowAnchors,
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) && headerFragments,
!headerFragments.Contains(item)) columnCenters,
.ToList(); affixLegendSymbols);
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols); var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors var parsedRollBands = rowAnchors
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder)) .Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
.ToList(); .ToList();
var cellEntries = new List<CellEntry>(); var cellEntries = new List<ColumnarCellEntry>();
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++) for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
{ {
var rowStart = rowIndex == 0 var rowStart = rowIndex == 0
? bodyStartTop ? bodyStartTop
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines); : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
var rowEnd = rowIndex == rowAnchors.Count - 1 var rowEnd = rowIndex == rowAnchors.Count - 1
? keyTop - 1 ? keyTop - 1
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines); : CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
var rowFragments = bodyFragments var rowFragments = bodyFragments
.Where(item => item.Top >= rowStart && item.Top < rowEnd) .Where(item => item.Top >= rowStart && item.Top < rowEnd)
@@ -103,7 +92,8 @@ public sealed class VariantColumnCriticalTableParser
continue; continue;
} }
cellEntries.Add(new CellEntry( cellEntries.Add(new ColumnarCellEntry(
null,
rowAnchors[rowIndex].Label, rowAnchors[rowIndex].Label,
rowIndex, rowIndex,
columnAnchor.Key, columnAnchor.Key,
@@ -111,42 +101,11 @@ public sealed class VariantColumnCriticalTableParser
} }
} }
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols); CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
var parsedCells = new List<ParsedCriticalCellArtifact>(); var parsedCells = new List<ParsedCriticalCellArtifact>();
var parsedResults = new List<ParsedCriticalResult>(); var parsedResults = new List<ParsedCriticalResult>();
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey, StringComparer.Ordinal))
{
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
if (segmentCount > 2)
{
validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
}
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
parsedCells.Add(new ParsedCriticalCellArtifact(
null,
cellEntry.RollBandLabel,
cellEntry.ColumnKey,
cellEntry.Lines,
rawCellText,
descriptionText,
rawAffixText));
parsedResults.Add(new ParsedCriticalResult(
null,
cellEntry.ColumnKey,
cellEntry.RollBandLabel,
rawCellText,
descriptionText,
rawAffixText));
}
if (columnAnchors.Count != ExpectedColumns.Length) if (columnAnchors.Count != ExpectedColumns.Length)
{ {
@@ -208,69 +167,5 @@ public sealed class VariantColumnCriticalTableParser
ExpectedColumns.SingleOrDefault(item => string.Equals(item.Label, value.Trim(), StringComparison.OrdinalIgnoreCase)) ExpectedColumns.SingleOrDefault(item => string.Equals(item.Label, value.Trim(), StringComparison.OrdinalIgnoreCase))
?? throw new InvalidOperationException($"Unsupported variant column label '{value}'."); ?? throw new InvalidOperationException($"Unsupported variant column label '{value}'.");
/// <summary>
/// Repairs affix lines that were attributed to the wrong row: for each column and each pair of
/// consecutive rows, affix-like lines at the start of the lower cell are appended to the upper cell,
/// as long as the lower cell keeps at least one remaining line.
/// </summary>
/// <param name="cellEntries">Cells to repair; their <c>Lines</c> lists are modified in place.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to <c>CriticalTableParserSupport.IsAffixLikeLine</c>.</param>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);

    // De-duplicate with the same ordinal comparison that the '==' lookups below use. With a
    // case-insensitive Distinct, a key differing only by case from an earlier one would be dropped
    // from this list and its cells would silently be skipped.
    var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.Ordinal).ToList();

    // Stop before the last row: it has no following row to borrow lines from.
    for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
    {
        foreach (var columnKey in columnKeys)
        {
            // SingleOrDefault: null when the cell is missing, throws when the cell is duplicated.
            var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey);
            var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey);
            if (current is null || next is null)
            {
                continue;
            }

            var leadingAffixCount = 0;
            while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
            {
                leadingAffixCount++;
            }

            // Leave the pair alone when nothing leads the lower cell or when it is affix-only.
            if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
            {
                continue;
            }

            current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
            next.Lines.RemoveRange(0, leadingAffixCount);
        }
    }
}
/// <summary>
/// Determines where the row anchored at <paramref name="current"/> stops and the row anchored at
/// <paramref name="next"/> starts.
/// </summary>
/// <remarks>
/// Looks at the body lines with <c>Top</c> in <c>[current.Top, next.Top)</c>, ordered by <c>Top</c>, and
/// searches from the bottom for an affix-like line directly followed by a non-affix line. The result
/// is floor(midpoint) + 1 of that pair, or of the two anchors when no such pair exists.
/// </remarks>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int JustPastMidpoint(int firstTop, int secondTop) =>
        (int)Math.Floor((firstTop + secondTop) / 2.0) + 1;

    var between = bodyLines
        .Where(line => line.Top >= current.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToList();

    // Walk upward until an affix-like line that is followed by a non-affix line is found.
    var index = between.Count - 2;
    while (index >= 0 && !(between[index].IsAffixLike && !between[index + 1].IsAffixLike))
    {
        index--;
    }

    return index >= 0
        ? JustPastMidpoint(between[index].Top, between[index + 1].Top)
        : JustPastMidpoint(current.Top, next.Top);
}
private sealed record ColumnDefinition(string Key, string Label); private sealed record ColumnDefinition(string Key, string Label);
/// <summary>
/// Anchor for one table row: the normalized roll-band label, the <c>Top</c> coordinate of the label
/// fragment, and a 1-based sort order assigned by ascending <c>Top</c>.
/// </summary>
private sealed record RowAnchor(string Label, int Top, int SortOrder);
/// <summary>
/// Working record for a single variant-column table cell. The identifying values are read-only; the
/// <see cref="Lines"/> list itself is mutable so repair passes can move lines between cells.
/// </summary>
private sealed class CellEntry
{
    public CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    /// <summary>Label of the roll band (row) the cell belongs to.</summary>
    public string RollBandLabel { get; }

    /// <summary>Zero-based index of the cell's row.</summary>
    public int RowIndex { get; }

    /// <summary>Key of the column the cell belongs to.</summary>
    public string ColumnKey { get; }

    /// <summary>Text lines of the cell; the same list instance that was passed to the constructor.</summary>
    public List<string> Lines { get; }
}
} }