Implement phase 5 critical branch extraction
This commit is contained in:
@@ -63,7 +63,6 @@ The current implementation supports:
|
|||||||
The current implementation does not yet support:
|
The current implementation does not yet support:
|
||||||
|
|
||||||
- OCR/image-based PDFs such as `Void.pdf`
|
- OCR/image-based PDFs such as `Void.pdf`
|
||||||
- normalized `critical_branch` population
|
|
||||||
- normalized `critical_effect` population
|
- normalized `critical_effect` population
|
||||||
- automatic confidence scoring beyond validation errors
|
- automatic confidence scoring beyond validation errors
|
||||||
|
|
||||||
@@ -210,10 +209,6 @@ The importer now explicitly rejects cells that still look structurally wrong aft
|
|||||||
|
|
||||||
This keeps the phase-2.1 safety goal in place while allowing broader standard-table layouts that render a single affix block either before or after the prose block.
|
This keeps the phase-2.1 safety goal in place while allowing broader standard-table layouts that render a single affix block either before or after the prose block.
|
||||||
|
|
||||||
## Planned Future Phases
|
|
||||||
|
|
||||||
The current architecture is intended to support additional phases:
|
|
||||||
|
|
||||||
### Phase 3: Broader Table Coverage
|
### Phase 3: Broader Table Coverage
|
||||||
|
|
||||||
Phase 3 expands the manifest and validates the shared `standard` parser across a broader set of `A-E` tables.
|
Phase 3 expands the manifest and validates the shared `standard` parser across a broader set of `A-E` tables.
|
||||||
@@ -494,11 +489,12 @@ Affix-like classification is intentionally conservative. Numeric prose lines suc
|
|||||||
|
|
||||||
The current implementation stores:
|
The current implementation stores:
|
||||||
|
|
||||||
- `RawCellText`
|
- base `RawCellText`
|
||||||
- `DescriptionText`
|
- base `DescriptionText`
|
||||||
- `RawAffixText`
|
- base `RawAffixText`
|
||||||
|
- parsed conditional branches with condition text, branch prose, and branch affix text
|
||||||
|
|
||||||
It does not yet normalize branches or effects into separate tables.
|
It does not yet normalize effects into separate tables.
|
||||||
|
|
||||||
## Validation Rules
|
## Validation Rules
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ public sealed class RolemasterDbContext(DbContextOptions<RolemasterDbContext> op
|
|||||||
public DbSet<CriticalColumn> CriticalColumns => Set<CriticalColumn>();
|
public DbSet<CriticalColumn> CriticalColumns => Set<CriticalColumn>();
|
||||||
public DbSet<CriticalRollBand> CriticalRollBands => Set<CriticalRollBand>();
|
public DbSet<CriticalRollBand> CriticalRollBands => Set<CriticalRollBand>();
|
||||||
public DbSet<CriticalResult> CriticalResults => Set<CriticalResult>();
|
public DbSet<CriticalResult> CriticalResults => Set<CriticalResult>();
|
||||||
|
public DbSet<CriticalBranch> CriticalBranches => Set<CriticalBranch>();
|
||||||
|
|
||||||
protected override void OnModelCreating(ModelBuilder modelBuilder)
|
protected override void OnModelCreating(ModelBuilder modelBuilder)
|
||||||
{
|
{
|
||||||
@@ -78,5 +79,13 @@ public sealed class RolemasterDbContext(DbContextOptions<RolemasterDbContext> op
|
|||||||
entity.HasIndex(item => new { item.CriticalTableId, item.CriticalGroupId, item.CriticalColumnId, item.CriticalRollBandId }).IsUnique();
|
entity.HasIndex(item => new { item.CriticalTableId, item.CriticalGroupId, item.CriticalColumnId, item.CriticalRollBandId }).IsUnique();
|
||||||
entity.Property(item => item.ParseStatus).HasMaxLength(32);
|
entity.Property(item => item.ParseStatus).HasMaxLength(32);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity<CriticalBranch>(entity =>
|
||||||
|
{
|
||||||
|
entity.HasIndex(item => item.CriticalResultId);
|
||||||
|
entity.HasIndex(item => new { item.CriticalResultId, item.SortOrder });
|
||||||
|
entity.Property(item => item.BranchKind).HasMaxLength(32);
|
||||||
|
entity.Property(item => item.ConditionKey).HasMaxLength(128);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ public static class RolemasterDbInitializer
|
|||||||
await using var dbContext = await dbFactory.CreateDbContextAsync(cancellationToken);
|
await using var dbContext = await dbFactory.CreateDbContextAsync(cancellationToken);
|
||||||
|
|
||||||
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
|
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
|
||||||
|
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
|
||||||
|
|
||||||
if (await dbContext.AttackTables.AnyAsync(cancellationToken))
|
if (await dbContext.AttackTables.AnyAsync(cancellationToken))
|
||||||
{
|
{
|
||||||
|
|||||||
43
src/RolemasterDb.App/Data/RolemasterDbSchemaUpgrader.cs
Normal file
43
src/RolemasterDb.App/Data/RolemasterDbSchemaUpgrader.cs
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
|
namespace RolemasterDb.App.Data;
|
||||||
|
|
||||||
|
/// <summary>
/// Applies in-place schema additions to an already-created database by running
/// idempotent DDL statements.
/// </summary>
public static class RolemasterDbSchemaUpgrader
{
    // Every statement is guarded with IF NOT EXISTS, so running the list again
    // against an up-to-date database changes nothing. Order matters: the table
    // must exist before its indexes are created.
    private static readonly string[] UpgradeStatements =
    [
        """
        CREATE TABLE IF NOT EXISTS "CriticalBranches" (
            "Id" INTEGER NOT NULL CONSTRAINT "PK_CriticalBranches" PRIMARY KEY AUTOINCREMENT,
            "CriticalResultId" INTEGER NOT NULL,
            "BranchKind" TEXT NOT NULL,
            "ConditionKey" TEXT NULL,
            "ConditionText" TEXT NOT NULL,
            "ConditionJson" TEXT NOT NULL,
            "RawText" TEXT NOT NULL,
            "DescriptionText" TEXT NOT NULL,
            "RawAffixText" TEXT NULL,
            "ParsedJson" TEXT NOT NULL,
            "SortOrder" INTEGER NOT NULL,
            CONSTRAINT "FK_CriticalBranches_CriticalResults_CriticalResultId"
                FOREIGN KEY ("CriticalResultId") REFERENCES "CriticalResults" ("Id") ON DELETE CASCADE
        );
        """,
        """
        CREATE INDEX IF NOT EXISTS "IX_CriticalBranches_CriticalResultId"
        ON "CriticalBranches" ("CriticalResultId");
        """,
        """
        CREATE INDEX IF NOT EXISTS "IX_CriticalBranches_CriticalResultId_SortOrder"
        ON "CriticalBranches" ("CriticalResultId", "SortOrder");
        """,
    ];

    /// <summary>
    /// Ensures the <c>CriticalBranches</c> table and its two indexes exist.
    /// </summary>
    /// <param name="dbContext">Context whose database connection receives the DDL.</param>
    /// <param name="cancellationToken">Token forwarded to each statement execution.</param>
    public static async Task EnsureLatestAsync(RolemasterDbContext dbContext, CancellationToken cancellationToken = default)
    {
        foreach (var statement in UpgradeStatements)
        {
            await dbContext.Database.ExecuteSqlRawAsync(statement, cancellationToken);
        }
    }
}
|
||||||
17
src/RolemasterDb.App/Domain/CriticalBranch.cs
Normal file
17
src/RolemasterDb.App/Domain/CriticalBranch.cs
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
namespace RolemasterDb.App.Domain;
|
||||||
|
|
||||||
|
/// <summary>
/// One branch row attached to a <see cref="CriticalResult"/>: a condition plus the
/// prose and affix text that applies under that condition.
/// </summary>
public sealed class CriticalBranch
{
    /// <summary>Primary key.</summary>
    public int Id { get; set; }

    /// <summary>Key of the owning <see cref="CriticalResult"/>.</summary>
    public int CriticalResultId { get; set; }

    /// <summary>Kind discriminator for the branch; defaults to <c>"conditional"</c>.</summary>
    public string BranchKind { get; set; } = "conditional";

    /// <summary>Normalized, machine-friendly form of the condition; may be <see langword="null"/>.</summary>
    public string? ConditionKey { get; set; }

    /// <summary>Condition text as it was read from the cell.</summary>
    public string ConditionText { get; set; } = "";

    /// <summary>JSON payload describing the condition; defaults to an empty object.</summary>
    public string ConditionJson { get; set; } = "{}";

    /// <summary>Unprocessed text of the whole branch.</summary>
    public string RawText { get; set; } = "";

    /// <summary>Prose portion of the branch.</summary>
    public string DescriptionText { get; set; } = "";

    /// <summary>Affix portion of the branch, or <see langword="null"/> when there is none.</summary>
    public string? RawAffixText { get; set; }

    /// <summary>JSON payload for parsed branch data; defaults to an empty object.</summary>
    public string ParsedJson { get; set; } = "{}";

    /// <summary>Position of the branch among the branches of the same result.</summary>
    public int SortOrder { get; set; }

    /// <summary>Navigation to the owning result.</summary>
    public CriticalResult CriticalResult { get; set; } = null!;
}
|
||||||
@@ -16,4 +16,5 @@ public sealed class CriticalResult
|
|||||||
public CriticalGroup? CriticalGroup { get; set; }
|
public CriticalGroup? CriticalGroup { get; set; }
|
||||||
public CriticalColumn CriticalColumn { get; set; } = null!;
|
public CriticalColumn CriticalColumn { get; set; } = null!;
|
||||||
public CriticalRollBand CriticalRollBand { get; set; } = null!;
|
public CriticalRollBand CriticalRollBand { get; set; } = null!;
|
||||||
|
public List<CriticalBranch> Branches { get; set; } = [];
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
@@ -1,3 +1,6 @@
|
|||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
|
using RolemasterDb.App.Data;
|
||||||
using RolemasterDb.ImportTool.Parsing;
|
using RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
namespace RolemasterDb.ImportTool.Tests;
|
namespace RolemasterDb.ImportTool.Tests;
|
||||||
@@ -249,6 +252,83 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
|||||||
Assert.Contains("Blast goes in through foe's eye", superSlaying.DescriptionText, StringComparison.OrdinalIgnoreCase);
|
Assert.Contains("Blast goes in through foe's eye", superSlaying.DescriptionText, StringComparison.OrdinalIgnoreCase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Slash 36-45 / column B: the base prose must stay on the result while the two
// "leg greaves" conditions are lifted out into separate affix-only branches.
[Fact]
public async Task Slash_branch_cells_split_base_text_from_conditional_affix_branches()
{
    // Arrange
    var slashEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "slash", StringComparison.Ordinal));

    // Act
    var parsed = await LoadParseResultAsync(slashEntry);
    var cell = parsed.Table.Results.Single(candidate =>
        candidate.GroupKey is null &&
        string.Equals(candidate.RollBandLabel, "36-45", StringComparison.Ordinal) &&
        string.Equals(candidate.ColumnKey, "B", StringComparison.Ordinal));
    var branchFor = (string conditionText) => cell.Branches.Single(branch =>
        string.Equals(branch.ConditionText, conditionText, StringComparison.OrdinalIgnoreCase));

    // Assert: base content no longer carries the branch lines
    Assert.Equal("Strike foe in shin. If he doesn't have greaves, you slash open foe's shin.", cell.DescriptionText);
    Assert.Null(cell.RawAffixText);
    Assert.DoesNotContain("with leg greaves:", cell.RawCellText, StringComparison.OrdinalIgnoreCase);
    Assert.Equal(2, cell.Branches.Count);

    // Assert: each condition became its own branch
    var greavesBranch = branchFor("with leg greaves");
    var noGreavesBranch = branchFor("w/o leg greaves");

    Assert.Equal("with_leg_greaves", greavesBranch.ConditionKey);
    Assert.Equal("+2H – π", greavesBranch.RawAffixText);
    Assert.Equal("", greavesBranch.DescriptionText);
    Assert.Equal("without_leg_greaves", noGreavesBranch.ConditionKey);
    Assert.Equal("+2H – ∫", noGreavesBranch.RawAffixText);
}
|
||||||
|
|
||||||
|
// Impact 86-90 / column D: one branch carries only an affix, the other only
// prose; neither may bleed into the base description.
[Fact]
public async Task Impact_branch_cells_keep_prose_branch_text_separate_from_affix_branch_text()
{
    // Arrange
    var impactEntry = LoadManifest().Tables.Single(table => string.Equals(table.Slug, "impact", StringComparison.Ordinal));

    // Act
    var parsed = await LoadParseResultAsync(impactEntry);
    var cell = parsed.Table.Results.Single(candidate =>
        candidate.GroupKey is null &&
        string.Equals(candidate.RollBandLabel, "86-90", StringComparison.Ordinal) &&
        string.Equals(candidate.ColumnKey, "D", StringComparison.Ordinal));
    var branchFor = (string conditionText) => cell.Branches.Single(branch =>
        string.Equals(branch.ConditionText, conditionText, StringComparison.OrdinalIgnoreCase));

    // Assert: base content
    const string expectedBaseDescription =
        "Onslaught to foe's midsection. Organs are damaged and foe throws up blood. Foe's abdomen is seriously damaged. He falls and should not be moved.";
    Assert.Equal(expectedBaseDescription, cell.DescriptionText);
    Assert.Null(cell.RawAffixText);
    Assert.Equal(2, cell.Branches.Count);

    // Assert: affix-only branch versus prose-only branch
    var armoredBranch = branchFor("with abdominal armor");
    var unarmoredBranch = branchFor("w/o abdominal armor");

    Assert.Equal("12∑", armoredBranch.RawAffixText);
    Assert.Equal("", armoredBranch.DescriptionText);
    Assert.Null(unarmoredBranch.RawAffixText);
    Assert.Equal("dies in 6 rounds", unarmoredBranch.DescriptionText);
}
|
||||||
|
|
||||||
|
// Loads the parsed slash table into a private copy of the existing SQLite file and
// verifies that the branch rows were persisted. The copy is removed afterwards so
// repeated runs do not pile up GUID-named databases in the artifact cache.
[Fact]
public async Task Loader_upgrades_existing_sqlite_and_persists_branch_rows()
{
    var entry = LoadManifest().Tables.Single(item => string.Equals(item.Slug, "slash", StringComparison.Ordinal));
    var parseResult = await LoadParseResultAsync(entry);
    var databasePath = CreateTemporaryDatabaseCopy();

    try
    {
        var loader = new CriticalImportLoader(databasePath);

        await loader.LoadAsync(parseResult.Table);

        // Declared inside the try block so the context is disposed before the
        // cleanup in the finally block runs.
        await using var dbContext = CreateDbContext(databasePath);
        var result = await dbContext.CriticalResults
            .Include(item => item.CriticalTable)
            .Include(item => item.CriticalColumn)
            .Include(item => item.CriticalRollBand)
            .Include(item => item.Branches)
            .SingleAsync(item =>
                item.CriticalTable.Slug == "slash" &&
                item.CriticalColumn.ColumnKey == "B" &&
                item.CriticalRollBand.Label == "36-45");

        Assert.DoesNotContain("with leg greaves:", result.RawCellText, StringComparison.OrdinalIgnoreCase);
        Assert.Equal(2, result.Branches.Count);
        Assert.Contains(result.Branches, item => item.ConditionKey == "with_leg_greaves" && item.RawAffixText == "+2H – π");
        Assert.Contains(result.Branches, item => item.ConditionKey == "without_leg_greaves" && item.RawAffixText == "+2H – ∫");
    }
    finally
    {
        // Best-effort cleanup: the file can still be held open (for example by a
        // pooled connection), and a failed delete must not mask the test outcome.
        try
        {
            File.Delete(databasePath);
        }
        catch (IOException)
        {
            // Deliberately ignored; the leftover copy is harmless.
        }
        catch (UnauthorizedAccessException)
        {
            // Deliberately ignored; the leftover copy is harmless.
        }
    }
}
|
||||||
|
|
||||||
private static async Task<CriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
|
private static async Task<CriticalTableParseResult> LoadParseResultAsync(CriticalImportManifestEntry entry)
|
||||||
{
|
{
|
||||||
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");
|
var xmlPath = Path.Combine(GetArtifactCacheRoot(), $"{entry.Slug}.xml");
|
||||||
@@ -278,6 +358,22 @@ public sealed class StandardCriticalTableParserIntegrationTests
|
|||||||
return cacheRoot;
|
return cacheRoot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Opens a context over the SQLite file at the given path.
private static RolemasterDbContext CreateDbContext(string databasePath)
{
    var optionsBuilder = new DbContextOptionsBuilder<RolemasterDbContext>();
    optionsBuilder.UseSqlite($"Data Source={databasePath}");

    return new RolemasterDbContext(optionsBuilder.Options);
}
|
||||||
|
|
||||||
|
// Copies the checked-in application database to a uniquely named file in the
// artifact cache and returns the path of the copy.
private static string CreateTemporaryDatabaseCopy()
{
    var copyFileName = $"rolemaster-{Guid.NewGuid():N}.db";
    var copyPath = Path.Combine(GetArtifactCacheRoot(), copyFileName);
    var checkedInDatabasePath = Path.Combine(GetRepositoryRoot(), "src", "RolemasterDb.App", "rolemaster.db");

    File.Copy(checkedInDatabasePath, copyPath, overwrite: true);
    return copyPath;
}
|
||||||
|
|
||||||
private static string GetRepositoryRoot()
|
private static string GetRepositoryRoot()
|
||||||
{
|
{
|
||||||
var probe = new DirectoryInfo(AppContext.BaseDirectory);
|
var probe = new DirectoryInfo(AppContext.BaseDirectory);
|
||||||
|
|||||||
@@ -12,10 +12,12 @@ public sealed class CriticalImportLoader(string databasePath)
|
|||||||
{
|
{
|
||||||
await using var dbContext = CreateDbContext();
|
await using var dbContext = CreateDbContext();
|
||||||
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
|
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
|
||||||
|
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
|
||||||
|
|
||||||
var removedTableCount = await dbContext.CriticalTables.CountAsync(cancellationToken);
|
var removedTableCount = await dbContext.CriticalTables.CountAsync(cancellationToken);
|
||||||
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
|
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
|
||||||
|
|
||||||
|
await dbContext.CriticalBranches.ExecuteDeleteAsync(cancellationToken);
|
||||||
await dbContext.CriticalResults.ExecuteDeleteAsync(cancellationToken);
|
await dbContext.CriticalResults.ExecuteDeleteAsync(cancellationToken);
|
||||||
await dbContext.CriticalGroups.ExecuteDeleteAsync(cancellationToken);
|
await dbContext.CriticalGroups.ExecuteDeleteAsync(cancellationToken);
|
||||||
await dbContext.CriticalColumns.ExecuteDeleteAsync(cancellationToken);
|
await dbContext.CriticalColumns.ExecuteDeleteAsync(cancellationToken);
|
||||||
@@ -30,6 +32,7 @@ public sealed class CriticalImportLoader(string databasePath)
|
|||||||
{
|
{
|
||||||
await using var dbContext = CreateDbContext();
|
await using var dbContext = CreateDbContext();
|
||||||
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
|
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
|
||||||
|
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
|
||||||
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
|
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
|
||||||
|
|
||||||
await DeleteTableAsync(dbContext, table.Slug, cancellationToken);
|
await DeleteTableAsync(dbContext, table.Slug, cancellationToken);
|
||||||
@@ -86,7 +89,21 @@ public sealed class CriticalImportLoader(string databasePath)
|
|||||||
DescriptionText = item.DescriptionText,
|
DescriptionText = item.DescriptionText,
|
||||||
RawAffixText = item.RawAffixText,
|
RawAffixText = item.RawAffixText,
|
||||||
ParsedJson = "{}",
|
ParsedJson = "{}",
|
||||||
ParseStatus = "raw"
|
ParseStatus = "raw",
|
||||||
|
Branches = item.Branches
|
||||||
|
.Select(branch => new CriticalBranch
|
||||||
|
{
|
||||||
|
BranchKind = branch.BranchKind,
|
||||||
|
ConditionKey = branch.ConditionKey,
|
||||||
|
ConditionText = branch.ConditionText,
|
||||||
|
ConditionJson = "{}",
|
||||||
|
RawText = branch.RawText,
|
||||||
|
DescriptionText = branch.DescriptionText,
|
||||||
|
RawAffixText = branch.RawAffixText,
|
||||||
|
ParsedJson = "{}",
|
||||||
|
SortOrder = branch.SortOrder
|
||||||
|
})
|
||||||
|
.ToList()
|
||||||
})
|
})
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
@@ -121,6 +138,10 @@ public sealed class CriticalImportLoader(string databasePath)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await dbContext.CriticalBranches
|
||||||
|
.Where(item => item.CriticalResult.CriticalTableId == tableId.Value)
|
||||||
|
.ExecuteDeleteAsync(cancellationToken);
|
||||||
|
|
||||||
await dbContext.CriticalResults
|
await dbContext.CriticalResults
|
||||||
.Where(item => item.CriticalTableId == tableId.Value)
|
.Where(item => item.CriticalTableId == tableId.Value)
|
||||||
.ExecuteDeleteAsync(cancellationToken);
|
.ExecuteDeleteAsync(cancellationToken);
|
||||||
|
|||||||
10
src/RolemasterDb.ImportTool/Parsing/ColumnarCellEntry.cs
Normal file
10
src/RolemasterDb.ImportTool/Parsing/ColumnarCellEntry.cs
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// Text lines collected for one table cell, identified by group, roll band row and column.
/// </summary>
internal sealed class ColumnarCellEntry
{
    public ColumnarCellEntry(string? groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        GroupKey = groupKey;
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    /// <summary>Group the cell belongs to, or <see langword="null"/> when there is none.</summary>
    public string? GroupKey { get; }

    /// <summary>Label of the roll band row.</summary>
    public string RollBandLabel { get; }

    /// <summary>Index of the row the cell sits in.</summary>
    public int RowIndex { get; }

    /// <summary>Key of the column the cell sits in.</summary>
    public string ColumnKey { get; }

    /// <summary>
    /// The cell's lines. This is the same list instance that was passed to the
    /// constructor, and it is mutable.
    /// </summary>
    public List<string> Lines { get; }
}
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// Outcome of parsing one cell: the base lines and their text sections, the
/// extracted branches, and any validation errors raised along the way.
/// </summary>
internal sealed class CriticalCellParseContent
{
    public CriticalCellParseContent(
        IReadOnlyList<string> baseLines,
        string rawCellText,
        string descriptionText,
        string? rawAffixText,
        IReadOnlyList<ParsedCriticalBranch> branches,
        IReadOnlyList<string> validationErrors)
    {
        BaseLines = baseLines;
        RawCellText = rawCellText;
        DescriptionText = descriptionText;
        RawAffixText = rawAffixText;
        Branches = branches;
        ValidationErrors = validationErrors;
    }

    /// <summary>Lines that belong to the base content of the cell.</summary>
    public IReadOnlyList<string> BaseLines { get; }

    /// <summary>Raw text of the cell.</summary>
    public string RawCellText { get; }

    /// <summary>Prose portion of the cell.</summary>
    public string DescriptionText { get; }

    /// <summary>Affix portion of the cell, or <see langword="null"/> when there is none.</summary>
    public string? RawAffixText { get; }

    /// <summary>Branches extracted from the cell.</summary>
    public IReadOnlyList<ParsedCriticalBranch> Branches { get; }

    /// <summary>Validation messages produced while parsing the cell.</summary>
    public IReadOnlyList<string> ValidationErrors { get; }
}
|
||||||
114
src/RolemasterDb.ImportTool/Parsing/CriticalCellTextParser.cs
Normal file
114
src/RolemasterDb.ImportTool/Parsing/CriticalCellTextParser.cs
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// Splits the lines of a single cell into base content and conditional branches,
/// and separates prose from affix text within each part.
/// </summary>
internal static class CriticalCellTextParser
{
    /// <summary>
    /// Parses a cell. Every line before the first branch-start line is base content;
    /// each branch runs from its start line up to the next branch start (or the end).
    /// </summary>
    /// <param name="lines">The cell's lines in reading order.</param>
    /// <param name="affixLegendSymbols">Symbols used when deciding whether a line is affix-like.</param>
    /// <returns>Base text sections, branches (sort order starting at 1) and validation errors.</returns>
    internal static CriticalCellParseContent Parse(IReadOnlyList<string> lines, ISet<string> affixLegendSymbols)
    {
        var branchStarts = FindBranchStartIndexes(lines);
        var baseLines = lines
            .Take(branchStarts.Count > 0 ? branchStarts[0] : lines.Count)
            .ToList();

        // Base-content errors are reported first, followed by branch errors in branch order.
        var errors = new List<string>();
        errors.AddRange(ValidateSegmentCount(baseLines, affixLegendSymbols, "Base content"));

        var parsedBranches = new List<ParsedCriticalBranch>();
        for (var position = 0; position < branchStarts.Count; position++)
        {
            var isLastBranch = position == branchStarts.Count - 1;
            var firstLineIndex = branchStarts[position];
            var endLineIndex = isLastBranch ? lines.Count : branchStarts[position + 1];

            var branchLines = new List<string>();
            for (var lineIndex = firstLineIndex; lineIndex < endLineIndex; lineIndex++)
            {
                branchLines.Add(lines[lineIndex]);
            }

            parsedBranches.Add(ParseBranch(branchLines, position + 1, affixLegendSymbols, errors));
        }

        var baseSections = BuildTextSections(baseLines, affixLegendSymbols);
        return new CriticalCellParseContent(
            baseLines,
            baseSections.RawText,
            baseSections.DescriptionText,
            baseSections.RawAffixText,
            parsedBranches,
            errors);
    }

    // Turns the lines of one branch into a ParsedCriticalBranch. The first line has the
    // form "<condition>: <payload>"; everything after the first colon, plus all following
    // lines, is the branch payload. Validation messages are appended to validationErrors.
    private static ParsedCriticalBranch ParseBranch(
        IReadOnlyList<string> branchLines,
        int sortOrder,
        ISet<string> affixLegendSymbols,
        List<string> validationErrors)
    {
        var headerLine = branchLines[0];
        var colonIndex = headerLine.IndexOf(':', StringComparison.Ordinal);
        var conditionText = CriticalTableParserSupport.CollapseWhitespace(headerLine[..colonIndex]);

        // The remainder of the header line and the continuation lines are treated alike:
        // collapse whitespace and drop anything that ends up blank.
        var payloadLines = branchLines
            .Skip(1)
            .Prepend(headerLine[(colonIndex + 1)..])
            .Select(line => CriticalTableParserSupport.CollapseWhitespace(line))
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToList();

        validationErrors.AddRange(ValidateSegmentCount(payloadLines, affixLegendSymbols, $"Branch '{conditionText}'"));

        var payloadSections = BuildTextSections(payloadLines, affixLegendSymbols);
        return new ParsedCriticalBranch(
            "conditional",
            CriticalTableParserSupport.NormalizeConditionKey(conditionText),
            conditionText,
            string.Join(Environment.NewLine, branchLines),
            payloadSections.DescriptionText,
            payloadSections.RawAffixText,
            sortOrder);
    }

    // Returns, in ascending order, the index of every line that opens a conditional branch.
    private static List<int> FindBranchStartIndexes(IReadOnlyList<string> lines) =>
        Enumerable.Range(0, lines.Count)
            .Where(index => CriticalTableParserSupport.IsConditionalBranchStartLine(lines[index]))
            .ToList();

    // Produces one error when the lines form more than two line-type segments, i.e. prose
    // and affix lines alternate instead of forming at most one block of each.
    private static IReadOnlyList<string> ValidateSegmentCount(
        IReadOnlyList<string> lines,
        ISet<string> affixLegendSymbols,
        string scope)
    {
        if (lines.Count == 0)
        {
            return [];
        }

        if (CriticalTableParserSupport.CountLineTypeSegments(lines, affixLegendSymbols) <= 2)
        {
            return [];
        }

        return [$"{scope} interleaves prose and affix lines."];
    }

    // Builds the three text views of a set of lines: all lines joined by newlines, the
    // non-affix lines joined by spaces with whitespace collapsed, and the affix lines
    // joined by newlines (null when there are none).
    private static (string RawText, string DescriptionText, string? RawAffixText) BuildTextSections(
        IReadOnlyList<string> lines,
        ISet<string> affixLegendSymbols)
    {
        var affixLines = new List<string>();
        var proseLines = new List<string>();
        foreach (var line in lines)
        {
            var bucket = CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols) ? affixLines : proseLines;
            bucket.Add(line);
        }

        string? rawAffixText = null;
        if (affixLines.Count > 0)
        {
            rawAffixText = string.Join(Environment.NewLine, affixLines);
        }

        return (
            string.Join(Environment.NewLine, lines),
            CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', proseLines)),
            rawAffixText);
    }
}
|
||||||
@@ -169,15 +169,9 @@ internal static class CriticalTableParserSupport
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (value.StartsWith("with ", StringComparison.OrdinalIgnoreCase) ||
|
if (IsConditionalBranchStartLine(value))
|
||||||
value.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
value.StartsWith("without ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
value.StartsWith("if ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
value.StartsWith("while ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
value.StartsWith("until ", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
value.StartsWith("unless ", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
{
|
||||||
return value.Contains(':', StringComparison.Ordinal);
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (affixLegendSymbols.Count > 0 &&
|
if (affixLegendSymbols.Count > 0 &&
|
||||||
@@ -242,6 +236,23 @@ internal static class CriticalTableParserSupport
|
|||||||
internal static string CollapseWhitespace(string value) =>
|
internal static string CollapseWhitespace(string value) =>
|
||||||
Regex.Replace(value.Trim(), @"\s+", " ");
|
Regex.Replace(value.Trim(), @"\s+", " ");
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether a line opens a conditional branch: after trimming, it must contain a
/// colon and begin (case-insensitively) with one of the condition lead-in words
/// followed by a space.
/// </summary>
internal static bool IsConditionalBranchStartLine(string value)
{
    var candidate = value.Trim();

    // Without a colon there is no "<condition>: <payload>" split to make.
    if (candidate.IndexOf(':', StringComparison.Ordinal) < 0)
    {
        return false;
    }

    string[] leadIns = ["with ", "w/o ", "without ", "if ", "while ", "until ", "unless "];
    foreach (var leadIn in leadIns)
    {
        if (candidate.StartsWith(leadIn, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
internal static string NormalizeText(string value) =>
|
internal static string NormalizeText(string value) =>
|
||||||
value
|
value
|
||||||
.Replace('\u00a0', ' ')
|
.Replace('\u00a0', ' ')
|
||||||
@@ -250,6 +261,25 @@ internal static class CriticalTableParserSupport
|
|||||||
.Replace('’', '\'')
|
.Replace('’', '\'')
|
||||||
.Trim();
|
.Trim();
|
||||||
|
|
||||||
|
/// <summary>
/// Derives a lowercase, underscore-separated key from a branch condition, e.g.
/// <c>"w/o leg greaves"</c> becomes <c>"without_leg_greaves"</c>.
/// </summary>
/// <param name="conditionText">Condition text as read from the cell.</param>
/// <returns>
/// The key, or <see langword="null"/> when the text contains no ASCII letter or digit.
/// </returns>
internal static string? NormalizeConditionKey(string conditionText)
{
    var normalized = conditionText.ToLowerInvariant();

    // Expand the "w/o" abbreviation only where it starts a token. A plain substring
    // replace would also rewrite the sequence inside longer words such as "now/or".
    normalized = Regex.Replace(normalized, @"(?<![a-z0-9])w/o", "without");

    // Every run of other characters (whitespace included) becomes one underscore, so no
    // separate whitespace-collapsing pass is needed before this step.
    normalized = Regex.Replace(normalized, @"[^a-z0-9]+", "_").Trim('_');

    return normalized.Length == 0 ? null : normalized;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Finds the smallest <c>Top</c> among the fragments that look like part of the key
/// section: text equal to "Key:" or containing "must parry" or "attacker gets"
/// (all case-insensitive).
/// </summary>
/// <returns>That top value, or <see cref="int.MaxValue"/> when no fragment matches.</returns>
internal static int FindKeyTop(IReadOnlyList<XmlTextFragment> fragments)
{
    int? smallestTop = null;

    foreach (var fragment in fragments)
    {
        var looksLikeKeyText =
            string.Equals(fragment.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
            fragment.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
            fragment.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase);
        if (!looksLikeKeyText)
        {
            continue;
        }

        var candidateTop = (int?)fragment.Top;
        if (smallestTop is null || candidateTop < smallestTop)
        {
            smallestTop = candidateTop;
        }
    }

    return smallestTop ?? int.MaxValue;
}
|
||||||
|
|
||||||
internal static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
|
internal static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
|
||||||
{
|
{
|
||||||
if (keyTop == int.MaxValue)
|
if (keyTop == int.MaxValue)
|
||||||
@@ -347,6 +377,138 @@ internal static class CriticalTableParserSupport
|
|||||||
return groups;
|
return groups;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates one row anchor per row-label fragment, ordered by <c>Top</c>. Each anchor
/// receives the normalized roll band label, the fragment's top and its 1-based
/// position in that order.
/// </summary>
internal static List<RowAnchor> CreateRowAnchors(IReadOnlyList<XmlTextFragment> rowLabelFragments)
{
    var anchors = new List<RowAnchor>();
    var position = 0;

    foreach (var fragment in rowLabelFragments.OrderBy(item => item.Top))
    {
        position++;
        anchors.Add(new RowAnchor(NormalizeRollBandLabel(fragment.Text), fragment.Top, position));
    }

    return anchors;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Selects the fragments that make up the table body and then splits affix fragments
/// that cross a column boundary.
/// </summary>
/// <remarks>
/// A fragment is kept when its top lies in <c>[bodyStartTop, keyTop - TopGroupingTolerance)</c>
/// and it is not a footer page number, a potential row label, a fragment that coincides
/// with a row anchor (same top and same normalized label), or an explicitly excluded fragment.
/// </remarks>
internal static List<XmlTextFragment> BuildBodyFragments(
    IReadOnlyList<XmlTextFragment> fragments,
    int bodyStartTop,
    int keyTop,
    int leftCutoff,
    IReadOnlyList<RowAnchor> rowAnchors,
    IReadOnlyCollection<XmlTextFragment> excludedFragments,
    IReadOnlyList<(string Key, double CenterX)> columnCenters,
    ISet<string> affixLegendSymbols)
{
    bool CoincidesWithRowAnchor(XmlTextFragment fragment) =>
        rowAnchors.Any(anchor =>
            anchor.Top == fragment.Top &&
            string.Equals(anchor.Label, NormalizeRollBandLabel(fragment.Text), StringComparison.OrdinalIgnoreCase));

    bool BelongsToBody(XmlTextFragment fragment)
    {
        var withinBodyBand = fragment.Top >= bodyStartTop && fragment.Top < keyTop - TopGroupingTolerance;
        if (!withinBodyBand)
        {
            return false;
        }

        if (IsFooterPageNumberFragment(fragment, keyTop))
        {
            return false;
        }

        if (IsPotentialRowLabelFragment(fragment, leftCutoff))
        {
            return false;
        }

        if (CoincidesWithRowAnchor(fragment))
        {
            return false;
        }

        return !excludedFragments.Contains(fragment);
    }

    var keptFragments = fragments.Where(BelongsToBody).ToList();

    return SplitBoundaryCrossingAffixFragments(keptFragments, columnCenters, affixLegendSymbols);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Moves affix-like lines that lead a cell up to the cell directly above it (same
/// group and column, previous row index). A cell is left untouched when it has no
/// leading affix-like lines or consists of nothing but affix-like lines.
/// </summary>
/// <remarks>
/// Rows are processed from index 0 upwards, and the <c>Lines</c> lists of the affected
/// entries are modified in place.
/// </remarks>
internal static void RepairLeadingAffixLeakage(List<ColumnarCellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    if (cellEntries.Count == 0)
    {
        return;
    }

    // Throws (via SingleOrDefault) when more than one entry matches, as a cell is
    // expected to be unique per row/group/column.
    ColumnarCellEntry? FindCell(int rowIndex, string? groupKey, string columnKey) =>
        cellEntries.SingleOrDefault(entry =>
            entry.RowIndex == rowIndex &&
            string.Equals(entry.GroupKey, groupKey, StringComparison.Ordinal) &&
            string.Equals(entry.ColumnKey, columnKey, StringComparison.Ordinal));

    var lastRowIndex = cellEntries.Max(entry => entry.RowIndex);
    var columnAxes = cellEntries
        .Select(entry => (entry.GroupKey, entry.ColumnKey))
        .Distinct()
        .ToList();

    for (var upperRowIndex = 0; upperRowIndex < lastRowIndex; upperRowIndex++)
    {
        foreach (var axis in columnAxes)
        {
            var upperCell = FindCell(upperRowIndex, axis.GroupKey, axis.ColumnKey);
            var lowerCell = FindCell(upperRowIndex + 1, axis.GroupKey, axis.ColumnKey);
            if (upperCell is null || lowerCell is null)
            {
                continue;
            }

            var leakedLineCount = lowerCell.Lines
                .TakeWhile(line => IsAffixLikeLine(line, affixLegendSymbols))
                .Count();

            // Nothing leaked, or the lower cell is affix-only and keeps its own lines.
            var nothingToMove = leakedLineCount == 0 || leakedLineCount == lowerCell.Lines.Count;
            if (nothingToMove)
            {
                continue;
            }

            upperCell.Lines.AddRange(lowerCell.Lines.GetRange(0, leakedLineCount));
            lowerCell.Lines.RemoveRange(0, leakedLineCount);
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Computes the vertical coordinate at which the row of <paramref name="current"/> ends and the
/// row of <paramref name="next"/> begins.
/// </summary>
/// <remarks>
/// Body lines whose top lies in <c>[current.Top, next.Top)</c> are ordered by top and scanned from
/// the bottom up. The first adjacent pair found where the upper line is affix-like and the lower
/// line is not determines the boundary, placed one past the floored midpoint of the two tops.
/// When no such pair exists, the boundary is one past the floored midpoint of the two anchor tops.
/// </remarks>
/// <param name="current">Anchor of the upper row.</param>
/// <param name="next">Anchor of the lower row.</param>
/// <param name="bodyLines">Body lines described by their top coordinate and affix-like flag.</param>
/// <returns>The top coordinate at which the lower row starts.</returns>
internal static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int OnePastMidpoint(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;

    var candidates = bodyLines
        .Where(line => line.Top >= current.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToArray();

    // Walk adjacent pairs from the bottom so the lowest affix-to-prose transition wins.
    var lowerIndex = candidates.Length - 1;
    while (lowerIndex >= 1)
    {
        var upper = candidates[lowerIndex - 1];
        var lower = candidates[lowerIndex];
        if (upper.IsAffixLike && !lower.IsAffixLike)
        {
            return OnePastMidpoint(upper.Top, lower.Top);
        }

        lowerIndex--;
    }

    return OnePastMidpoint(current.Top, next.Top);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parses every cell entry and appends the resulting cell artifact, result, and any validation
/// errors to the supplied output lists.
/// </summary>
/// <remarks>
/// Entries are processed in the order given. Each validation error reported by
/// <c>CriticalCellTextParser.Parse</c> is prefixed with the identifier of the cell it came from.
/// The cell artifact receives a snapshot copy of the entry's lines.
/// </remarks>
/// <param name="cellEntries">Cells to parse.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to the cell text parser.</param>
/// <param name="parsedCells">Receives one artifact per cell entry.</param>
/// <param name="parsedResults">Receives one result per cell entry.</param>
/// <param name="validationErrors">Receives the prefixed validation errors.</param>
internal static void BuildParsedArtifacts(
    IReadOnlyList<ColumnarCellEntry> cellEntries,
    ISet<string> affixLegendSymbols,
    List<ParsedCriticalCellArtifact> parsedCells,
    List<ParsedCriticalResult> parsedResults,
    List<string> validationErrors)
{
    foreach (var entry in cellEntries)
    {
        var parsed = CriticalCellTextParser.Parse(entry.Lines, affixLegendSymbols);

        foreach (var error in parsed.ValidationErrors)
        {
            validationErrors.Add($"Cell '{BuildCellIdentifier(entry)}': {error}");
        }

        // Copy the lines so the artifact does not alias the entry's list.
        var lineSnapshot = entry.Lines.ToList();
        var cellArtifact = new ParsedCriticalCellArtifact(
            entry.GroupKey,
            entry.RollBandLabel,
            entry.ColumnKey,
            lineSnapshot,
            parsed.BaseLines,
            parsed.RawCellText,
            parsed.DescriptionText,
            parsed.RawAffixText,
            parsed.Branches);
        parsedCells.Add(cellArtifact);

        var result = new ParsedCriticalResult(
            entry.GroupKey,
            entry.ColumnKey,
            entry.RollBandLabel,
            parsed.RawCellText,
            parsed.DescriptionText,
            parsed.RawAffixText,
            parsed.Branches);
        parsedResults.Add(result);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds the identifier used to label a cell in validation messages:
/// <c>rollBand/column</c> when the cell has no group key, otherwise <c>rollBand/group/column</c>.
/// </summary>
/// <param name="cellEntry">Cell whose identifier is built.</param>
/// <returns>The slash-separated cell identifier.</returns>
private static string BuildCellIdentifier(ColumnarCellEntry cellEntry)
{
    if (cellEntry.GroupKey is null)
    {
        return $"{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}";
    }

    return $"{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}";
}
|
||||||
|
|
||||||
private static bool LooksLikeSplitRollBandStart(string value) =>
|
private static bool LooksLikeSplitRollBandStart(string value) =>
|
||||||
Regex.IsMatch(value.Trim(), @"^\d{2,3}\s*-$");
|
Regex.IsMatch(value.Trim(), @"^\d{2,3}\s*-$");
|
||||||
|
|
||||||
|
|||||||
@@ -36,13 +36,7 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
groupHeaders.Max(item => item.Top),
|
groupHeaders.Max(item => item.Top),
|
||||||
columnHeaders.Max(item => item.Top))
|
columnHeaders.Max(item => item.Top))
|
||||||
+ CriticalTableParserSupport.HeaderToBodyMinimumGap;
|
+ CriticalTableParserSupport.HeaderToBodyMinimumGap;
|
||||||
var keyTop = fragments
|
var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
|
||||||
.Where(item =>
|
|
||||||
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
|
|
||||||
.Select(item => (int?)item.Top)
|
|
||||||
.Min() ?? int.MaxValue;
|
|
||||||
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
|
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
|
||||||
var leftCutoff = columnHeaders.Min(item => item.Left) - 10;
|
var leftCutoff = columnHeaders.Min(item => item.Left) - 10;
|
||||||
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
|
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
|
||||||
@@ -50,11 +44,7 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
leftCutoff,
|
leftCutoff,
|
||||||
bodyStartTop,
|
bodyStartTop,
|
||||||
keyTop);
|
keyTop);
|
||||||
|
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
|
||||||
var rowAnchors = rowLabelFragments
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
if (rowAnchors.Count == 0)
|
if (rowAnchors.Count == 0)
|
||||||
{
|
{
|
||||||
@@ -65,34 +55,33 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
.Select(item => (item.CompositeKey, item.CenterX))
|
.Select(item => (item.CompositeKey, item.CenterX))
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
var bodyFragments = fragments
|
var excludedFragments = groupHeaders.Concat(columnHeaders).ToList();
|
||||||
.Where(item =>
|
var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
|
||||||
item.Top >= bodyStartTop &&
|
fragments,
|
||||||
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
|
bodyStartTop,
|
||||||
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
|
keyTop,
|
||||||
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
|
leftCutoff,
|
||||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
|
rowAnchors,
|
||||||
!groupHeaders.Contains(item) &&
|
excludedFragments,
|
||||||
!columnHeaders.Contains(item))
|
columnCenters,
|
||||||
.ToList();
|
affixLegendSymbols);
|
||||||
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
|
|
||||||
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||||
|
|
||||||
var parsedRollBands = rowAnchors
|
var parsedRollBands = rowAnchors
|
||||||
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
|
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
var cellEntries = new List<CellEntry>();
|
var cellEntries = new List<ColumnarCellEntry>();
|
||||||
|
|
||||||
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
|
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
|
||||||
{
|
{
|
||||||
var rowStart = rowIndex == 0
|
var rowStart = rowIndex == 0
|
||||||
? bodyStartTop
|
? bodyStartTop
|
||||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
|
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
|
||||||
|
|
||||||
var rowEnd = rowIndex == rowAnchors.Count - 1
|
var rowEnd = rowIndex == rowAnchors.Count - 1
|
||||||
? keyTop - 1
|
? keyTop - 1
|
||||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
|
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
|
||||||
|
|
||||||
var rowFragments = bodyFragments
|
var rowFragments = bodyFragments
|
||||||
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
|
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
|
||||||
@@ -112,7 +101,7 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
cellEntries.Add(new CellEntry(
|
cellEntries.Add(new ColumnarCellEntry(
|
||||||
anchor.GroupKey,
|
anchor.GroupKey,
|
||||||
rowAnchors[rowIndex].Label,
|
rowAnchors[rowIndex].Label,
|
||||||
rowIndex,
|
rowIndex,
|
||||||
@@ -121,45 +110,11 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
|
CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
|
||||||
|
|
||||||
var parsedCells = new List<ParsedCriticalCellArtifact>();
|
var parsedCells = new List<ParsedCriticalCellArtifact>();
|
||||||
var parsedResults = new List<ParsedCriticalResult>();
|
var parsedResults = new List<ParsedCriticalResult>();
|
||||||
|
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
|
||||||
foreach (var cellEntry in cellEntries
|
|
||||||
.OrderBy(item => item.RowIndex)
|
|
||||||
.ThenBy(item => item.GroupKey, StringComparer.Ordinal)
|
|
||||||
.ThenBy(item => item.ColumnKey, StringComparer.Ordinal))
|
|
||||||
{
|
|
||||||
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
|
|
||||||
if (segmentCount > 2)
|
|
||||||
{
|
|
||||||
validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
|
|
||||||
}
|
|
||||||
|
|
||||||
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
|
||||||
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
|
||||||
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
|
|
||||||
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
|
|
||||||
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
|
|
||||||
|
|
||||||
parsedCells.Add(new ParsedCriticalCellArtifact(
|
|
||||||
cellEntry.GroupKey,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
cellEntry.Lines,
|
|
||||||
rawCellText,
|
|
||||||
descriptionText,
|
|
||||||
rawAffixText));
|
|
||||||
|
|
||||||
parsedResults.Add(new ParsedCriticalResult(
|
|
||||||
cellEntry.GroupKey,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
rawCellText,
|
|
||||||
descriptionText,
|
|
||||||
rawAffixText));
|
|
||||||
}
|
|
||||||
|
|
||||||
var expectedCellCount = rowAnchors.Count * ExpectedGroups.Length * ExpectedColumns.Length;
|
var expectedCellCount = rowAnchors.Count * ExpectedGroups.Length * ExpectedColumns.Length;
|
||||||
if (parsedCells.Count != expectedCellCount)
|
if (parsedCells.Count != expectedCellCount)
|
||||||
@@ -235,72 +190,4 @@ public sealed class GroupedVariantCriticalTableParser
|
|||||||
|
|
||||||
throw new InvalidOperationException("Could not find the grouped-variant column header row in the XML artifact.");
|
throw new InvalidOperationException("Could not find the grouped-variant column header row in the XML artifact.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
|
|
||||||
{
|
|
||||||
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
|
||||||
var axes = cellEntries
|
|
||||||
.Select(item => (item.GroupKey, item.ColumnKey))
|
|
||||||
.Distinct()
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
|
|
||||||
{
|
|
||||||
foreach (var (groupKey, columnKey) in axes)
|
|
||||||
{
|
|
||||||
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.GroupKey == groupKey && item.ColumnKey == columnKey);
|
|
||||||
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.GroupKey == groupKey && item.ColumnKey == columnKey);
|
|
||||||
if (current is null || next is null)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var leadingAffixCount = 0;
|
|
||||||
while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
|
||||||
{
|
|
||||||
leadingAffixCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
|
|
||||||
next.Lines.RemoveRange(0, leadingAffixCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int ResolveRowBoundaryTop(
|
|
||||||
RowAnchor current,
|
|
||||||
RowAnchor next,
|
|
||||||
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
|
||||||
{
|
|
||||||
var linesBetweenLabels = bodyLines
|
|
||||||
.Where(item => item.Top >= current.Top && item.Top < next.Top)
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
|
|
||||||
{
|
|
||||||
if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike)
|
|
||||||
{
|
|
||||||
return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private sealed record RowAnchor(string Label, int Top, int SortOrder);
|
|
||||||
|
|
||||||
private sealed class CellEntry(string groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
|
|
||||||
{
|
|
||||||
public string GroupKey { get; } = groupKey;
|
|
||||||
public string RollBandLabel { get; } = rollBandLabel;
|
|
||||||
public int RowIndex { get; } = rowIndex;
|
|
||||||
public string ColumnKey { get; } = columnKey;
|
|
||||||
public List<string> Lines { get; } = lines;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
19
src/RolemasterDb.ImportTool/Parsing/ParsedCriticalBranch.cs
Normal file
19
src/RolemasterDb.ImportTool/Parsing/ParsedCriticalBranch.cs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// Immutable description of one branch parsed out of a critical table cell: its condition text,
/// its prose, and its affix text.
/// </summary>
public sealed class ParsedCriticalBranch
{
    /// <summary>
    /// Creates a branch from its parsed parts; every argument is stored unchanged.
    /// </summary>
    /// <param name="branchKind">Kind discriminator of the branch.</param>
    /// <param name="conditionKey">Optional key for the condition; may be <see langword="null"/>.</param>
    /// <param name="conditionText">Text of the condition.</param>
    /// <param name="rawText">Raw text of the branch.</param>
    /// <param name="descriptionText">Prose text of the branch.</param>
    /// <param name="rawAffixText">Affix text of the branch; may be <see langword="null"/>.</param>
    /// <param name="sortOrder">Ordering value of the branch.</param>
    public ParsedCriticalBranch(
        string branchKind,
        string? conditionKey,
        string conditionText,
        string rawText,
        string descriptionText,
        string? rawAffixText,
        int sortOrder)
    {
        BranchKind = branchKind;
        ConditionKey = conditionKey;
        ConditionText = conditionText;
        RawText = rawText;
        DescriptionText = descriptionText;
        RawAffixText = rawAffixText;
        SortOrder = sortOrder;
    }

    /// <summary>Kind discriminator of the branch.</summary>
    public string BranchKind { get; }

    /// <summary>Optional key for the condition, or <see langword="null"/>.</summary>
    public string? ConditionKey { get; }

    /// <summary>Text of the condition.</summary>
    public string ConditionText { get; }

    /// <summary>Raw text of the branch.</summary>
    public string RawText { get; }

    /// <summary>Prose text of the branch.</summary>
    public string DescriptionText { get; }

    /// <summary>Affix text of the branch, or <see langword="null"/>.</summary>
    public string? RawAffixText { get; }

    /// <summary>Ordering value of the branch.</summary>
    public int SortOrder { get; }
}
|
||||||
@@ -5,15 +5,19 @@ public sealed class ParsedCriticalCellArtifact(
|
|||||||
string rollBandLabel,
|
string rollBandLabel,
|
||||||
string columnKey,
|
string columnKey,
|
||||||
IReadOnlyList<string> lines,
|
IReadOnlyList<string> lines,
|
||||||
|
IReadOnlyList<string> baseLines,
|
||||||
string rawCellText,
|
string rawCellText,
|
||||||
string descriptionText,
|
string descriptionText,
|
||||||
string? rawAffixText)
|
string? rawAffixText,
|
||||||
|
IReadOnlyList<ParsedCriticalBranch> branches)
|
||||||
{
|
{
|
||||||
public string? GroupKey { get; } = groupKey;
|
public string? GroupKey { get; } = groupKey;
|
||||||
public string RollBandLabel { get; } = rollBandLabel;
|
public string RollBandLabel { get; } = rollBandLabel;
|
||||||
public string ColumnKey { get; } = columnKey;
|
public string ColumnKey { get; } = columnKey;
|
||||||
public IReadOnlyList<string> Lines { get; } = lines;
|
public IReadOnlyList<string> Lines { get; } = lines;
|
||||||
|
public IReadOnlyList<string> BaseLines { get; } = baseLines;
|
||||||
public string RawCellText { get; } = rawCellText;
|
public string RawCellText { get; } = rawCellText;
|
||||||
public string DescriptionText { get; } = descriptionText;
|
public string DescriptionText { get; } = descriptionText;
|
||||||
public string? RawAffixText { get; } = rawAffixText;
|
public string? RawAffixText { get; } = rawAffixText;
|
||||||
|
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,8 @@ public sealed class ParsedCriticalResult(
|
|||||||
string rollBandLabel,
|
string rollBandLabel,
|
||||||
string rawCellText,
|
string rawCellText,
|
||||||
string descriptionText,
|
string descriptionText,
|
||||||
string? rawAffixText)
|
string? rawAffixText,
|
||||||
|
IReadOnlyList<ParsedCriticalBranch> branches)
|
||||||
{
|
{
|
||||||
public string? GroupKey { get; } = groupKey;
|
public string? GroupKey { get; } = groupKey;
|
||||||
public string ColumnKey { get; } = columnKey;
|
public string ColumnKey { get; } = columnKey;
|
||||||
@@ -14,4 +15,5 @@ public sealed class ParsedCriticalResult(
|
|||||||
public string RawCellText { get; } = rawCellText;
|
public string RawCellText { get; } = rawCellText;
|
||||||
public string DescriptionText { get; } = descriptionText;
|
public string DescriptionText { get; } = descriptionText;
|
||||||
public string? RawAffixText { get; } = rawAffixText;
|
public string? RawAffixText { get; } = rawAffixText;
|
||||||
|
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
|
||||||
}
|
}
|
||||||
|
|||||||
3
src/RolemasterDb.ImportTool/Parsing/RowAnchor.cs
Normal file
3
src/RolemasterDb.ImportTool/Parsing/RowAnchor.cs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
namespace RolemasterDb.ImportTool.Parsing;
|
||||||
|
|
||||||
|
/// <summary>
/// Vertical anchor for one table row, shared by the critical table parsers.
/// </summary>
/// <param name="Label">Roll-band label of the row.</param>
/// <param name="Top">Top coordinate associated with the row's label.</param>
/// <param name="SortOrder">Ordering value of the row (presumably its 1-based position by top; confirm against the code that creates anchors).</param>
internal sealed record RowAnchor(string Label, int Top, int SortOrder);
|
||||||
@@ -15,13 +15,7 @@ public sealed class StandardCriticalTableParser
|
|||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
|
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
|
||||||
var keyTop = fragments
|
var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
|
||||||
.Where(item =>
|
|
||||||
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
|
|
||||||
.Select(item => (int?)item.Top)
|
|
||||||
.Min() ?? int.MaxValue;
|
|
||||||
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
|
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
|
||||||
var leftCutoff = headerFragments.Min(item => item.Left) - 10;
|
var leftCutoff = headerFragments.Min(item => item.Left) - 10;
|
||||||
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
|
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
|
||||||
@@ -29,44 +23,39 @@ public sealed class StandardCriticalTableParser
|
|||||||
leftCutoff,
|
leftCutoff,
|
||||||
bodyStartTop,
|
bodyStartTop,
|
||||||
keyTop);
|
keyTop);
|
||||||
|
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
|
||||||
var rowAnchors = rowLabelFragments
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
if (rowAnchors.Count == 0)
|
if (rowAnchors.Count == 0)
|
||||||
{
|
{
|
||||||
validationErrors.Add("No roll-band labels were found in the XML artifact.");
|
validationErrors.Add("No roll-band labels were found in the XML artifact.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var bodyFragments = fragments
|
var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
|
||||||
.Where(item =>
|
fragments,
|
||||||
item.Top >= bodyStartTop &&
|
bodyStartTop,
|
||||||
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
|
keyTop,
|
||||||
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
|
leftCutoff,
|
||||||
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
|
rowAnchors,
|
||||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
|
headerFragments,
|
||||||
!headerFragments.Contains(item))
|
columnCenters,
|
||||||
.ToList();
|
affixLegendSymbols);
|
||||||
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
|
|
||||||
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||||
|
|
||||||
var parsedRollBands = rowAnchors
|
var parsedRollBands = rowAnchors
|
||||||
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
|
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
var cellEntries = new List<CellEntry>();
|
var cellEntries = new List<ColumnarCellEntry>();
|
||||||
|
|
||||||
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
|
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
|
||||||
{
|
{
|
||||||
var rowStart = rowIndex == 0
|
var rowStart = rowIndex == 0
|
||||||
? bodyStartTop
|
? bodyStartTop
|
||||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
|
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
|
||||||
|
|
||||||
var rowEnd = rowIndex == rowAnchors.Count - 1
|
var rowEnd = rowIndex == rowAnchors.Count - 1
|
||||||
? keyTop - 1
|
? keyTop - 1
|
||||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
|
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
|
||||||
|
|
||||||
var rowFragments = bodyFragments
|
var rowFragments = bodyFragments
|
||||||
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
|
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
|
||||||
@@ -86,7 +75,8 @@ public sealed class StandardCriticalTableParser
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
cellEntries.Add(new CellEntry(
|
cellEntries.Add(new ColumnarCellEntry(
|
||||||
|
null,
|
||||||
rowAnchors[rowIndex].Label,
|
rowAnchors[rowIndex].Label,
|
||||||
rowIndex,
|
rowIndex,
|
||||||
columnAnchor.Key,
|
columnAnchor.Key,
|
||||||
@@ -94,44 +84,11 @@ public sealed class StandardCriticalTableParser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
|
CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
|
||||||
|
|
||||||
var parsedCells = new List<ParsedCriticalCellArtifact>();
|
var parsedCells = new List<ParsedCriticalCellArtifact>();
|
||||||
var parsedResults = new List<ParsedCriticalResult>();
|
var parsedResults = new List<ParsedCriticalResult>();
|
||||||
|
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
|
||||||
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey))
|
|
||||||
{
|
|
||||||
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
|
|
||||||
|
|
||||||
if (segmentCount > 2)
|
|
||||||
{
|
|
||||||
validationErrors.Add(
|
|
||||||
$"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
|
|
||||||
}
|
|
||||||
|
|
||||||
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
|
||||||
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
|
||||||
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
|
|
||||||
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
|
|
||||||
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
|
|
||||||
|
|
||||||
parsedCells.Add(new ParsedCriticalCellArtifact(
|
|
||||||
null,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
cellEntry.Lines,
|
|
||||||
rawCellText,
|
|
||||||
descriptionText,
|
|
||||||
rawAffixText));
|
|
||||||
|
|
||||||
parsedResults.Add(new ParsedCriticalResult(
|
|
||||||
null,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
rawCellText,
|
|
||||||
descriptionText,
|
|
||||||
rawAffixText));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (columnCenters.Count != 5)
|
if (columnCenters.Count != 5)
|
||||||
{
|
{
|
||||||
@@ -185,68 +142,4 @@ public sealed class StandardCriticalTableParser
|
|||||||
|
|
||||||
throw new InvalidOperationException("Could not find the standard-table A-E header row in the XML artifact.");
|
throw new InvalidOperationException("Could not find the standard-table A-E header row in the XML artifact.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
|
|
||||||
{
|
|
||||||
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
|
||||||
var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
|
|
||||||
|
|
||||||
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
|
|
||||||
{
|
|
||||||
foreach (var columnKey in columnKeys)
|
|
||||||
{
|
|
||||||
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey);
|
|
||||||
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey);
|
|
||||||
if (current is null || next is null)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var leadingAffixCount = 0;
|
|
||||||
while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
|
||||||
{
|
|
||||||
leadingAffixCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
|
|
||||||
next.Lines.RemoveRange(0, leadingAffixCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int ResolveRowBoundaryTop(
|
|
||||||
RowAnchor current,
|
|
||||||
RowAnchor next,
|
|
||||||
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
|
||||||
{
|
|
||||||
var linesBetweenLabels = bodyLines
|
|
||||||
.Where(item => item.Top >= current.Top && item.Top < next.Top)
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
|
|
||||||
{
|
|
||||||
if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike)
|
|
||||||
{
|
|
||||||
return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private sealed record RowAnchor(string Label, int Top, int SortOrder);
|
|
||||||
|
|
||||||
private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
|
|
||||||
{
|
|
||||||
public string RollBandLabel { get; } = rollBandLabel;
|
|
||||||
public int RowIndex { get; } = rowIndex;
|
|
||||||
public string ColumnKey { get; } = columnKey;
|
|
||||||
public List<string> Lines { get; } = lines;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,13 +28,7 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
|
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
|
||||||
var keyTop = fragments
|
var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
|
||||||
.Where(item =>
|
|
||||||
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
|
|
||||||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
|
|
||||||
.Select(item => (int?)item.Top)
|
|
||||||
.Min() ?? int.MaxValue;
|
|
||||||
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
|
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
|
||||||
var leftCutoff = headerFragments.Min(item => item.Left) - 10;
|
var leftCutoff = headerFragments.Min(item => item.Left) - 10;
|
||||||
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
|
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
|
||||||
@@ -42,11 +36,7 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
leftCutoff,
|
leftCutoff,
|
||||||
bodyStartTop,
|
bodyStartTop,
|
||||||
keyTop);
|
keyTop);
|
||||||
|
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
|
||||||
var rowAnchors = rowLabelFragments
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
if (rowAnchors.Count == 0)
|
if (rowAnchors.Count == 0)
|
||||||
{
|
{
|
||||||
@@ -57,33 +47,32 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
.Select(item => (item.Key, item.CenterX))
|
.Select(item => (item.Key, item.CenterX))
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
var bodyFragments = fragments
|
var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
|
||||||
.Where(item =>
|
fragments,
|
||||||
item.Top >= bodyStartTop &&
|
bodyStartTop,
|
||||||
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
|
keyTop,
|
||||||
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
|
leftCutoff,
|
||||||
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
|
rowAnchors,
|
||||||
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
|
headerFragments,
|
||||||
!headerFragments.Contains(item))
|
columnCenters,
|
||||||
.ToList();
|
affixLegendSymbols);
|
||||||
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
|
|
||||||
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
|
||||||
|
|
||||||
var parsedRollBands = rowAnchors
|
var parsedRollBands = rowAnchors
|
||||||
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
|
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
var cellEntries = new List<CellEntry>();
|
var cellEntries = new List<ColumnarCellEntry>();
|
||||||
|
|
||||||
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
|
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
|
||||||
{
|
{
|
||||||
var rowStart = rowIndex == 0
|
var rowStart = rowIndex == 0
|
||||||
? bodyStartTop
|
? bodyStartTop
|
||||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
|
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
|
||||||
|
|
||||||
var rowEnd = rowIndex == rowAnchors.Count - 1
|
var rowEnd = rowIndex == rowAnchors.Count - 1
|
||||||
? keyTop - 1
|
? keyTop - 1
|
||||||
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
|
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
|
||||||
|
|
||||||
var rowFragments = bodyFragments
|
var rowFragments = bodyFragments
|
||||||
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
|
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
|
||||||
@@ -103,7 +92,8 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
cellEntries.Add(new CellEntry(
|
cellEntries.Add(new ColumnarCellEntry(
|
||||||
|
null,
|
||||||
rowAnchors[rowIndex].Label,
|
rowAnchors[rowIndex].Label,
|
||||||
rowIndex,
|
rowIndex,
|
||||||
columnAnchor.Key,
|
columnAnchor.Key,
|
||||||
@@ -111,42 +101,11 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
|
CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
|
||||||
|
|
||||||
var parsedCells = new List<ParsedCriticalCellArtifact>();
|
var parsedCells = new List<ParsedCriticalCellArtifact>();
|
||||||
var parsedResults = new List<ParsedCriticalResult>();
|
var parsedResults = new List<ParsedCriticalResult>();
|
||||||
|
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
|
||||||
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey, StringComparer.Ordinal))
|
|
||||||
{
|
|
||||||
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
|
|
||||||
if (segmentCount > 2)
|
|
||||||
{
|
|
||||||
validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
|
|
||||||
}
|
|
||||||
|
|
||||||
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
|
||||||
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
|
|
||||||
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
|
|
||||||
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
|
|
||||||
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
|
|
||||||
|
|
||||||
parsedCells.Add(new ParsedCriticalCellArtifact(
|
|
||||||
null,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
cellEntry.Lines,
|
|
||||||
rawCellText,
|
|
||||||
descriptionText,
|
|
||||||
rawAffixText));
|
|
||||||
|
|
||||||
parsedResults.Add(new ParsedCriticalResult(
|
|
||||||
null,
|
|
||||||
cellEntry.ColumnKey,
|
|
||||||
cellEntry.RollBandLabel,
|
|
||||||
rawCellText,
|
|
||||||
descriptionText,
|
|
||||||
rawAffixText));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (columnAnchors.Count != ExpectedColumns.Length)
|
if (columnAnchors.Count != ExpectedColumns.Length)
|
||||||
{
|
{
|
||||||
@@ -208,69 +167,5 @@ public sealed class VariantColumnCriticalTableParser
|
|||||||
ExpectedColumns.SingleOrDefault(item => string.Equals(item.Label, value.Trim(), StringComparison.OrdinalIgnoreCase))
|
ExpectedColumns.SingleOrDefault(item => string.Equals(item.Label, value.Trim(), StringComparison.OrdinalIgnoreCase))
|
||||||
?? throw new InvalidOperationException($"Unsupported variant column label '{value}'.");
|
?? throw new InvalidOperationException($"Unsupported variant column label '{value}'.");
|
||||||
|
|
||||||
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
|
|
||||||
{
|
|
||||||
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
|
|
||||||
var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
|
|
||||||
|
|
||||||
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
|
|
||||||
{
|
|
||||||
foreach (var columnKey in columnKeys)
|
|
||||||
{
|
|
||||||
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey);
|
|
||||||
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey);
|
|
||||||
if (current is null || next is null)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var leadingAffixCount = 0;
|
|
||||||
while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
|
|
||||||
{
|
|
||||||
leadingAffixCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
|
|
||||||
next.Lines.RemoveRange(0, leadingAffixCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int ResolveRowBoundaryTop(
|
|
||||||
RowAnchor current,
|
|
||||||
RowAnchor next,
|
|
||||||
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
|
|
||||||
{
|
|
||||||
var linesBetweenLabels = bodyLines
|
|
||||||
.Where(item => item.Top >= current.Top && item.Top < next.Top)
|
|
||||||
.OrderBy(item => item.Top)
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
|
|
||||||
{
|
|
||||||
if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike)
|
|
||||||
{
|
|
||||||
return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private sealed record ColumnDefinition(string Key, string Label);
|
private sealed record ColumnDefinition(string Key, string Label);
|
||||||
|
|
||||||
private sealed record RowAnchor(string Label, int Top, int SortOrder);
|
|
||||||
|
|
||||||
private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
|
|
||||||
{
|
|
||||||
public string RollBandLabel { get; } = rollBandLabel;
|
|
||||||
public int RowIndex { get; } = rowIndex;
|
|
||||||
public string ColumnKey { get; } = columnKey;
|
|
||||||
public List<string> Lines { get; } = lines;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user