Implement phase 5 critical branch extraction

This commit is contained in:
2026-03-14 10:21:26 +01:00
parent b2f61c3d73
commit 60c5d886a4
20 changed files with 589 additions and 399 deletions

View File

@@ -12,10 +12,12 @@ public sealed class CriticalImportLoader(string databasePath)
{
await using var dbContext = CreateDbContext();
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
var removedTableCount = await dbContext.CriticalTables.CountAsync(cancellationToken);
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
await dbContext.CriticalBranches.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalResults.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalGroups.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalColumns.ExecuteDeleteAsync(cancellationToken);
@@ -30,6 +32,7 @@ public sealed class CriticalImportLoader(string databasePath)
{
await using var dbContext = CreateDbContext();
await dbContext.Database.EnsureCreatedAsync(cancellationToken);
await RolemasterDbSchemaUpgrader.EnsureLatestAsync(dbContext, cancellationToken);
await using var transaction = await dbContext.Database.BeginTransactionAsync(cancellationToken);
await DeleteTableAsync(dbContext, table.Slug, cancellationToken);
@@ -86,7 +89,21 @@ public sealed class CriticalImportLoader(string databasePath)
DescriptionText = item.DescriptionText,
RawAffixText = item.RawAffixText,
ParsedJson = "{}",
ParseStatus = "raw"
ParseStatus = "raw",
Branches = item.Branches
.Select(branch => new CriticalBranch
{
BranchKind = branch.BranchKind,
ConditionKey = branch.ConditionKey,
ConditionText = branch.ConditionText,
ConditionJson = "{}",
RawText = branch.RawText,
DescriptionText = branch.DescriptionText,
RawAffixText = branch.RawAffixText,
ParsedJson = "{}",
SortOrder = branch.SortOrder
})
.ToList()
})
.ToList();
@@ -121,6 +138,10 @@ public sealed class CriticalImportLoader(string databasePath)
return;
}
await dbContext.CriticalBranches
.Where(item => item.CriticalResult.CriticalTableId == tableId.Value)
.ExecuteDeleteAsync(cancellationToken);
await dbContext.CriticalResults
.Where(item => item.CriticalTableId == tableId.Value)
.ExecuteDeleteAsync(cancellationToken);

View File

@@ -0,0 +1,10 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// One table cell gathered by a columnar critical-table parser: its position
/// (optional group, roll band, row index, column) and the text lines assigned to it.
/// </summary>
internal sealed class ColumnarCellEntry
{
    /// <summary>Creates an entry; <paramref name="lines"/> is stored by reference, not copied.</summary>
    /// <param name="groupKey">Group the cell belongs to, or <c>null</c> (the standard parser passes <c>null</c>).</param>
    /// <param name="rollBandLabel">Label of the roll band (row) the cell sits in.</param>
    /// <param name="rowIndex">Zero-based index of the row anchor the cell was collected for.</param>
    /// <param name="columnKey">Key of the column the cell sits in.</param>
    /// <param name="lines">Text lines of the cell, top to bottom.</param>
    public ColumnarCellEntry(string? groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        GroupKey = groupKey;
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    /// <summary>Group key, or <c>null</c> when the cell has no group.</summary>
    public string? GroupKey { get; }

    /// <summary>Label of the roll band this cell belongs to.</summary>
    public string RollBandLabel { get; }

    /// <summary>Index of the row this cell belongs to.</summary>
    public int RowIndex { get; }

    /// <summary>Key of the column this cell belongs to.</summary>
    public string ColumnKey { get; }

    /// <summary>
    /// Mutable list of the cell's text lines. It is intentionally mutable:
    /// <c>RepairLeadingAffixLeakage</c> moves lines from one entry to another in place.
    /// </summary>
    public List<string> Lines { get; }
}

View File

@@ -0,0 +1,17 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Result of parsing one cell's text: the base (unconditional) content, the conditional
/// branches extracted from it, and any validation errors raised while parsing.
/// </summary>
internal sealed class CriticalCellParseContent
{
    /// <summary>Stores the supplied values as-is; no copies are made.</summary>
    /// <param name="baseLines">Cell lines that precede the first conditional branch.</param>
    /// <param name="rawCellText">Base lines joined into one text.</param>
    /// <param name="descriptionText">Prose part of the base content.</param>
    /// <param name="rawAffixText">Affix part of the base content, or <c>null</c> when there is none.</param>
    /// <param name="branches">Conditional branches found in the cell.</param>
    /// <param name="validationErrors">Problems detected while parsing the cell.</param>
    public CriticalCellParseContent(
        IReadOnlyList<string> baseLines,
        string rawCellText,
        string descriptionText,
        string? rawAffixText,
        IReadOnlyList<ParsedCriticalBranch> branches,
        IReadOnlyList<string> validationErrors)
    {
        BaseLines = baseLines;
        RawCellText = rawCellText;
        DescriptionText = descriptionText;
        RawAffixText = rawAffixText;
        Branches = branches;
        ValidationErrors = validationErrors;
    }

    /// <summary>Cell lines that are not part of any conditional branch.</summary>
    public IReadOnlyList<string> BaseLines { get; }

    /// <summary>Raw text of the base content.</summary>
    public string RawCellText { get; }

    /// <summary>Description (prose) text of the base content.</summary>
    public string DescriptionText { get; }

    /// <summary>Raw affix text of the base content, or <c>null</c>.</summary>
    public string? RawAffixText { get; }

    /// <summary>Conditional branches extracted from the cell.</summary>
    public IReadOnlyList<ParsedCriticalBranch> Branches { get; }

    /// <summary>Validation errors produced while parsing the cell.</summary>
    public IReadOnlyList<string> ValidationErrors { get; }
}

View File

@@ -0,0 +1,114 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Splits the text lines of a single critical-table cell into base content and
/// conditional branches, and derives raw/description/affix text for each part.
/// </summary>
internal static class CriticalCellTextParser
{
    /// <summary>
    /// Parses the lines of one cell.
    /// </summary>
    /// <param name="lines">Cell lines in reading order.</param>
    /// <param name="affixLegendSymbols">Legend symbols passed through to the affix-line classifier.</param>
    /// <returns>
    /// The base content (all lines before the first conditional branch start line), one
    /// <see cref="ParsedCriticalBranch"/> per branch start line, and the validation errors
    /// collected for the base content and every branch.
    /// </returns>
    internal static CriticalCellParseContent Parse(IReadOnlyList<string> lines, ISet<string> affixLegendSymbols)
    {
        var validationErrors = new List<string>();
        var branchStartIndexes = FindBranchStartIndexes(lines);

        // Everything above the first branch start line is base content; with no branches, all of it is.
        var baseLineCount = branchStartIndexes.Count == 0 ? lines.Count : branchStartIndexes[0];
        var baseLines = lines.Take(baseLineCount).ToList();
        var branches = new List<ParsedCriticalBranch>(branchStartIndexes.Count);

        validationErrors.AddRange(ValidateSegmentCount(baseLines, affixLegendSymbols, "Base content"));

        for (var branchIndex = 0; branchIndex < branchStartIndexes.Count; branchIndex++)
        {
            // A branch runs from its start line up to (not including) the next start line or the end of the cell.
            var startIndex = branchStartIndexes[branchIndex];
            var endIndex = branchIndex == branchStartIndexes.Count - 1
                ? lines.Count
                : branchStartIndexes[branchIndex + 1];

            branches.Add(ParseBranch(
                lines.Skip(startIndex).Take(endIndex - startIndex).ToList(),
                branchIndex + 1,
                affixLegendSymbols,
                validationErrors));
        }

        var (rawCellText, descriptionText, rawAffixText) = BuildTextSections(baseLines, affixLegendSymbols);
        return new CriticalCellParseContent(baseLines, rawCellText, descriptionText, rawAffixText, branches, validationErrors);
    }

    /// <summary>
    /// Builds one branch from its lines. The first line must contain a ':' separating the
    /// condition text from the first payload fragment (guaranteed by
    /// <c>CriticalTableParserSupport.IsConditionalBranchStartLine</c>, which selects the start lines).
    /// </summary>
    /// <param name="branchLines">Branch start line followed by its continuation lines.</param>
    /// <param name="sortOrder">One-based position of the branch within the cell.</param>
    /// <param name="affixLegendSymbols">Legend symbols passed through to the affix-line classifier.</param>
    /// <param name="validationErrors">Receives errors detected in the branch payload.</param>
    private static ParsedCriticalBranch ParseBranch(
        IReadOnlyList<string> branchLines,
        int sortOrder,
        ISet<string> affixLegendSymbols,
        List<string> validationErrors)
    {
        var firstLine = branchLines[0];
        var separatorIndex = firstLine.IndexOf(':', StringComparison.Ordinal);
        var conditionText = CriticalTableParserSupport.CollapseWhitespace(firstLine[..separatorIndex]);
        var firstPayloadLine = CriticalTableParserSupport.CollapseWhitespace(firstLine[(separatorIndex + 1)..]);

        // Payload = text after the ':' plus every non-blank continuation line, whitespace-normalized.
        var payloadLines = new List<string>();
        if (!string.IsNullOrWhiteSpace(firstPayloadLine))
        {
            payloadLines.Add(firstPayloadLine);
        }

        foreach (var continuationLine in branchLines.Skip(1))
        {
            var normalized = CriticalTableParserSupport.CollapseWhitespace(continuationLine);
            if (!string.IsNullOrWhiteSpace(normalized))
            {
                payloadLines.Add(normalized);
            }
        }

        validationErrors.AddRange(ValidateSegmentCount(payloadLines, affixLegendSymbols, $"Branch '{conditionText}'"));
        var (_, descriptionText, rawAffixText) = BuildTextSections(payloadLines, affixLegendSymbols);

        // RawText keeps the untouched source lines (condition included), not the normalized payload.
        return new ParsedCriticalBranch(
            "conditional",
            CriticalTableParserSupport.NormalizeConditionKey(conditionText),
            conditionText,
            string.Join(Environment.NewLine, branchLines),
            descriptionText,
            rawAffixText,
            sortOrder);
    }

    /// <summary>Returns, in ascending order, the index of every line that starts a conditional branch.</summary>
    private static List<int> FindBranchStartIndexes(IReadOnlyList<string> lines)
    {
        var branchStartIndexes = new List<int>();
        for (var index = 0; index < lines.Count; index++)
        {
            if (CriticalTableParserSupport.IsConditionalBranchStartLine(lines[index]))
            {
                branchStartIndexes.Add(index);
            }
        }

        return branchStartIndexes;
    }

    /// <summary>
    /// Returns a single "interleaves prose and affix lines" error for <paramref name="scope"/> when
    /// <c>CountLineTypeSegments</c> reports more than two segments; otherwise an empty list.
    /// Empty input is always valid.
    /// </summary>
    private static IReadOnlyList<string> ValidateSegmentCount(
        IReadOnlyList<string> lines,
        ISet<string> affixLegendSymbols,
        string scope)
    {
        if (lines.Count == 0)
        {
            return [];
        }

        // NOTE(review): presumably a segment is a run of consecutive lines of the same kind
        // (prose vs. affix); confirm against CountLineTypeSegments.
        var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(lines, affixLegendSymbols);
        return segmentCount > 2
            ? [$"{scope} interleaves prose and affix lines."]
            : [];
    }

    /// <summary>
    /// Derives the three text views of a set of lines: all lines joined by newlines, the
    /// non-affix lines joined by spaces with whitespace collapsed, and the affix lines joined
    /// by newlines (<c>null</c> when there are none).
    /// </summary>
    private static (string RawText, string DescriptionText, string? RawAffixText) BuildTextSections(
        IReadOnlyList<string> lines,
        ISet<string> affixLegendSymbols)
    {
        var rawText = string.Join(Environment.NewLine, lines);

        // Classify every line exactly once and route it to the matching bucket.
        var rawAffixLines = new List<string>();
        var descriptionLines = new List<string>();
        foreach (var line in lines)
        {
            if (CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols))
            {
                rawAffixLines.Add(line);
            }
            else
            {
                descriptionLines.Add(line);
            }
        }

        var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
        var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
        return (rawText, descriptionText, rawAffixText);
    }
}

View File

@@ -169,15 +169,9 @@ internal static class CriticalTableParserSupport
return true;
}
if (value.StartsWith("with ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("w/o ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("without ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("if ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("while ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("until ", StringComparison.OrdinalIgnoreCase) ||
value.StartsWith("unless ", StringComparison.OrdinalIgnoreCase))
if (IsConditionalBranchStartLine(value))
{
return value.Contains(':', StringComparison.Ordinal);
return true;
}
if (affixLegendSymbols.Count > 0 &&
@@ -242,6 +236,23 @@ internal static class CriticalTableParserSupport
/// <summary>
/// Trims <paramref name="value"/> and replaces every run of whitespace inside it with a single space.
/// </summary>
internal static string CollapseWhitespace(string value)
{
    var trimmed = value.Trim();
    return Regex.Replace(trimmed, @"\s+", " ");
}
/// <summary>
/// Tells whether a line opens a conditional branch: after trimming it must contain a ':'
/// and begin (case-insensitively) with one of the condition lead-ins
/// "with ", "w/o ", "without ", "if ", "while ", "until " or "unless ".
/// </summary>
internal static bool IsConditionalBranchStartLine(string value)
{
    var candidate = value.Trim();

    // Without a separator there is no "condition: payload" structure to speak of.
    if (candidate.IndexOf(':') < 0)
    {
        return false;
    }

    bool BeginsWith(string leadIn) => candidate.StartsWith(leadIn, StringComparison.OrdinalIgnoreCase);

    return BeginsWith("with ")
        || BeginsWith("w/o ")
        || BeginsWith("without ")
        || BeginsWith("if ")
        || BeginsWith("while ")
        || BeginsWith("until ")
        || BeginsWith("unless ");
}
internal static string NormalizeText(string value) =>
value
.Replace('\u00a0', ' ')
@@ -250,6 +261,25 @@ internal static class CriticalTableParserSupport
.Replace('', '\'')
.Trim();
/// <summary>
/// Turns free-form condition text into a lower-case, underscore-separated key.
/// </summary>
/// <param name="conditionText">Condition text, e.g. the part of a branch line before the ':'.</param>
/// <returns>
/// The key made of <c>a-z</c>, <c>0-9</c> and single underscores, with the abbreviation
/// "w/o" expanded to "without"; <c>null</c> when nothing but separators remains.
/// </returns>
internal static string? NormalizeConditionKey(string conditionText)
{
    var normalized = CollapseWhitespace(conditionText).ToLowerInvariant();

    // Expand "w/o" only when it stands alone as a token. A plain substring replace would
    // also rewrite the middle of unrelated text such as "low/or" into "lowithoutr".
    normalized = Regex.Replace(normalized, @"(?<![a-z0-9])w/o(?![a-z0-9])", "without");

    // Every run of non-alphanumeric characters becomes one underscore; strip the ones at the edges.
    normalized = Regex.Replace(normalized, @"[^a-z0-9]+", "_").Trim('_');
    return normalized.Length == 0 ? null : normalized;
}
/// <summary>
/// Finds the smallest <c>Top</c> among fragments that mark the key section: text equal to
/// "Key:" or containing "must parry" or "attacker gets" (all case-insensitive).
/// </summary>
/// <returns>That <c>Top</c>, or <see cref="int.MaxValue"/> when no fragment matches.</returns>
internal static int FindKeyTop(IReadOnlyList<XmlTextFragment> fragments)
{
    static bool MarksKeySection(XmlTextFragment fragment) =>
        string.Equals(fragment.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
        fragment.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
        fragment.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase);

    // Min over a nullable projection yields null for an empty sequence instead of throwing.
    int? earliestTop = fragments
        .Where(MarksKeySection)
        .Min(fragment => (int?)fragment.Top);

    return earliestTop ?? int.MaxValue;
}
internal static HashSet<string> DetectAffixLegendSymbols(IReadOnlyList<XmlTextFragment> fragments, int keyTop)
{
if (keyTop == int.MaxValue)
@@ -347,6 +377,138 @@ internal static class CriticalTableParserSupport
return groups;
}
/// <summary>
/// Creates one <see cref="RowAnchor"/> per row-label fragment, ordered by <c>Top</c>.
/// Each anchor carries the normalized roll-band label, the fragment's <c>Top</c>
/// and a one-based sort order.
/// </summary>
internal static List<RowAnchor> CreateRowAnchors(IReadOnlyList<XmlTextFragment> rowLabelFragments)
{
    var anchors = new List<RowAnchor>();

    foreach (var fragment in rowLabelFragments.OrderBy(item => item.Top))
    {
        // anchors.Count is read before the Add, so the first anchor gets sort order 1.
        anchors.Add(new RowAnchor(NormalizeRollBandLabel(fragment.Text), fragment.Top, anchors.Count + 1));
    }

    return anchors;
}
/// <summary>
/// Selects the fragments that make up the table body and passes them through
/// <c>SplitBoundaryCrossingAffixFragments</c>.
/// </summary>
/// <remarks>
/// A fragment is kept when its <c>Top</c> lies in
/// [<paramref name="bodyStartTop"/>, <paramref name="keyTop"/> - <c>TopGroupingTolerance</c>)
/// and it is not a footer page number, not a potential row label, not the fragment a row
/// anchor was created from (same <c>Top</c> and same normalized label), and not listed in
/// <paramref name="excludedFragments"/>.
/// </remarks>
internal static List<XmlTextFragment> BuildBodyFragments(
    IReadOnlyList<XmlTextFragment> fragments,
    int bodyStartTop,
    int keyTop,
    int leftCutoff,
    IReadOnlyList<RowAnchor> rowAnchors,
    IReadOnlyCollection<XmlTextFragment> excludedFragments,
    IReadOnlyList<(string Key, double CenterX)> columnCenters,
    ISet<string> affixLegendSymbols)
{
    bool IsBodyFragment(XmlTextFragment fragment)
    {
        // Vertical window first: at or below the header gap, clear of the key section.
        if (fragment.Top < bodyStartTop || fragment.Top >= keyTop - TopGroupingTolerance)
        {
            return false;
        }

        if (IsFooterPageNumberFragment(fragment, keyTop))
        {
            return false;
        }

        if (IsPotentialRowLabelFragment(fragment, leftCutoff))
        {
            return false;
        }

        var isAnchorLabel = rowAnchors.Any(anchor =>
            anchor.Top == fragment.Top &&
            string.Equals(anchor.Label, NormalizeRollBandLabel(fragment.Text), StringComparison.OrdinalIgnoreCase));
        if (isAnchorLabel)
        {
            return false;
        }

        return !excludedFragments.Contains(fragment);
    }

    var candidates = fragments.Where(IsBodyFragment).ToList();
    return SplitBoundaryCrossingAffixFragments(candidates, columnCenters, affixLegendSymbols);
}
/// <summary>
/// Moves affix-like lines found at the very top of a cell up into the cell directly above it
/// (same group and column, previous row index). Entries are modified in place.
/// </summary>
/// <remarks>
/// A cell is left untouched when it has no leading affix-like lines or when all of its lines
/// are affix-like. Rows are processed top-down, so lines moved into a cell are already in
/// place when that cell is examined as the lower cell of the next pair.
/// </remarks>
internal static void RepairLeadingAffixLeakage(List<ColumnarCellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    if (cellEntries.Count == 0)
    {
        return;
    }

    var lastRowIndex = cellEntries.Max(item => item.RowIndex);
    var axes = cellEntries
        .Select(item => (item.GroupKey, item.ColumnKey))
        .Distinct()
        .ToList();

    // SingleOrDefault: a missing cell yields null, a duplicated cell is an error.
    ColumnarCellEntry? FindCell(int rowIndex, string? groupKey, string columnKey) =>
        cellEntries.SingleOrDefault(item =>
            item.RowIndex == rowIndex &&
            string.Equals(item.GroupKey, groupKey, StringComparison.Ordinal) &&
            string.Equals(item.ColumnKey, columnKey, StringComparison.Ordinal));

    for (var upperRow = 0; upperRow < lastRowIndex; upperRow++)
    {
        foreach (var (groupKey, columnKey) in axes)
        {
            var upperCell = FindCell(upperRow, groupKey, columnKey);
            var lowerCell = FindCell(upperRow + 1, groupKey, columnKey);
            if (upperCell is null || lowerCell is null)
            {
                continue;
            }

            var leakedCount = lowerCell.Lines
                .TakeWhile(line => IsAffixLikeLine(line, affixLegendSymbols))
                .Count();

            // Only a proper, non-empty prefix counts as leakage.
            var hasLeakage = leakedCount > 0 && leakedCount < lowerCell.Lines.Count;
            if (!hasLeakage)
            {
                continue;
            }

            upperCell.Lines.AddRange(lowerCell.Lines.GetRange(0, leakedCount));
            lowerCell.Lines.RemoveRange(0, leakedCount);
        }
    }
}
/// <summary>
/// Computes the <c>Top</c> coordinate at which the row of <paramref name="current"/> ends and
/// the row of <paramref name="next"/> begins.
/// </summary>
/// <remarks>
/// Among the body lines between the two anchors, the lowest place where an affix-like line is
/// directly followed by a non-affix line decides the boundary: the midpoint of those two
/// lines, rounded down, plus one. When no such transition exists, the same formula is applied
/// to the two anchor tops.
/// </remarks>
internal static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int BoundaryBetween(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;

    var between = bodyLines
        .Where(line => line.Top >= current.Top && line.Top < next.Top)
        .OrderBy(line => line.Top)
        .ToList();

    // Scan pairs of neighbouring lines from the bottom up; the first hit is the lowest transition.
    for (var lower = between.Count - 1; lower >= 1; lower--)
    {
        var upper = lower - 1;
        if (between[upper].IsAffixLike && !between[lower].IsAffixLike)
        {
            return BoundaryBetween(between[upper].Top, between[lower].Top);
        }
    }

    return BoundaryBetween(current.Top, next.Top);
}
/// <summary>
/// Parses every cell entry with <c>CriticalCellTextParser.Parse</c> and appends the outcome to
/// the supplied output lists, in the order the entries are given.
/// </summary>
/// <param name="cellEntries">Cells to parse.</param>
/// <param name="affixLegendSymbols">Legend symbols passed through to the cell parser.</param>
/// <param name="parsedCells">Receives one artifact per cell (with a snapshot copy of the cell's lines).</param>
/// <param name="parsedResults">Receives one result per cell.</param>
/// <param name="validationErrors">Receives the cell parser's errors, each prefixed with the cell identifier.</param>
internal static void BuildParsedArtifacts(
    IReadOnlyList<ColumnarCellEntry> cellEntries,
    ISet<string> affixLegendSymbols,
    List<ParsedCriticalCellArtifact> parsedCells,
    List<ParsedCriticalResult> parsedResults,
    List<string> validationErrors)
{
    foreach (var entry in cellEntries)
    {
        var parsed = CriticalCellTextParser.Parse(entry.Lines, affixLegendSymbols);

        foreach (var error in parsed.ValidationErrors)
        {
            validationErrors.Add($"Cell '{BuildCellIdentifier(entry)}': {error}");
        }

        // The artifact gets its own copy of the lines so later edits to the entry do not show through.
        var artifact = new ParsedCriticalCellArtifact(
            entry.GroupKey,
            entry.RollBandLabel,
            entry.ColumnKey,
            entry.Lines.ToList(),
            parsed.BaseLines,
            parsed.RawCellText,
            parsed.DescriptionText,
            parsed.RawAffixText,
            parsed.Branches);
        parsedCells.Add(artifact);

        var result = new ParsedCriticalResult(
            entry.GroupKey,
            entry.ColumnKey,
            entry.RollBandLabel,
            parsed.RawCellText,
            parsed.DescriptionText,
            parsed.RawAffixText,
            parsed.Branches);
        parsedResults.Add(result);
    }
}
/// <summary>
/// Formats a cell's position for messages: "rollBand/column" when the entry has no group,
/// otherwise "rollBand/group/column".
/// </summary>
private static string BuildCellIdentifier(ColumnarCellEntry cellEntry)
{
    if (cellEntry.GroupKey is null)
    {
        return $"{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}";
    }

    return $"{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}";
}
/// <summary>
/// Tells whether the trimmed text consists of two or three digits, optional whitespace and a
/// trailing hyphen (for example "96-" or "100 -").
/// NOTE(review): presumably the first half of a roll-band label split across fragments; confirm at the call site.
/// </summary>
private static bool LooksLikeSplitRollBandStart(string value)
{
    var candidate = value.Trim();
    return Regex.IsMatch(candidate, @"^\d{2,3}\s*-$");
}

View File

@@ -36,13 +36,7 @@ public sealed class GroupedVariantCriticalTableParser
groupHeaders.Max(item => item.Top),
columnHeaders.Max(item => item.Top))
+ CriticalTableParserSupport.HeaderToBodyMinimumGap;
var keyTop = fragments
.Where(item =>
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
.Select(item => (int?)item.Top)
.Min() ?? int.MaxValue;
var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
var leftCutoff = columnHeaders.Min(item => item.Left) - 10;
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
@@ -50,11 +44,7 @@ public sealed class GroupedVariantCriticalTableParser
leftCutoff,
bodyStartTop,
keyTop);
var rowAnchors = rowLabelFragments
.OrderBy(item => item.Top)
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
.ToList();
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
if (rowAnchors.Count == 0)
{
@@ -65,34 +55,33 @@ public sealed class GroupedVariantCriticalTableParser
.Select(item => (item.CompositeKey, item.CenterX))
.ToList();
var bodyFragments = fragments
.Where(item =>
item.Top >= bodyStartTop &&
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
!groupHeaders.Contains(item) &&
!columnHeaders.Contains(item))
.ToList();
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
var excludedFragments = groupHeaders.Concat(columnHeaders).ToList();
var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
fragments,
bodyStartTop,
keyTop,
leftCutoff,
rowAnchors,
excludedFragments,
columnCenters,
affixLegendSymbols);
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
.ToList();
var cellEntries = new List<CellEntry>();
var cellEntries = new List<ColumnarCellEntry>();
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
{
var rowStart = rowIndex == 0
? bodyStartTop
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
var rowEnd = rowIndex == rowAnchors.Count - 1
? keyTop - 1
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
var rowFragments = bodyFragments
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
@@ -112,7 +101,7 @@ public sealed class GroupedVariantCriticalTableParser
continue;
}
cellEntries.Add(new CellEntry(
cellEntries.Add(new ColumnarCellEntry(
anchor.GroupKey,
rowAnchors[rowIndex].Label,
rowIndex,
@@ -121,45 +110,11 @@ public sealed class GroupedVariantCriticalTableParser
}
}
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
var parsedCells = new List<ParsedCriticalCellArtifact>();
var parsedResults = new List<ParsedCriticalResult>();
foreach (var cellEntry in cellEntries
.OrderBy(item => item.RowIndex)
.ThenBy(item => item.GroupKey, StringComparer.Ordinal)
.ThenBy(item => item.ColumnKey, StringComparer.Ordinal))
{
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
if (segmentCount > 2)
{
validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.GroupKey}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
}
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
parsedCells.Add(new ParsedCriticalCellArtifact(
cellEntry.GroupKey,
cellEntry.RollBandLabel,
cellEntry.ColumnKey,
cellEntry.Lines,
rawCellText,
descriptionText,
rawAffixText));
parsedResults.Add(new ParsedCriticalResult(
cellEntry.GroupKey,
cellEntry.ColumnKey,
cellEntry.RollBandLabel,
rawCellText,
descriptionText,
rawAffixText));
}
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
var expectedCellCount = rowAnchors.Count * ExpectedGroups.Length * ExpectedColumns.Length;
if (parsedCells.Count != expectedCellCount)
@@ -235,72 +190,4 @@ public sealed class GroupedVariantCriticalTableParser
throw new InvalidOperationException("Could not find the grouped-variant column header row in the XML artifact.");
}
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
var axes = cellEntries
.Select(item => (item.GroupKey, item.ColumnKey))
.Distinct()
.ToList();
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
{
foreach (var (groupKey, columnKey) in axes)
{
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.GroupKey == groupKey && item.ColumnKey == columnKey);
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.GroupKey == groupKey && item.ColumnKey == columnKey);
if (current is null || next is null)
{
continue;
}
var leadingAffixCount = 0;
while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
{
leadingAffixCount++;
}
if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
{
continue;
}
current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
next.Lines.RemoveRange(0, leadingAffixCount);
}
}
}
private static int ResolveRowBoundaryTop(
RowAnchor current,
RowAnchor next,
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
var linesBetweenLabels = bodyLines
.Where(item => item.Top >= current.Top && item.Top < next.Top)
.OrderBy(item => item.Top)
.ToList();
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
{
if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike)
{
return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1;
}
}
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
}
private sealed record RowAnchor(string Label, int Top, int SortOrder);
private sealed class CellEntry(string groupKey, string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
{
public string GroupKey { get; } = groupKey;
public string RollBandLabel { get; } = rollBandLabel;
public int RowIndex { get; } = rowIndex;
public string ColumnKey { get; } = columnKey;
public List<string> Lines { get; } = lines;
}
}

View File

@@ -0,0 +1,19 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// A conditional branch extracted from a critical-table cell: the condition that introduces it
/// and the text that applies when the condition holds.
/// </summary>
public sealed class ParsedCriticalBranch
{
    /// <summary>Stores the supplied values unchanged.</summary>
    /// <param name="branchKind">Kind of branch (the cell text parser always passes "conditional").</param>
    /// <param name="conditionKey">Normalized key derived from the condition text, or <c>null</c>.</param>
    /// <param name="conditionText">Condition text as written before the ':' separator.</param>
    /// <param name="rawText">Source lines of the branch joined into one text.</param>
    /// <param name="descriptionText">Prose part of the branch payload.</param>
    /// <param name="rawAffixText">Affix part of the branch payload, or <c>null</c> when there is none.</param>
    /// <param name="sortOrder">Position of the branch within its cell.</param>
    public ParsedCriticalBranch(
        string branchKind,
        string? conditionKey,
        string conditionText,
        string rawText,
        string descriptionText,
        string? rawAffixText,
        int sortOrder)
    {
        BranchKind = branchKind;
        ConditionKey = conditionKey;
        ConditionText = conditionText;
        RawText = rawText;
        DescriptionText = descriptionText;
        RawAffixText = rawAffixText;
        SortOrder = sortOrder;
    }

    /// <summary>Kind of branch.</summary>
    public string BranchKind { get; }

    /// <summary>Normalized condition key, or <c>null</c>.</summary>
    public string? ConditionKey { get; }

    /// <summary>Condition text as written in the source.</summary>
    public string ConditionText { get; }

    /// <summary>Raw text of the whole branch.</summary>
    public string RawText { get; }

    /// <summary>Description (prose) text of the branch.</summary>
    public string DescriptionText { get; }

    /// <summary>Raw affix text of the branch, or <c>null</c>.</summary>
    public string? RawAffixText { get; }

    /// <summary>Position of the branch within its cell.</summary>
    public int SortOrder { get; }
}

View File

@@ -5,15 +5,19 @@ public sealed class ParsedCriticalCellArtifact(
string rollBandLabel,
string columnKey,
IReadOnlyList<string> lines,
IReadOnlyList<string> baseLines,
string rawCellText,
string descriptionText,
string? rawAffixText)
string? rawAffixText,
IReadOnlyList<ParsedCriticalBranch> branches)
{
public string? GroupKey { get; } = groupKey;
public string RollBandLabel { get; } = rollBandLabel;
public string ColumnKey { get; } = columnKey;
public IReadOnlyList<string> Lines { get; } = lines;
public IReadOnlyList<string> BaseLines { get; } = baseLines;
public string RawCellText { get; } = rawCellText;
public string DescriptionText { get; } = descriptionText;
public string? RawAffixText { get; } = rawAffixText;
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
}

View File

@@ -6,7 +6,8 @@ public sealed class ParsedCriticalResult(
string rollBandLabel,
string rawCellText,
string descriptionText,
string? rawAffixText)
string? rawAffixText,
IReadOnlyList<ParsedCriticalBranch> branches)
{
public string? GroupKey { get; } = groupKey;
public string ColumnKey { get; } = columnKey;
@@ -14,4 +15,5 @@ public sealed class ParsedCriticalResult(
public string RawCellText { get; } = rawCellText;
public string DescriptionText { get; } = descriptionText;
public string? RawAffixText { get; } = rawAffixText;
public IReadOnlyList<ParsedCriticalBranch> Branches { get; } = branches;
}

View File

@@ -0,0 +1,3 @@
namespace RolemasterDb.ImportTool.Parsing;
/// <summary>
/// Vertical anchor of one table row, created from a row-label fragment.
/// </summary>
/// <param name="Label">Normalized roll-band label of the row.</param>
/// <param name="Top">Top coordinate of the label fragment.</param>
/// <param name="SortOrder">One-based position of the row when anchors are ordered by <paramref name="Top"/>.</param>
internal sealed record RowAnchor(
    string Label,
    int Top,
    int SortOrder);

View File

@@ -15,13 +15,7 @@ public sealed class StandardCriticalTableParser
.ToList();
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
var keyTop = fragments
.Where(item =>
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
.Select(item => (int?)item.Top)
.Min() ?? int.MaxValue;
var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
var leftCutoff = headerFragments.Min(item => item.Left) - 10;
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
@@ -29,44 +23,39 @@ public sealed class StandardCriticalTableParser
leftCutoff,
bodyStartTop,
keyTop);
var rowAnchors = rowLabelFragments
.OrderBy(item => item.Top)
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
.ToList();
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
if (rowAnchors.Count == 0)
{
validationErrors.Add("No roll-band labels were found in the XML artifact.");
}
var bodyFragments = fragments
.Where(item =>
item.Top >= bodyStartTop &&
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
!headerFragments.Contains(item))
.ToList();
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
fragments,
bodyStartTop,
keyTop,
leftCutoff,
rowAnchors,
headerFragments,
columnCenters,
affixLegendSymbols);
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
.ToList();
var cellEntries = new List<CellEntry>();
var cellEntries = new List<ColumnarCellEntry>();
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
{
var rowStart = rowIndex == 0
? bodyStartTop
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
var rowEnd = rowIndex == rowAnchors.Count - 1
? keyTop - 1
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
var rowFragments = bodyFragments
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
@@ -86,7 +75,8 @@ public sealed class StandardCriticalTableParser
continue;
}
cellEntries.Add(new CellEntry(
cellEntries.Add(new ColumnarCellEntry(
null,
rowAnchors[rowIndex].Label,
rowIndex,
columnAnchor.Key,
@@ -94,44 +84,11 @@ public sealed class StandardCriticalTableParser
}
}
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
var parsedCells = new List<ParsedCriticalCellArtifact>();
var parsedResults = new List<ParsedCriticalResult>();
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey))
{
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
if (segmentCount > 2)
{
validationErrors.Add(
$"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
}
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
parsedCells.Add(new ParsedCriticalCellArtifact(
null,
cellEntry.RollBandLabel,
cellEntry.ColumnKey,
cellEntry.Lines,
rawCellText,
descriptionText,
rawAffixText));
parsedResults.Add(new ParsedCriticalResult(
null,
cellEntry.ColumnKey,
cellEntry.RollBandLabel,
rawCellText,
descriptionText,
rawAffixText));
}
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
if (columnCenters.Count != 5)
{
@@ -185,68 +142,4 @@ public sealed class StandardCriticalTableParser
throw new InvalidOperationException("Could not find the standard-table A-E header row in the XML artifact.");
}
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
var maxRowIndex = cellEntries.Count == 0 ? -1 : cellEntries.Max(item => item.RowIndex);
var columnKeys = cellEntries.Select(item => item.ColumnKey).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
{
foreach (var columnKey in columnKeys)
{
var current = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex && item.ColumnKey == columnKey);
var next = cellEntries.SingleOrDefault(item => item.RowIndex == rowIndex + 1 && item.ColumnKey == columnKey);
if (current is null || next is null)
{
continue;
}
var leadingAffixCount = 0;
while (leadingAffixCount < next.Lines.Count && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
{
leadingAffixCount++;
}
if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
{
continue;
}
current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
next.Lines.RemoveRange(0, leadingAffixCount);
}
}
}
private static int ResolveRowBoundaryTop(
RowAnchor current,
RowAnchor next,
IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
var linesBetweenLabels = bodyLines
.Where(item => item.Top >= current.Top && item.Top < next.Top)
.OrderBy(item => item.Top)
.ToList();
for (var index = linesBetweenLabels.Count - 2; index >= 0; index--)
{
if (linesBetweenLabels[index].IsAffixLike && !linesBetweenLabels[index + 1].IsAffixLike)
{
return (int)Math.Floor((linesBetweenLabels[index].Top + linesBetweenLabels[index + 1].Top) / 2.0) + 1;
}
}
return (int)Math.Floor((current.Top + next.Top) / 2.0) + 1;
}
private sealed record RowAnchor(string Label, int Top, int SortOrder);
private sealed class CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
{
public string RollBandLabel { get; } = rollBandLabel;
public int RowIndex { get; } = rowIndex;
public string ColumnKey { get; } = columnKey;
public List<string> Lines { get; } = lines;
}
}

View File

@@ -28,13 +28,7 @@ public sealed class VariantColumnCriticalTableParser
.ToList();
var bodyStartTop = headerFragments.Max(item => item.Top) + CriticalTableParserSupport.HeaderToBodyMinimumGap;
var keyTop = fragments
.Where(item =>
string.Equals(item.Text, "Key:", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("must parry", StringComparison.OrdinalIgnoreCase) ||
item.Text.Contains("attacker gets", StringComparison.OrdinalIgnoreCase))
.Select(item => (int?)item.Top)
.Min() ?? int.MaxValue;
var keyTop = CriticalTableParserSupport.FindKeyTop(fragments);
var affixLegendSymbols = CriticalTableParserSupport.DetectAffixLegendSymbols(fragments, keyTop);
var leftCutoff = headerFragments.Min(item => item.Left) - 10;
var rowLabelFragments = CriticalTableParserSupport.FindRowLabelFragments(
@@ -42,11 +36,7 @@ public sealed class VariantColumnCriticalTableParser
leftCutoff,
bodyStartTop,
keyTop);
var rowAnchors = rowLabelFragments
.OrderBy(item => item.Top)
.Select((item, index) => new RowAnchor(CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), item.Top, index + 1))
.ToList();
var rowAnchors = CriticalTableParserSupport.CreateRowAnchors(rowLabelFragments);
if (rowAnchors.Count == 0)
{
@@ -57,33 +47,32 @@ public sealed class VariantColumnCriticalTableParser
.Select(item => (item.Key, item.CenterX))
.ToList();
var bodyFragments = fragments
.Where(item =>
item.Top >= bodyStartTop &&
item.Top < keyTop - CriticalTableParserSupport.TopGroupingTolerance &&
!CriticalTableParserSupport.IsFooterPageNumberFragment(item, keyTop) &&
!CriticalTableParserSupport.IsPotentialRowLabelFragment(item, leftCutoff) &&
!rowAnchors.Any(anchor => anchor.Top == item.Top && string.Equals(anchor.Label, CriticalTableParserSupport.NormalizeRollBandLabel(item.Text), StringComparison.OrdinalIgnoreCase)) &&
!headerFragments.Contains(item))
.ToList();
bodyFragments = CriticalTableParserSupport.SplitBoundaryCrossingAffixFragments(bodyFragments, columnCenters, affixLegendSymbols);
var bodyFragments = CriticalTableParserSupport.BuildBodyFragments(
fragments,
bodyStartTop,
keyTop,
leftCutoff,
rowAnchors,
headerFragments,
columnCenters,
affixLegendSymbols);
var bodyLines = CriticalTableParserSupport.BuildBodyLines(bodyFragments, columnCenters, affixLegendSymbols);
var parsedRollBands = rowAnchors
.Select(anchor => CriticalTableParserSupport.CreateRollBand(anchor.Label, anchor.SortOrder))
.ToList();
var cellEntries = new List<CellEntry>();
var cellEntries = new List<ColumnarCellEntry>();
for (var rowIndex = 0; rowIndex < rowAnchors.Count; rowIndex++)
{
var rowStart = rowIndex == 0
? bodyStartTop
: ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex - 1], rowAnchors[rowIndex], bodyLines);
var rowEnd = rowIndex == rowAnchors.Count - 1
? keyTop - 1
: ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
: CriticalTableParserSupport.ResolveRowBoundaryTop(rowAnchors[rowIndex], rowAnchors[rowIndex + 1], bodyLines);
var rowFragments = bodyFragments
.Where(item => item.Top >= rowStart && item.Top < rowEnd)
@@ -103,7 +92,8 @@ public sealed class VariantColumnCriticalTableParser
continue;
}
cellEntries.Add(new CellEntry(
cellEntries.Add(new ColumnarCellEntry(
null,
rowAnchors[rowIndex].Label,
rowIndex,
columnAnchor.Key,
@@ -111,42 +101,11 @@ public sealed class VariantColumnCriticalTableParser
}
}
RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
CriticalTableParserSupport.RepairLeadingAffixLeakage(cellEntries, affixLegendSymbols);
var parsedCells = new List<ParsedCriticalCellArtifact>();
var parsedResults = new List<ParsedCriticalResult>();
foreach (var cellEntry in cellEntries.OrderBy(item => item.RowIndex).ThenBy(item => item.ColumnKey, StringComparer.Ordinal))
{
var segmentCount = CriticalTableParserSupport.CountLineTypeSegments(cellEntry.Lines, affixLegendSymbols);
if (segmentCount > 2)
{
validationErrors.Add($"Cell '{cellEntry.RollBandLabel}/{cellEntry.ColumnKey}' interleaves prose and affix lines.");
}
var rawAffixLines = cellEntry.Lines.Where(line => CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var descriptionLines = cellEntry.Lines.Where(line => !CriticalTableParserSupport.IsAffixLikeLine(line, affixLegendSymbols)).ToList();
var rawCellText = string.Join(Environment.NewLine, cellEntry.Lines);
var descriptionText = CriticalTableParserSupport.CollapseWhitespace(string.Join(' ', descriptionLines));
var rawAffixText = rawAffixLines.Count == 0 ? null : string.Join(Environment.NewLine, rawAffixLines);
parsedCells.Add(new ParsedCriticalCellArtifact(
null,
cellEntry.RollBandLabel,
cellEntry.ColumnKey,
cellEntry.Lines,
rawCellText,
descriptionText,
rawAffixText));
parsedResults.Add(new ParsedCriticalResult(
null,
cellEntry.ColumnKey,
cellEntry.RollBandLabel,
rawCellText,
descriptionText,
rawAffixText));
}
CriticalTableParserSupport.BuildParsedArtifacts(cellEntries, affixLegendSymbols, parsedCells, parsedResults, validationErrors);
if (columnAnchors.Count != ExpectedColumns.Length)
{
@@ -208,69 +167,5 @@ public sealed class VariantColumnCriticalTableParser
ExpectedColumns.SingleOrDefault(item => string.Equals(item.Label, value.Trim(), StringComparison.OrdinalIgnoreCase))
?? throw new InvalidOperationException($"Unsupported variant column label '{value}'.");
/// <summary>
/// Moves affix-like lines found at the start of a cell to the end of the cell directly
/// above it (same column key, previous row index).
/// </summary>
/// <remarks>
/// A cell is left untouched when it has no leading affix-like lines, or when every one of
/// its lines is affix-like. Rows are processed top to bottom, so a cell may first receive
/// lines from below and then give up its own leading lines to the cell above.
/// </remarks>
/// <param name="cellEntries">Cells to repair; their <c>Lines</c> lists are mutated in place.</param>
/// <param name="affixLegendSymbols">Legend symbols forwarded to
/// <c>CriticalTableParserSupport.IsAffixLikeLine</c>.</param>
/// <exception cref="InvalidOperationException">
/// More than one entry shares the same row index and column key.
/// </exception>
private static void RepairLeadingAffixLeakage(List<CellEntry> cellEntries, ISet<string> affixLegendSymbols)
{
    if (cellEntries.Count == 0)
    {
        return;
    }

    var maxRowIndex = cellEntries.Max(item => item.RowIndex);

    // De-duplicate with the same (ordinal) semantics used to look entries up below; a
    // case-insensitive Distinct would silently skip keys that differ only by case.
    var columnKeys = cellEntries
        .Select(item => item.ColumnKey)
        .Distinct(StringComparer.Ordinal)
        .ToList();

    // Index the entries once instead of scanning the whole list twice per (row, column) pair.
    // Keys never change during the repair, only the Lines contents do.
    var entriesByPosition = cellEntries.ToLookup(item => (item.RowIndex, item.ColumnKey));

    for (var rowIndex = 0; rowIndex < maxRowIndex; rowIndex++)
    {
        foreach (var columnKey in columnKeys)
        {
            // SingleOrDefault keeps the original contract: duplicates for a position throw.
            var current = entriesByPosition[(rowIndex, columnKey)].SingleOrDefault();
            var next = entriesByPosition[(rowIndex + 1, columnKey)].SingleOrDefault();
            if (current is null || next is null)
            {
                continue;
            }

            var leadingAffixCount = 0;
            while (leadingAffixCount < next.Lines.Count
                && CriticalTableParserSupport.IsAffixLikeLine(next.Lines[leadingAffixCount], affixLegendSymbols))
            {
                leadingAffixCount++;
            }

            // Nothing leaked, or the cell consists solely of affix lines and is left as-is.
            if (leadingAffixCount == 0 || leadingAffixCount == next.Lines.Count)
            {
                continue;
            }

            current.Lines.AddRange(next.Lines.Take(leadingAffixCount));
            next.Lines.RemoveRange(0, leadingAffixCount);
        }
    }
}
/// <summary>
/// Computes the Top value that separates the row anchored at <paramref name="current"/>
/// from the row anchored at <paramref name="next"/>.
/// </summary>
/// <param name="current">Anchor of the upper row.</param>
/// <param name="next">Anchor of the row directly below.</param>
/// <param name="bodyLines">Body lines as (Top, IsAffixLike) pairs; only those with
/// <c>current.Top &lt;= Top &lt; next.Top</c> are considered.</param>
/// <returns>
/// One more than the floored midpoint between the bottom-most affix-like line that is
/// immediately followed by a non-affix line, and that following line. When no such pair
/// exists, one more than the floored midpoint between the two anchors' Top values.
/// </returns>
private static int ResolveRowBoundaryTop(
    RowAnchor current,
    RowAnchor next,
    IReadOnlyList<(int Top, bool IsAffixLike)> bodyLines)
{
    static int JustBelowMidpoint(int upperTop, int lowerTop) =>
        (int)Math.Floor((upperTop + lowerTop) / 2.0) + 1;

    var candidates =
        (from line in bodyLines
         where line.Top >= current.Top && line.Top < next.Top
         orderby line.Top
         select line).ToArray();

    // Scan adjacent pairs from the bottom up so the lowest affix -> non-affix transition wins.
    var lower = candidates.Length - 1;
    while (lower >= 1)
    {
        var upper = lower - 1;
        if (candidates[upper].IsAffixLike && !candidates[lower].IsAffixLike)
        {
            return JustBelowMidpoint(candidates[upper].Top, candidates[lower].Top);
        }

        lower--;
    }

    // No transition found: split the gap between the two row labels.
    return JustBelowMidpoint(current.Top, next.Top);
}
/// <summary>
/// Pairs a column key with the label text of that column.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the element type of <c>ExpectedColumns</c>, whose entries are
/// matched by <c>Label</c> against trimmed header text using a case-insensitive ordinal
/// comparison — confirm against the <c>ExpectedColumns</c> declaration.
/// </remarks>
private sealed record ColumnDefinition(string Key, string Label);
/// <summary>
/// Anchor for one table row, derived from a row-label fragment.
/// </summary>
/// <remarks>
/// As built by the parser in this file: <c>Label</c> is the fragment text after
/// <c>CriticalTableParserSupport.NormalizeRollBandLabel</c>, <c>Top</c> is the fragment's Top,
/// and <c>SortOrder</c> is the 1-based position of the fragment when ordered by Top.
/// <c>Top</c> is also compared against body-line Top values when row boundaries are resolved.
/// </remarks>
private sealed record RowAnchor(string Label, int Top, int SortOrder);
/// <summary>
/// Working entry for a single table cell, addressed by row index and column key.
/// </summary>
/// <remarks>
/// The <see cref="Lines"/> list is stored by reference and is edited in place by the
/// leading-affix repair step (lines are appended to one cell and removed from the next).
/// </remarks>
private sealed class CellEntry
{
    public CellEntry(string rollBandLabel, int rowIndex, string columnKey, List<string> lines)
    {
        RollBandLabel = rollBandLabel;
        RowIndex = rowIndex;
        ColumnKey = columnKey;
        Lines = lines;
    }

    /// <summary>Label of the row this cell belongs to.</summary>
    public string RollBandLabel { get; }

    /// <summary>Index of the row this cell belongs to.</summary>
    public int RowIndex { get; }

    /// <summary>Key of the column this cell belongs to.</summary>
    public string ColumnKey { get; }

    /// <summary>Text lines of the cell; the same list instance that was passed in.</summary>
    public List<string> Lines { get; }
}
}