diff --git a/src/RolemasterDb.App/rolemaster.db b/src/RolemasterDb.App/rolemaster.db index 8b07266..183b54e 100644 Binary files a/src/RolemasterDb.App/rolemaster.db and b/src/RolemasterDb.App/rolemaster.db differ diff --git a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs index 238775f..4cbaf30 100644 --- a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs +++ b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs @@ -5,7 +5,8 @@ namespace RolemasterDb.ImportTool; public sealed class CriticalImportCommandRunner { private readonly CriticalImportManifestLoader manifestLoader = new(); - private readonly PdfTextExtractor pdfTextExtractor = new(); + private readonly ImportArtifactWriter artifactWriter = new(); + private readonly PdfXmlExtractor pdfXmlExtractor = new(); private readonly StandardCriticalTableParser standardParser = new(); public async Task RunAsync(ResetOptions options) @@ -26,8 +27,8 @@ public sealed class CriticalImportCommandRunner { var entry = GetManifestEntry(options.Table); var artifactPaths = CreateArtifactPaths(entry.Slug); - await pdfTextExtractor.ExtractAsync(ResolveRepositoryPath(entry.PdfPath), artifactPaths.ExtractedTextPath); - Console.WriteLine($"Extracted {entry.Slug} to {artifactPaths.ExtractedTextPath}"); + await pdfXmlExtractor.ExtractAsync(ResolveRepositoryPath(entry.PdfPath), artifactPaths.XmlPath); + Console.WriteLine($"Extracted {entry.Slug} to {artifactPaths.XmlPath}"); return 0; } @@ -36,16 +37,24 @@ public sealed class CriticalImportCommandRunner var entry = GetManifestEntry(options.Table); var artifactPaths = CreateArtifactPaths(entry.Slug); - if (!File.Exists(artifactPaths.ExtractedTextPath)) + if (!File.Exists(artifactPaths.XmlPath)) { - Console.Error.WriteLine($"Missing extracted text artifact: {artifactPaths.ExtractedTextPath}"); + Console.Error.WriteLine($"Missing XML artifact: {artifactPaths.XmlPath}"); return 1; } - var extractedText = await File.ReadAllTextAsync(artifactPaths.ExtractedTextPath); - var parsedTable = Parse(entry, extractedText); + var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath); + var parseResult = Parse(entry, xmlContent); + await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None); + + if (!parseResult.ValidationReport.IsValid) + { + throw new InvalidOperationException( + $"Validation failed for '{entry.Slug}'. See {artifactPaths.ValidationReportPath} for details."); + } + var loader = new CriticalImportLoader(ResolveDatabasePath(options.DatabasePath)); - var result = await loader.LoadAsync(parsedTable); + var result = await loader.LoadAsync(parseResult.Table); Console.WriteLine( $"Loaded {result.TableSlug}: {result.ColumnCount} columns, {result.RollBandCount} roll bands, {result.ResultCount} results."); @@ -82,14 +91,14 @@ public sealed class CriticalImportCommandRunner ?? throw new InvalidOperationException($"No enabled manifest entry was found for '{tableSlug}'."); } - private ParsedCriticalTable Parse(CriticalImportManifestEntry entry, string extractedText) + private StandardCriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent) { if (!string.Equals(entry.Family, "standard", StringComparison.OrdinalIgnoreCase)) { - throw new InvalidOperationException($"Family '{entry.Family}' is not supported by phase 1."); + throw new InvalidOperationException($"Family '{entry.Family}' is not supported by phase 2."); } - return standardParser.Parse(entry, extractedText); + return standardParser.Parse(entry, xmlContent); } private static ImportArtifactPaths CreateArtifactPaths(string slug) => diff --git a/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs b/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs index 7965855..8b45346 100644 --- a/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs +++ b/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs @@ -2,18 +2,34 @@ namespace RolemasterDb.ImportTool; public sealed class ImportArtifactPaths { - private ImportArtifactPaths(string directoryPath, string extractedTextPath) + private ImportArtifactPaths( + string directoryPath, + string xmlPath, + string fragmentsJsonPath, + string parsedCellsJsonPath, + string validationReportPath) { DirectoryPath = directoryPath; - ExtractedTextPath = extractedTextPath; + XmlPath = xmlPath; + FragmentsJsonPath = fragmentsJsonPath; + ParsedCellsJsonPath = parsedCellsJsonPath; + ValidationReportPath = validationReportPath; } public string DirectoryPath { get; } - public string ExtractedTextPath { get; } + public string XmlPath { get; } + public string FragmentsJsonPath { get; } + public string ParsedCellsJsonPath { get; } + public string ValidationReportPath { get; } public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug) { var directoryPath = Path.Combine(artifactsRootPath, tableSlug); - return new ImportArtifactPaths(directoryPath, Path.Combine(directoryPath, "extracted.txt")); + return new ImportArtifactPaths( + directoryPath, + Path.Combine(directoryPath, "source.xml"), + Path.Combine(directoryPath, "fragments.json"), + Path.Combine(directoryPath, "parsed-cells.json"), + Path.Combine(directoryPath, "validation-report.json")); } } diff --git a/src/RolemasterDb.ImportTool/ImportArtifactWriter.cs b/src/RolemasterDb.ImportTool/ImportArtifactWriter.cs new file mode 100644 index 0000000..53c3cc4 --- /dev/null +++ b/src/RolemasterDb.ImportTool/ImportArtifactWriter.cs @@ -0,0 +1,33 @@ +using System.Text.Json; + +using RolemasterDb.ImportTool.Parsing; + +namespace RolemasterDb.ImportTool; + +public sealed class ImportArtifactWriter +{ + private static readonly JsonSerializerOptions JsonOptions = new() + { + WriteIndented = true + }; + + public async Task WriteAsync(ImportArtifactPaths artifactPaths, StandardCriticalTableParseResult parseResult, CancellationToken cancellationToken = default) + { + Directory.CreateDirectory(artifactPaths.DirectoryPath); + + await File.WriteAllTextAsync( + artifactPaths.FragmentsJsonPath, + JsonSerializer.Serialize(parseResult.Fragments, JsonOptions), + cancellationToken); + + await File.WriteAllTextAsync( + artifactPaths.ParsedCellsJsonPath, + JsonSerializer.Serialize(parseResult.Cells, JsonOptions), + cancellationToken); + + await File.WriteAllTextAsync( + artifactPaths.ValidationReportPath, + JsonSerializer.Serialize(parseResult.ValidationReport, JsonOptions), + cancellationToken); + } +} diff --git a/src/RolemasterDb.ImportTool/Parsing/ImportValidationReport.cs b/src/RolemasterDb.ImportTool/Parsing/ImportValidationReport.cs new file mode 100644 index 0000000..f9a4c73 --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/ImportValidationReport.cs @@ -0,0 +1,13 @@ +namespace RolemasterDb.ImportTool.Parsing; + +public sealed class ImportValidationReport( + bool isValid, + IReadOnlyList errors, + int rowCount, + int cellCount) +{ + public bool IsValid { get; } = isValid; + public IReadOnlyList Errors { get; } = errors; + public int RowCount { get; } = rowCount; + public int CellCount { get; } = cellCount; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs new file mode 100644 index 0000000..76475a3 --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs @@ -0,0 +1,17 @@ +namespace RolemasterDb.ImportTool.Parsing; + +public sealed class ParsedCriticalCellArtifact( + string rollBandLabel, + string columnKey, + IReadOnlyList lines, + string rawCellText, + string descriptionText, + string? rawAffixText) +{ + public string RollBandLabel { get; } = rollBandLabel; + public string ColumnKey { get; } = columnKey; + public IReadOnlyList Lines { get; } = lines; + public string RawCellText { get; } = rawCellText; + public string DescriptionText { get; } = descriptionText; + public string? RawAffixText { get; } = rawAffixText; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParseResult.cs b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParseResult.cs new file mode 100644 index 0000000..0b5182a --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParseResult.cs @@ -0,0 +1,13 @@ +namespace RolemasterDb.ImportTool.Parsing; + +public sealed class StandardCriticalTableParseResult( + ParsedCriticalTable table, + IReadOnlyList fragments, + IReadOnlyList cells, + ImportValidationReport validationReport) +{ + public ParsedCriticalTable Table { get; } = table; + public IReadOnlyList Fragments { get; } = fragments; + public IReadOnlyList Cells { get; } = cells; + public ImportValidationReport ValidationReport { get; } = validationReport; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs index 7e52e80..876c9cd 100644 --- a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs +++ b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs @@ -1,208 +1,206 @@ using System.Text.RegularExpressions; +using System.Xml; +using System.Xml.Linq; namespace RolemasterDb.ImportTool.Parsing; public sealed class StandardCriticalTableParser { - private static readonly Regex ColumnRegex = new(@"\b([A-E])\b", RegexOptions.IgnoreCase | RegexOptions.Compiled); - private static readonly Regex RollBandRegex = new(@"^\s*(?