Files
RolemasterDB/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs

118 lines
4.1 KiB
C#

using System.Text;
namespace RolemasterDb.ImportTool;
/// <summary>
/// Immutable collection of the file and directory paths used for one table's import
/// artifacts. Every path is derived from an artifacts root directory and a table slug.
/// </summary>
public sealed class ImportArtifactPaths
{
    // Instances are only handed out by Create; all derived paths are computed here
    // from the two inputs so the layout is described in a single place.
    private ImportArtifactPaths(string artifactsRootPath, string tableSlug)
    {
        var tableDirectory = Path.Combine(artifactsRootPath, tableSlug);

        ArtifactsRootPath = artifactsRootPath;
        TableSlug = tableSlug;
        DirectoryPath = tableDirectory;

        OcrPagesDirectoryPath = Path.Combine(tableDirectory, "ocr-pages");
        PagesDirectoryPath = Path.Combine(tableDirectory, "pages");
        CellsDirectoryPath = Path.Combine(tableDirectory, "cells");

        XmlPath = Path.Combine(tableDirectory, "source.xml");
        OcrTsvPath = Path.Combine(tableDirectory, "source.ocr.tsv");
        FragmentsJsonPath = Path.Combine(tableDirectory, "fragments.json");
        ParsedCellsJsonPath = Path.Combine(tableDirectory, "parsed-cells.json");
        ValidationReportPath = Path.Combine(tableDirectory, "validation-report.json");
    }

    /// <summary>Root directory passed to <see cref="Create"/>, stored unchanged.</summary>
    public string ArtifactsRootPath { get; }

    /// <summary>Table slug passed to <see cref="Create"/>, stored unchanged.</summary>
    public string TableSlug { get; }

    /// <summary><see cref="ArtifactsRootPath"/> combined with <see cref="TableSlug"/>.</summary>
    public string DirectoryPath { get; }

    /// <summary>Path of <c>source.xml</c> inside <see cref="DirectoryPath"/>.</summary>
    public string XmlPath { get; }

    /// <summary>Path of <c>source.ocr.tsv</c> inside <see cref="DirectoryPath"/>.</summary>
    public string OcrTsvPath { get; }

    /// <summary>Path of <c>fragments.json</c> inside <see cref="DirectoryPath"/>.</summary>
    public string FragmentsJsonPath { get; }

    /// <summary>Path of <c>parsed-cells.json</c> inside <see cref="DirectoryPath"/>.</summary>
    public string ParsedCellsJsonPath { get; }

    /// <summary>Path of <c>validation-report.json</c> inside <see cref="DirectoryPath"/>.</summary>
    public string ValidationReportPath { get; }

    /// <summary>Path of the <c>ocr-pages</c> directory inside <see cref="DirectoryPath"/>.</summary>
    public string OcrPagesDirectoryPath { get; }

    /// <summary>Path of the <c>pages</c> directory inside <see cref="DirectoryPath"/>.</summary>
    public string PagesDirectoryPath { get; }

    /// <summary>Path of the <c>cells</c> directory inside <see cref="DirectoryPath"/>.</summary>
    public string CellsDirectoryPath { get; }

    /// <summary>
    /// Builds the full set of artifact paths for a table.
    /// </summary>
    /// <param name="artifactsRootPath">Directory under which the table directory is placed.</param>
    /// <param name="tableSlug">Name of the table directory below the root.</param>
    /// <returns>A new instance with all paths populated.</returns>
    public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug)
    {
        return new ImportArtifactPaths(artifactsRootPath, tableSlug);
    }

    /// <summary>
    /// Selects the source artifact for an extraction method.
    /// </summary>
    /// <param name="extractionMethod">Extraction method name; compared case-insensitively with <c>"ocr"</c>.</param>
    /// <returns><see cref="OcrTsvPath"/> for <c>"ocr"</c>; <see cref="XmlPath"/> for any other value.</returns>
    public string GetSourceArtifactPath(string extractionMethod)
    {
        if (string.Equals(extractionMethod, "ocr", StringComparison.OrdinalIgnoreCase))
        {
            return OcrTsvPath;
        }

        return XmlPath;
    }

    /// <summary>
    /// Path of the image for a page inside <see cref="OcrPagesDirectoryPath"/>.
    /// </summary>
    /// <param name="pageNumber">Page number, zero-padded to at least three digits in the file name.</param>
    /// <returns>A path ending in <c>page-NNN.png</c>.</returns>
    public string GetOcrPageImagePath(int pageNumber)
    {
        return BuildPageImagePath(OcrPagesDirectoryPath, pageNumber);
    }

    /// <summary>
    /// Path of the image for a page inside <see cref="PagesDirectoryPath"/>.
    /// </summary>
    /// <param name="pageNumber">Page number, zero-padded to at least three digits in the file name.</param>
    /// <returns>A path ending in <c>page-NNN.png</c>.</returns>
    public string GetPageImagePath(int pageNumber)
    {
        return BuildPageImagePath(PagesDirectoryPath, pageNumber);
    }

    /// <summary>
    /// Builds the cell image path relative to the artifacts root, always using forward slashes.
    /// </summary>
    /// <param name="groupKey">Optional group key; <c>null</c> is treated as <c>"none"</c>.</param>
    /// <param name="columnKey">Column key used as the second file-name segment.</param>
    /// <param name="rollBandLabel">Roll band label used as the third file-name segment.</param>
    /// <returns>
    /// A path of the form <c>{TableSlug}/cells/{group}__{column}__{rollBand}.png</c>, where each
    /// segment has been normalized for use in a file name.
    /// </returns>
    public string GetRelativeCellImagePath(string? groupKey, string columnKey, string rollBandLabel)
    {
        var groupSegment = NormalizeFileSegment(groupKey ?? "none");
        var columnSegment = NormalizeFileSegment(columnKey);
        var rollBandSegment = NormalizeFileSegment(rollBandLabel);

        var fileName = $"{groupSegment}__{columnSegment}__{rollBandSegment}.png";
        var combined = Path.Combine(TableSlug, "cells", fileName);

        // Path.Combine uses the platform separator; the relative form is kept slash-delimited.
        return combined.Replace('\\', '/');
    }

    /// <summary>
    /// Turns a slash-delimited relative path into a full path below <see cref="ArtifactsRootPath"/>.
    /// </summary>
    /// <param name="relativePath">Relative path using <c>/</c> as separator.</param>
    /// <returns>The result of <see cref="Path.GetFullPath(string)"/> on the combined path.</returns>
    public string ResolveRelativePath(string relativePath)
    {
        var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
        var combined = Path.Combine(ArtifactsRootPath, platformRelativePath);
        return Path.GetFullPath(combined);
    }

    // Shared by both page-image getters: "page-" + zero-padded number + ".png" inside the given directory.
    private static string BuildPageImagePath(string pagesDirectory, int pageNumber)
    {
        var fileName = $"page-{pageNumber:000}.png";
        return Path.Combine(pagesDirectory, fileName);
    }

    // Lower-cases and trims the value, keeps letters, digits, '-' and '_', spells '+' as "plus",
    // replaces every other character with '_', strips leading/trailing '_' and falls back to
    // "empty" when nothing is left.
    private static string NormalizeFileSegment(string value)
    {
        var segment = new StringBuilder();

        foreach (var ch in value.Trim().ToLowerInvariant())
        {
            switch (ch)
            {
                case '+':
                    segment.Append("plus");
                    break;

                case '-':
                case '_':
                    segment.Append(ch);
                    break;

                default:
                    segment.Append(char.IsLetterOrDigit(ch) ? ch : '_');
                    break;
            }
        }

        var trimmed = segment.ToString().Trim('_');
        return string.IsNullOrWhiteSpace(trimmed) ? "empty" : trimmed;
    }
}