118 lines
4.1 KiB
C#
118 lines
4.1 KiB
C#
using System.Text;
|
|
|
|
namespace RolemasterDb.ImportTool;
|
|
|
|
/// <summary>
/// Immutable bundle of the file-system locations used for one table's import
/// artifacts. Every path is derived from an artifacts root directory and a
/// table slug; instances are obtained through <see cref="Create"/>.
/// </summary>
public sealed class ImportArtifactPaths
{
    /// <summary>
    /// Derives all artifact paths for <paramref name="tableSlug"/> beneath
    /// <paramref name="artifactsRootPath"/>. Kept private so that
    /// <see cref="Create"/> is the only way to build an instance.
    /// </summary>
    private ImportArtifactPaths(string artifactsRootPath, string tableSlug)
    {
        // Everything else hangs off the per-table directory.
        var tableDirectory = Path.Combine(artifactsRootPath, tableSlug);

        ArtifactsRootPath = artifactsRootPath;
        TableSlug = tableSlug;
        DirectoryPath = tableDirectory;

        XmlPath = Path.Combine(tableDirectory, "source.xml");
        OcrTsvPath = Path.Combine(tableDirectory, "source.ocr.tsv");
        FragmentsJsonPath = Path.Combine(tableDirectory, "fragments.json");
        ParsedCellsJsonPath = Path.Combine(tableDirectory, "parsed-cells.json");
        ValidationReportPath = Path.Combine(tableDirectory, "validation-report.json");

        OcrPagesDirectoryPath = Path.Combine(tableDirectory, "ocr-pages");
        PagesDirectoryPath = Path.Combine(tableDirectory, "pages");
        CellsDirectoryPath = Path.Combine(tableDirectory, "cells");
    }

    /// <summary>Root directory under which each table gets its own sub-directory.</summary>
    public string ArtifactsRootPath { get; }

    /// <summary>Slug of the table; used as the name of the table's sub-directory.</summary>
    public string TableSlug { get; }

    /// <summary><see cref="ArtifactsRootPath"/> combined with <see cref="TableSlug"/>.</summary>
    public string DirectoryPath { get; }

    /// <summary>Path of <c>source.xml</c> inside <see cref="DirectoryPath"/>.</summary>
    public string XmlPath { get; }

    /// <summary>Path of <c>source.ocr.tsv</c> inside <see cref="DirectoryPath"/>.</summary>
    public string OcrTsvPath { get; }

    /// <summary>Path of <c>fragments.json</c> inside <see cref="DirectoryPath"/>.</summary>
    public string FragmentsJsonPath { get; }

    /// <summary>Path of <c>parsed-cells.json</c> inside <see cref="DirectoryPath"/>.</summary>
    public string ParsedCellsJsonPath { get; }

    /// <summary>Path of <c>validation-report.json</c> inside <see cref="DirectoryPath"/>.</summary>
    public string ValidationReportPath { get; }

    /// <summary>Path of the <c>ocr-pages</c> directory inside <see cref="DirectoryPath"/>.</summary>
    public string OcrPagesDirectoryPath { get; }

    /// <summary>Path of the <c>pages</c> directory inside <see cref="DirectoryPath"/>.</summary>
    public string PagesDirectoryPath { get; }

    /// <summary>Path of the <c>cells</c> directory inside <see cref="DirectoryPath"/>.</summary>
    public string CellsDirectoryPath { get; }

    /// <summary>
    /// Builds the path set for the table identified by <paramref name="tableSlug"/>.
    /// </summary>
    /// <param name="artifactsRootPath">Directory that contains the per-table artifact directories.</param>
    /// <param name="tableSlug">Name of the table's sub-directory below the root.</param>
    /// <returns>A new <see cref="ImportArtifactPaths"/> with every path populated.</returns>
    public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug)
    {
        return new ImportArtifactPaths(artifactsRootPath, tableSlug);
    }

    /// <summary>
    /// Chooses the source artifact for an extraction method.
    /// </summary>
    /// <param name="extractionMethod">Extraction method name; compared case-insensitively with <c>"ocr"</c>.</param>
    /// <returns><see cref="OcrTsvPath"/> for <c>"ocr"</c>, otherwise <see cref="XmlPath"/>.</returns>
    public string GetSourceArtifactPath(string extractionMethod)
    {
        var usesOcr = string.Equals(extractionMethod, "ocr", StringComparison.OrdinalIgnoreCase);
        if (usesOcr)
        {
            return OcrTsvPath;
        }

        return XmlPath;
    }

    /// <summary>
    /// Returns the PNG path for <paramref name="pageNumber"/> inside <see cref="OcrPagesDirectoryPath"/>.
    /// </summary>
    public string GetOcrPageImagePath(int pageNumber)
    {
        return Path.Combine(OcrPagesDirectoryPath, BuildPageFileName(pageNumber));
    }

    /// <summary>
    /// Returns the PNG path for <paramref name="pageNumber"/> inside <see cref="PagesDirectoryPath"/>.
    /// </summary>
    public string GetPageImagePath(int pageNumber)
    {
        return Path.Combine(PagesDirectoryPath, BuildPageFileName(pageNumber));
    }

    /// <summary>
    /// Builds the cell image path relative to the artifacts root, always using
    /// forward slashes: <c>{TableSlug}/cells/{group}__{column}__{rollBand}.png</c>.
    /// Each of the three name parts is passed through the file-segment normaliser.
    /// </summary>
    /// <param name="groupKey">Group key; <c>"none"</c> is substituted when null.</param>
    /// <param name="columnKey">Column key.</param>
    /// <param name="rollBandLabel">Roll band label.</param>
    public string GetRelativeCellImagePath(string? groupKey, string columnKey, string rollBandLabel)
    {
        var groupSegment = NormalizeFileSegment(groupKey ?? "none");
        var columnSegment = NormalizeFileSegment(columnKey);
        var rollBandSegment = NormalizeFileSegment(rollBandLabel);
        var fileName = groupSegment + "__" + columnSegment + "__" + rollBandSegment + ".png";

        var combined = Path.Combine(TableSlug, "cells", fileName);

        // Path.Combine uses the platform separator; the relative form is kept with '/'.
        return combined.Replace('\\', '/');
    }

    /// <summary>
    /// Turns a forward-slash relative path into a full path below <see cref="ArtifactsRootPath"/>.
    /// </summary>
    /// <param name="relativePath">Path relative to the artifacts root, using <c>'/'</c> separators.</param>
    public string ResolveRelativePath(string relativePath)
    {
        var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
        var joined = Path.Combine(ArtifactsRootPath, platformRelativePath);
        return Path.GetFullPath(joined);
    }

    /// <summary>File name shared by both page-image directories: <c>page-NNN.png</c>, zero-padded to three digits.</summary>
    private static string BuildPageFileName(int pageNumber)
    {
        return $"page-{pageNumber:000}.png";
    }

    /// <summary>
    /// Reduces <paramref name="value"/> to a file-name-safe token: trimmed and
    /// lower-cased, letters/digits/'-'/'_' kept, '+' spelled out as "plus",
    /// every other character replaced by '_'. Leading and trailing underscores
    /// are stripped; an empty result becomes <c>"empty"</c>.
    /// </summary>
    private static string NormalizeFileSegment(string value)
    {
        var segment = new StringBuilder();
        var lowered = value.Trim().ToLowerInvariant();

        foreach (var current in lowered)
        {
            switch (current)
            {
                case '+':
                    segment.Append("plus");
                    break;

                case '-' or '_':
                    segment.Append(current);
                    break;

                default:
                    segment.Append(char.IsLetterOrDigit(current) ? current : '_');
                    break;
            }
        }

        var trimmed = segment.ToString().Trim('_');
        if (string.IsNullOrWhiteSpace(trimmed))
        {
            return "empty";
        }

        return trimmed;
    }
}
|