Add OCR import support for void critical table
This commit is contained in:
@@ -9,9 +9,11 @@ public sealed class ImportArtifactPaths
|
||||
string tableSlug,
|
||||
string directoryPath,
|
||||
string xmlPath,
|
||||
string ocrTsvPath,
|
||||
string fragmentsJsonPath,
|
||||
string parsedCellsJsonPath,
|
||||
string validationReportPath,
|
||||
string ocrPagesDirectoryPath,
|
||||
string pagesDirectoryPath,
|
||||
string cellsDirectoryPath)
|
||||
{
|
||||
@@ -19,9 +21,11 @@ public sealed class ImportArtifactPaths
|
||||
TableSlug = tableSlug;
|
||||
DirectoryPath = directoryPath;
|
||||
XmlPath = xmlPath;
|
||||
OcrTsvPath = ocrTsvPath;
|
||||
FragmentsJsonPath = fragmentsJsonPath;
|
||||
ParsedCellsJsonPath = parsedCellsJsonPath;
|
||||
ValidationReportPath = validationReportPath;
|
||||
OcrPagesDirectoryPath = ocrPagesDirectoryPath;
|
||||
PagesDirectoryPath = pagesDirectoryPath;
|
||||
CellsDirectoryPath = cellsDirectoryPath;
|
||||
}
|
||||
@@ -30,15 +34,18 @@ public sealed class ImportArtifactPaths
|
||||
/// <summary>Table slug supplied to the constructor; <c>Create</c> combines it with the artifacts root to form <see cref="DirectoryPath"/>.</summary>
public string TableSlug { get; }
|
||||
/// <summary>Directory for this table's artifacts; the other paths built in <c>Create</c> are located beneath it.</summary>
public string DirectoryPath { get; }
|
||||
/// <summary>Path returned by <see cref="GetSourceArtifactPath"/> for any extraction method other than "ocr".</summary>
public string XmlPath { get; }
|
||||
/// <summary>Path returned by <see cref="GetSourceArtifactPath"/> when the extraction method is "ocr" (case-insensitive).</summary>
public string OcrTsvPath { get; }
|
||||
/// <summary>Fragments JSON path as supplied to the constructor (presumably "fragments.json" under <see cref="DirectoryPath"/> when built via <c>Create</c> — confirm against the full file).</summary>
public string FragmentsJsonPath { get; }
|
||||
/// <summary>Parsed-cells JSON path as supplied to the constructor (presumably "parsed-cells.json" under <see cref="DirectoryPath"/> when built via <c>Create</c> — confirm against the full file).</summary>
public string ParsedCellsJsonPath { get; }
|
||||
/// <summary>Validation report path as supplied to the constructor (presumably "validation-report.json" under <see cref="DirectoryPath"/> when built via <c>Create</c> — confirm against the full file).</summary>
public string ValidationReportPath { get; }
|
||||
/// <summary>Directory that <see cref="GetOcrPageImagePath"/> resolves page image file names against.</summary>
public string OcrPagesDirectoryPath { get; }
|
||||
/// <summary>Directory that <see cref="GetPageImagePath"/> resolves page image file names against.</summary>
public string PagesDirectoryPath { get; }
|
||||
/// <summary>Cells directory path as supplied to the constructor; <c>Create</c> computes it as "cells" under <see cref="DirectoryPath"/>.</summary>
public string CellsDirectoryPath { get; }
|
||||
|
||||
public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug)
|
||||
{
|
||||
var directoryPath = Path.Combine(artifactsRootPath, tableSlug);
|
||||
var ocrPagesDirectoryPath = Path.Combine(directoryPath, "ocr-pages");
|
||||
var pagesDirectoryPath = Path.Combine(directoryPath, "pages");
|
||||
var cellsDirectoryPath = Path.Combine(directoryPath, "cells");
|
||||
|
||||
@@ -47,13 +54,23 @@ public sealed class ImportArtifactPaths
|
||||
tableSlug,
|
||||
directoryPath,
|
||||
Path.Combine(directoryPath, "source.xml"),
|
||||
Path.Combine(directoryPath, "source.ocr.tsv"),
|
||||
Path.Combine(directoryPath, "fragments.json"),
|
||||
Path.Combine(directoryPath, "parsed-cells.json"),
|
||||
Path.Combine(directoryPath, "validation-report.json"),
|
||||
ocrPagesDirectoryPath,
|
||||
pagesDirectoryPath,
|
||||
cellsDirectoryPath);
|
||||
}
|
||||
|
||||
/// <summary>
/// Picks the source artifact path that matches the given extraction method.
/// </summary>
/// <param name="extractionMethod">
/// Extraction method name. It is compared against "ocr" using an ordinal,
/// case-insensitive comparison; a null value simply does not match.
/// </param>
/// <returns>
/// <see cref="OcrTsvPath"/> when the method is "ocr"; otherwise <see cref="XmlPath"/>.
/// </returns>
public string GetSourceArtifactPath(string extractionMethod)
{
    // Static string.Equals is used so a null argument falls through to the XML path
    // instead of throwing.
    if (string.Equals(extractionMethod, "ocr", StringComparison.OrdinalIgnoreCase))
    {
        return OcrTsvPath;
    }

    return XmlPath;
}
|
||||
|
||||
/// <summary>
/// Builds the path of the PNG image for one page inside <see cref="OcrPagesDirectoryPath"/>.
/// </summary>
/// <param name="pageNumber">Page number; formatted with the "000" custom format (zero-padded to at least three digits).</param>
/// <returns>The combined path, with a file name of the form <c>page-NNN.png</c>.</returns>
public string GetOcrPageImagePath(int pageNumber)
{
    var imageFileName = $"page-{pageNumber:000}.png";
    return Path.Combine(OcrPagesDirectoryPath, imageFileName);
}
|
||||
|
||||
/// <summary>
/// Builds the path of the PNG image for one page inside <see cref="PagesDirectoryPath"/>.
/// </summary>
/// <param name="pageNumber">Page number; formatted with the "000" custom format (zero-padded to at least three digits).</param>
/// <returns>The combined path, with a file name of the form <c>page-NNN.png</c>.</returns>
public string GetPageImagePath(int pageNumber)
{
    var imageFileName = $"page-{pageNumber:000}.png";
    return Path.Combine(PagesDirectoryPath, imageFileName);
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user