Use XML geometry for critical PDF import
This commit is contained in:
36
src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
Normal file
36
src/RolemasterDb.ImportTool/PdfXmlExtractor.cs
Normal file
@@ -0,0 +1,36 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace RolemasterDb.ImportTool;
|
||||
|
||||
/// <summary>
/// Converts a PDF file to XML by invoking the external <c>pdftohtml</c> command-line tool.
/// </summary>
public sealed class PdfXmlExtractor
{
    /// <summary>
    /// Runs <c>pdftohtml -xml -i -noframes</c> against <paramref name="pdfPath"/>, writing to
    /// <paramref name="outputPath"/>. The output directory is created if it does not exist.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF file passed to <c>pdftohtml</c> as its input argument.</param>
    /// <param name="outputPath">Path passed to <c>pdftohtml</c> as its output argument.</param>
    /// <param name="cancellationToken">Cancels the wait; the child process is killed when cancellation is observed.</param>
    /// <exception cref="InvalidOperationException">
    /// <c>pdftohtml</c> exited with a non-zero exit code; the message contains its standard error output.
    /// </exception>
    /// <exception cref="OperationCanceledException">The operation was cancelled before the process exited.</exception>
    public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
    {
        // GetDirectoryName yields null for a root path and "" for a bare file name; in both cases
        // there is no directory to create (and CreateDirectory("") would throw).
        var outputDirectory = Path.GetDirectoryName(outputPath);
        if (!string.IsNullOrEmpty(outputDirectory))
        {
            Directory.CreateDirectory(outputDirectory);
        }

        var startInfo = new ProcessStartInfo
        {
            FileName = "pdftohtml",
            RedirectStandardError = true,
            RedirectStandardOutput = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        // ArgumentList handles quoting/escaping of each argument individually.
        startInfo.ArgumentList.Add("-xml");
        startInfo.ArgumentList.Add("-i");
        startInfo.ArgumentList.Add("-noframes");
        startInfo.ArgumentList.Add(pdfPath);
        startInfo.ArgumentList.Add(outputPath);

        using var process = new Process { StartInfo = startInfo };
        process.Start();

        // Both streams are redirected, so they must be drained while the process runs.
        // Otherwise the child can block on a full pipe buffer and never exit.
        var standardOutputTask = process.StandardOutput.ReadToEndAsync(cancellationToken);
        var standardErrorTask = process.StandardError.ReadToEndAsync(cancellationToken);

        string error;
        try
        {
            await process.WaitForExitAsync(cancellationToken);
            await standardOutputTask; // Content is not used; awaited only to observe read failures.
            error = await standardErrorTask;
        }
        catch (OperationCanceledException)
        {
            // Do not leave an orphaned pdftohtml process behind when the caller cancels.
            try
            {
                if (!process.HasExited)
                {
                    process.Kill(entireProcessTree: true);
                }
            }
            catch (InvalidOperationException)
            {
                // The process exited between the check and the kill; nothing left to clean up.
            }

            throw;
        }

        if (process.ExitCode != 0)
        {
            throw new InvalidOperationException($"pdftohtml failed for '{pdfPath}': {error}");
        }
    }
}
|
||||
Reference in New Issue
Block a user