118 lines
4.0 KiB
C#
118 lines
4.0 KiB
C#
using System.Diagnostics;
|
|
|
|
namespace RolemasterDb.ImportTool;
|
|
|
|
/// <summary>
/// Thin wrapper around the <c>pdftohtml</c> and <c>pdftoppm</c> command-line tools:
/// converts a PDF to XML and renders single pages (or crops of a page) to PNG.
/// Both tools are resolved by bare file name, so they must be discoverable by the OS
/// process launcher (for example via <c>PATH</c>).
/// </summary>
public sealed class PdfXmlExtractor
{
    /// <summary>
    /// Resolution, in DPI, passed to <c>pdftoppm -r</c> for every render.
    /// NOTE(review): presumably chosen so rendered pixels line up with the coordinates in the
    /// <c>pdftohtml</c> XML output - confirm against the consumers of this constant.
    /// </summary>
    public const int XmlAlignedRenderDpi = 108;

    /// <summary>
    /// Runs <c>pdftohtml -xml -i -noframes</c> on <paramref name="pdfPath"/>, passing
    /// <paramref name="outputPath"/> as the output argument.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF to convert.</param>
    /// <param name="outputPath">Output path handed to the tool; its directory is created if missing.</param>
    /// <param name="cancellationToken">Cancels the wait and terminates the tool.</param>
    /// <exception cref="InvalidOperationException">The tool exited with a non-zero exit code.</exception>
    public async Task ExtractAsync(string pdfPath, string outputPath, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(pdfPath);
        ArgumentNullException.ThrowIfNull(outputPath);

        EnsureOutputDirectory(outputPath);

        var startInfo = CreateStartInfo("pdftohtml");
        startInfo.ArgumentList.Add("-xml");
        startInfo.ArgumentList.Add("-i");
        startInfo.ArgumentList.Add("-noframes");
        startInfo.ArgumentList.Add(pdfPath);
        startInfo.ArgumentList.Add(outputPath);

        await RunToolAsync(startInfo, pdfPath, cancellationToken);
    }

    /// <summary>
    /// Renders one whole page of <paramref name="pdfPath"/> to a PNG at <see cref="XmlAlignedRenderDpi"/>.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF to render.</param>
    /// <param name="pageNumber">Page passed to the tool as both first (<c>-f</c>) and last (<c>-l</c>) page.</param>
    /// <param name="outputPath">Expected path of the resulting image; see <see cref="RenderCropPngAsync"/> for the naming caveat.</param>
    /// <param name="cancellationToken">Cancels the wait and terminates the tool.</param>
    /// <exception cref="InvalidOperationException">The tool failed or did not create <paramref name="outputPath"/>.</exception>
    public Task RenderPagePngAsync(
        string pdfPath,
        int pageNumber,
        string outputPath,
        CancellationToken cancellationToken = default) =>
        RenderPngAsync(pdfPath, pageNumber, outputPath, null, null, null, null, cancellationToken);

    /// <summary>
    /// Renders a rectangular crop of one page of <paramref name="pdfPath"/> to a PNG at
    /// <see cref="XmlAlignedRenderDpi"/>. The rectangle is forwarded to the tool's
    /// <c>-x</c>, <c>-y</c>, <c>-W</c> and <c>-H</c> options.
    /// </summary>
    /// <param name="pdfPath">Path of the PDF to render.</param>
    /// <param name="pageNumber">Page passed to the tool as both first and last page.</param>
    /// <param name="left">Value for <c>-x</c>.</param>
    /// <param name="top">Value for <c>-y</c>.</param>
    /// <param name="width">Value for <c>-W</c>.</param>
    /// <param name="height">Value for <c>-H</c>.</param>
    /// <param name="outputPath">
    /// Expected path of the resulting image. The tool is only given this path without its extension,
    /// so the existence check afterwards relies on the tool appending the extension itself
    /// (presumably <c>.png</c> - verify against the pdftoppm documentation).
    /// </param>
    /// <param name="cancellationToken">Cancels the wait and terminates the tool.</param>
    /// <exception cref="InvalidOperationException">The tool failed or did not create <paramref name="outputPath"/>.</exception>
    public Task RenderCropPngAsync(
        string pdfPath,
        int pageNumber,
        int left,
        int top,
        int width,
        int height,
        string outputPath,
        CancellationToken cancellationToken = default) =>
        RenderPngAsync(pdfPath, pageNumber, outputPath, left, top, width, height, cancellationToken);

    /// <summary>
    /// Shared implementation of the page and crop renders. The crop options are only emitted
    /// when all four of <paramref name="left"/>, <paramref name="top"/>, <paramref name="width"/>
    /// and <paramref name="height"/> are supplied.
    /// </summary>
    private static async Task RenderPngAsync(
        string pdfPath,
        int pageNumber,
        string outputPath,
        int? left,
        int? top,
        int? width,
        int? height,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(pdfPath);
        ArgumentNullException.ThrowIfNull(outputPath);

        EnsureOutputDirectory(outputPath);

        var startInfo = CreateStartInfo("pdftoppm");
        startInfo.ArgumentList.Add("-png");
        startInfo.ArgumentList.Add("-r");
        startInfo.ArgumentList.Add(FormatArgument(XmlAlignedRenderDpi));
        startInfo.ArgumentList.Add("-f");
        startInfo.ArgumentList.Add(FormatArgument(pageNumber));
        startInfo.ArgumentList.Add("-l");
        startInfo.ArgumentList.Add(FormatArgument(pageNumber));
        startInfo.ArgumentList.Add("-singlefile");

        if (left.HasValue && top.HasValue && width.HasValue && height.HasValue)
        {
            startInfo.ArgumentList.Add("-x");
            startInfo.ArgumentList.Add(FormatArgument(left.Value));
            startInfo.ArgumentList.Add("-y");
            startInfo.ArgumentList.Add(FormatArgument(top.Value));
            startInfo.ArgumentList.Add("-W");
            startInfo.ArgumentList.Add(FormatArgument(width.Value));
            startInfo.ArgumentList.Add("-H");
            startInfo.ArgumentList.Add(FormatArgument(height.Value));
        }

        startInfo.ArgumentList.Add(pdfPath);

        // The tool receives the output path without its extension. A bare file name has an
        // empty directory part, which Path.Combine handles; only null must be avoided.
        var outputDirectory = Path.GetDirectoryName(outputPath) ?? string.Empty;
        startInfo.ArgumentList.Add(Path.Combine(outputDirectory, Path.GetFileNameWithoutExtension(outputPath)));

        await RunToolAsync(startInfo, pdfPath, cancellationToken);

        if (!File.Exists(outputPath))
        {
            throw new InvalidOperationException($"pdftoppm completed but did not create '{outputPath}'.");
        }
    }

    /// <summary>Builds the common start info: no shell, no window, stdout and stderr redirected.</summary>
    private static ProcessStartInfo CreateStartInfo(string fileName) =>
        new()
        {
            FileName = fileName,
            RedirectStandardError = true,
            RedirectStandardOutput = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

    /// <summary>Formats an integer command-line argument independently of the current culture.</summary>
    private static string FormatArgument(int value) =>
        value.ToString(System.Globalization.CultureInfo.InvariantCulture);

    /// <summary>
    /// Creates the directory part of <paramref name="outputPath"/> when there is one.
    /// A bare file name (empty directory part) refers to the current directory and needs no creation.
    /// </summary>
    private static void EnsureOutputDirectory(string outputPath)
    {
        var directory = Path.GetDirectoryName(outputPath);
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }
    }

    /// <summary>
    /// Starts the tool described by <paramref name="startInfo"/>, waits for it to exit and throws
    /// when the exit code is non-zero. On cancellation the tool is terminated before the
    /// cancellation exception propagates.
    /// </summary>
    /// <exception cref="InvalidOperationException">The tool exited with a non-zero exit code.</exception>
    private static async Task RunToolAsync(ProcessStartInfo startInfo, string pdfPath, CancellationToken cancellationToken)
    {
        using var process = new Process { StartInfo = startInfo };
        process.Start();

        // Both streams are redirected, so they must be drained while the tool runs:
        // a child that fills an unread pipe blocks on write and would never exit.
        var standardOutput = process.StandardOutput.ReadToEndAsync(cancellationToken);
        var standardError = process.StandardError.ReadToEndAsync(cancellationToken);

        try
        {
            await Task.WhenAll(process.WaitForExitAsync(cancellationToken), standardOutput, standardError);
        }
        catch (OperationCanceledException)
        {
            TerminateQuietly(process);
            throw;
        }

        if (process.ExitCode != 0)
        {
            var error = await standardError;
            throw new InvalidOperationException($"{startInfo.FileName} failed for '{pdfPath}': {error}");
        }
    }

    /// <summary>Best-effort kill, so that a failure to terminate does not mask the cancellation.</summary>
    private static void TerminateQuietly(Process process)
    {
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception ex) when (ex is InvalidOperationException
                                       or System.ComponentModel.Win32Exception
                                       or AggregateException)
        {
            // The process exited on its own or could not be terminated; nothing more to do here.
        }
    }
}
|