diff --git a/RolemasterDB.slnx b/RolemasterDB.slnx
index 17ff5d5..04b786c 100644
--- a/RolemasterDB.slnx
+++ b/RolemasterDB.slnx
@@ -2,5 +2,6 @@
+
diff --git a/docs/critical_import_tool.md b/docs/critical_import_tool.md
index 4e96af7..fbdb506 100644
--- a/docs/critical_import_tool.md
+++ b/docs/critical_import_tool.md
@@ -31,14 +31,33 @@ The current implementation supports:
- manifest-driven source selection
- `standard` critical tables with columns `A-E`
- XML-based extraction using `pdftohtml -xml`
-- geometry-based parsing for `Slash.pdf`
+- geometry-based parsing across the currently enabled phase-3 tables:
+ - `arcane-aether`
+ - `arcane-nether`
+ - `ballistic-shrapnel`
+ - `brawling`
+ - `cold`
+ - `electricity`
+ - `grapple`
+ - `heat`
+ - `impact`
+ - `krush`
+ - `ma-strikes`
+ - `ma-sweeps`
+ - `puncture`
+ - `slash`
+ - `subdual`
+ - `tiny`
+ - `unbalance`
- row-boundary repair for trailing affix leakage
+- footer/page-number filtering during body parsing
- transactional loading into SQLite
The current implementation does not yet support:
- variant-column critical tables
- grouped variant tables
+- `Mana.pdf`, whose current XML layout and affix notation still need a dedicated parser pass
- OCR/image-based PDFs such as `Void.pdf`
- normalized `critical_branch` population
- normalized `critical_effect` population
@@ -183,10 +202,9 @@ The parser was hardened in two ways:
The importer now explicitly rejects cells that still look structurally wrong after repair:
-- a cell may not begin with affix-like lines before prose
-- a cell may not contain prose after affix lines
+- prose and affix segments may not alternate more than once inside a cell; that is, a cell may hold at most one prose block and one contiguous affix block, in either order
-This hardening step is important because it closed a class of row-boundary bugs that simple row/cell counts could not detect.
+This keeps the phase-2.1 safety goal in place while allowing broader standard-table layouts that render a single affix block either before or after the prose block.
## Planned Future Phases
@@ -194,9 +212,34 @@ The current architecture is intended to support additional phases:
### Phase 3: Broader Table Coverage
-- add more `standard` critical PDFs
-- expand the manifest
-- verify parser stability across more source layouts
+Phase 3 expands the manifest and validates the shared `standard` parser across a broader set of `A-E` tables.
+
+The currently enabled phase-3 table set is:
+
+- `arcane-aether`
+- `arcane-nether`
+- `ballistic-shrapnel`
+- `brawling`
+- `cold`
+- `electricity`
+- `grapple`
+- `heat`
+- `impact`
+- `krush`
+- `ma-strikes`
+- `ma-sweeps`
+- `puncture`
+- `slash`
+- `subdual`
+- `tiny`
+- `unbalance`
+
+Current phase-3 notes:
+
+- header detection now tolerates minor `top` misalignment across the `A-E` header glyphs
+- footer page numbers are filtered out before body parsing
+- validation allows a single contiguous affix block either before or after prose
+- `Mana.pdf` is intentionally left out for now because its row-anchor geometry and affix notation still need dedicated handling
### Phase 4: Variant and Grouped Tables
@@ -289,6 +332,11 @@ Each entry declares:
The manifest is intentionally the control point for enabling importer support one table at a time.
+For the currently enabled phase-3 entries:
+
+- `family` is `standard`
+- `extractionMethod` is `xml`
+
## Artifact Layout
Artifacts are written under:
diff --git a/sources/critical-import-manifest.json b/sources/critical-import-manifest.json
index 696030f..97893bf 100644
--- a/sources/critical-import-manifest.json
+++ b/sources/critical-import-manifest.json
@@ -1,12 +1,140 @@
{
"tables": [
+ {
+ "slug": "arcane-aether",
+ "displayName": "Arcane Aether Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Arcane Aether.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "arcane-nether",
+ "displayName": "Arcane Nether Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Arcane Nether.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "ballistic-shrapnel",
+ "displayName": "Ballistic Shrapnel Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Ballistic Shrapnel.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "brawling",
+ "displayName": "Brawling Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Brawling.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "cold",
+ "displayName": "Cold Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Cold.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "electricity",
+ "displayName": "Electricity Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Electricity.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "grapple",
+ "displayName": "Grapple Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Grapple.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "heat",
+ "displayName": "Heat Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Heat.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "impact",
+ "displayName": "Impact Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Impact.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "krush",
+ "displayName": "Krush Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Krush.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "ma-strikes",
+ "displayName": "Martial Arts Strikes Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/MA Strikes.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "ma-sweeps",
+ "displayName": "Martial Arts Sweeps Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/MA Sweeps.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "puncture",
+ "displayName": "Puncture Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Puncture.pdf",
+ "enabled": true
+ },
{
"slug": "slash",
"displayName": "Slash Critical Strike Table",
"family": "standard",
- "extractionMethod": "text",
+ "extractionMethod": "xml",
"pdfPath": "sources/Slash.pdf",
"enabled": true
+ },
+ {
+ "slug": "subdual",
+ "displayName": "Subdual Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Subdual.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "tiny",
+ "displayName": "Tiny Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Tiny.pdf",
+ "enabled": true
+ },
+ {
+ "slug": "unbalance",
+ "displayName": "Unbalance Critical Strike Table",
+ "family": "standard",
+ "extractionMethod": "xml",
+ "pdfPath": "sources/Unbalance.pdf",
+ "enabled": true
}
]
}
diff --git a/src/RolemasterDb.ImportTool.Tests/RolemasterDb.ImportTool.Tests.csproj b/src/RolemasterDb.ImportTool.Tests/RolemasterDb.ImportTool.Tests.csproj
new file mode 100644
index 0000000..cf9043b
--- /dev/null
+++ b/src/RolemasterDb.ImportTool.Tests/RolemasterDb.ImportTool.Tests.csproj
@@ -0,0 +1,29 @@
+
+
+
+ net10.0
+ enable
+ enable
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs b/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs
new file mode 100644
index 0000000..0d1ca98
--- /dev/null
+++ b/src/RolemasterDb.ImportTool.Tests/StandardCriticalTableParserIntegrationTests.cs
@@ -0,0 +1,146 @@
+using RolemasterDb.ImportTool.Parsing;
+
+namespace RolemasterDb.ImportTool.Tests;
+
+public sealed class StandardCriticalTableParserIntegrationTests
+{
+ private static readonly string[] ExpectedPhase3Slugs =
+ [
+ "arcane-aether",
+ "arcane-nether",
+ "ballistic-shrapnel",
+ "brawling",
+ "cold",
+ "electricity",
+ "grapple",
+ "heat",
+ "impact",
+ "krush",
+ "ma-strikes",
+ "ma-sweeps",
+ "puncture",
+ "slash",
+ "subdual",
+ "tiny",
+ "unbalance"
+ ];
+
+ private static readonly PdfXmlExtractor Extractor = new();
+ private static readonly StandardCriticalTableParser Parser = new();
+
+ public static IEnumerable