From 719355da90d6ddf9633461be99bed51a311783a6 Mon Sep 17 00:00:00 2001 From: Frank Tovar Date: Sat, 14 Mar 2026 01:25:43 +0100 Subject: [PATCH] Use XML geometry for critical PDF import --- src/RolemasterDb.App/rolemaster.db | Bin 159744 -> 159744 bytes .../CriticalImportCommandRunner.cs | 31 +- .../ImportArtifactPaths.cs | 24 +- .../ImportArtifactWriter.cs | 33 ++ .../Parsing/ImportValidationReport.cs | 13 + .../Parsing/ParsedCriticalCellArtifact.cs | 17 + .../StandardCriticalTableParseResult.cs | 13 + .../Parsing/StandardCriticalTableParser.cs | 377 +++++++++--------- .../Parsing/XmlTextFragment.cs | 18 + ...PdfTextExtractor.cs => PdfXmlExtractor.cs} | 10 +- 10 files changed, 335 insertions(+), 201 deletions(-) create mode 100644 src/RolemasterDb.ImportTool/ImportArtifactWriter.cs create mode 100644 src/RolemasterDb.ImportTool/Parsing/ImportValidationReport.cs create mode 100644 src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs create mode 100644 src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParseResult.cs create mode 100644 src/RolemasterDb.ImportTool/Parsing/XmlTextFragment.cs rename src/RolemasterDb.ImportTool/{PdfTextExtractor.cs => PdfXmlExtractor.cs} (77%) diff --git a/src/RolemasterDb.App/rolemaster.db b/src/RolemasterDb.App/rolemaster.db index 8b072662857ae9ce1a28552f0df9f17741e12e18..183b54ec72aa2ef6c83414385abf942024f0d3cf 100644 GIT binary patch delta 12986 zcmeHtd30OHbvJOq4a>s=1=18nkpjgH55VIhiIikXmPCpaWr?CBYNKR|qD6?JxIiKR z?j-o22U}iZEwD9qxV={wG)?>MiP*e$ z&HB5qJA0k^wvx8;HQm`)yW1ScyY{{LqwW{i^y&FFyQ-Gko$c-M=wuXoGL!3%SjJtu zEMty8Y#H0yETh98vW%^*mJwEH!sv9q`Oh(*FL+Cg{dd$ash?4AsHQrho>up(+f`jH zQwzNRec9BcZ)!exMD-aq%e1jY-_*G8fY+B=wlwJ*n^U{@ zD!xU_)Ur|E*x1(LQ5MirNL^@YX>E1;=9e9s^p@srJ6*mx%hKGUw={0r>h#SnTblId z=Jgv3d^471LyO+rSidgcH*Hzgx9A(1tMxqJ!70nMu1R0tTwImwo3t!VE&94fUr~-P zxom0Do0{FK!r$2+3o2DV_{>V->FY^ z#G(OKxCZKS&kc_SVyr6&HIDf3AnRKLYuVF5D9N#0YVrchK2tGm05w~$uMgK|htW26dO2v21{{A6|ZQ=jZo?y3!~7pj(f zdQJ4N%RC_ax5uaoV{~XL`|PK>3Ndn>7*z1RC45L4S}U`8FSKMwKIL3dXLfsd8jY%C zGX`lG^g4oC^ZU?@%B$5%vWMFYK`&I?4EHYZn{j$YvMv0EW`eb(Ji@|Kwixn#r z*9Vtr)OQ5#e6d}-*C@Jvzu6@#YBq}Nu%gIhHt^}1>z@v&w>!Zf1)6u&E|2H-!>Kp- z$r>~xmj)l6%&vKzh#jNDL+28sGa~QTU)3W2xd@ZahZZ$kMK1=9 z@k}LpvKK#BoyP~p#nqTyk%WM`Q#M>?s5}XY?9SITP+Zxu*Ggz)>pIa8LT%;Bbt34P zxkFUnvxg@lk$vs83Yi3yN--I{@-}n3@Y~+ZlM&ArzEX<+Mg3OuZRP`adPL$qiHYG6 z94mdx0~cbk2{GVxojF_wQ2s}nC43~+8Qt&8o4IUw; zLxxxcOk!cUL@WepVj394bXX!5ffTVYED{UB0x=EdiRmy$ECRE{!Z1TD1k=Pcm?Eab zBr&WdNjwY*Vj+kV(_n&_4i||<-~zEQ#E6ApoR|h<#B>-X7J(6BVK`4L1jEEMI7du} zAz~3YODqggVj&nLrokCL3LPF6J_Ew%wD9Q{KBtK3&_^r+4-pH)Nn#;5K}>^QVmcfr z7J*~L!f=#W2#yfb;4m>AdWc2f5V0^E%u68;!2#jZEqwM1pDy9Ek5~xy64T&8Vmj<0 z7J*J;Vc1P91RcaQXeXw_E@BaABNm2MVjk8ygL(?>ad zgwqVCW{R^*oTfQ7I9=jYlrD04fzx?T=Qy3^bcWMuPNz7X3L3vIX%be5T|E3jdD83=^0KR=5&D5)13BmdWzFNP9Ng*B&R2G z9Vu+Smn+9PJ;v!#PLFVUnA09k4{>^s(*vA#bGo0?E>8Dxx|h=jIo-o)C#SnP?clT> z=?;KhTxsL9mD8P^?%;Ggrw?$tjnl21ZgC+R_A7f>{*v9?(&W{=mEKZsq1WlPD?d^G zLHTRtyUHIb-&DS)yrXhN&c+lO!6C&Kf}2RPfMPa{JP}VB)`h}CAcE_Q<8s7@+T#Kg7Y+_ z9+$#nl3$kmQOO_S+<=VarsS6-PfKoaz648>i-9e{qSO~8=a`xjn3{rFj#w!Hswn}g zDFLdB0#p})W9p*7)J1`*ivm*@1g0(sOkEI|x*#xhL15~8! z%?VJQ6QDXLKy^-l>KvRQ0GSn-Itv_AXMtntEO1Pn1&*mR0#j!Mrp^dVoe`KiBQSMZ zVCuBM)M>)hlpUr8s7?z|of4orB|vowK^2>y5|}y#98)KOW9lSuOq~RdsgnXzCk3V^ z1*RqirX~fZCIzM@1*RrArk;m{0M)r0`mgaFkzfvO$i0#oC_F*ObxQ{y0B3ir4T ze?tD6IyFbb5uiDF+zU_I=bKFzn z{yq0I>L25-$DkVY{+;)AZ^FAt`G3k+6jSNMU!vdeJn7l%ak;5Nl%JYEx z-`rnz&#{|ic+)fCX>xz)e$743-oHxk)Kz+?uG%MEDNi2$It6g*^kP{`1eIqkC5#HQ zln^R6EJZ`*8B5V^_{LdE1eL6%gi*O}DIrv@S&C*$nQ+z8bQ`{lmJ&hbDN6~X@-a&Z zq4K1qXsA44DY`8Uk6TIvmB%b4jLKz838C_+R8me2wMVQX-G*j+Sm96^+_ z&^D(nBe z~ zuj7t%%9V@Tt>7FSv}L4t`i#w#;>mv7B`F>~VM|N#;1Qc4#eD~COHQi6?%cgMC8f5G zMJctmE=XzH&Uq5qr(}Xdm{Yeb{W^j!p89 z_F;{W_F=6Z;*a)W|JUuqp8K!%VQF)iHozM@<;E;GSWyaoE(>i_Ht&y>A9()8Q|mtJ zT6F$m!B_HM&ii=o^Et0Lo`)YmfjwlKx@L4084YGq9&?1FUAS2vS?p!+j6=0+EEzvD z8jWK^>=VNfuygZTXTY*^YpK{jKVc^1{`nE3a4*&pUSyw$L0yI78XA}wi;gGafgtW? z2T+MkXo0g6Q5pd&9ECOH7+m%9-N>oN|F9XCE%X^>d$5Jj;xm5(mCt+?>T)m@8=2yO zRnV-VvC}TJRn78Lu)&p$>_z4gmiIKx*_T@1= zLQChH7e&))htW@miuwZE2r%Nq&?f8Hj*i{PTFrh7priIKyU5W+^MV|~n9)~EnOAVLu-vnYnYM?8|rTl z>W*?=ZQ5;)%K_yZN9b5Kw7~u?7pmBoa-j{+JXn(*ipdgX>o7${X}>up161QA^+Q`= zujj!q`JS@FeWa{moAO}~>&H+zDUO??viCiPN|QpLX9WdN$G)ErWfbamU=&(fna2TD z#l|UfLk?`7XCfNF>0nqvPRfO2}&8|YD&IT|%yR%7QS6?S1Rvrd8!o9AU+ z-G)N*B|OhgRbt~yPMjy$_>NqtAWM+_aV4xIwW`S2XAaA#-9|2-W%DfN!j$%{g_RUF zkcUxd*~NzQpsdK)VV;u#TMQ@9%sgwVLLXlhR8qiFKBfmP9qe*G_>0mub4Uiv8O|9> z${hQ66`UsD*YYv#Z&%@zsHld`mPKSUooAjEfoZ$pPf~K`Sgab(kXtH%f$w=lEnI1= zgxsQZjv1AWr3`fW^+tdmn6L7c@)+s%O|cVOU*cjF}I~%7%^g0bXYOa&QW>nlll_NBV|ad@IEQy%-VMSQ=Uom}9$AK7 zQ}t~1eXv13zee+z>|@waYAEH?1hpmXl?W6f7RE=}YxPjc5;!X<<`H|z5qgP$ay*_t(h;K}#dBn@F z0Hn)|O2dvVgr?b*a@b3g^{G<0pOo4g6*$dFTPb>_n1^MroD2Nud72GWz-B}Rnj9#u zVPC9((nf0-wAS(2qBb?+9Z{w{=iR-oPUld;x#_qGi}nX>(QC%GV&h0A zmuC}B$T7pJ#oazu1`qtVw- z&jO4%7BHonl zgdLQV%Hs&d-#&q%XdYm1o`l+BV?G12%TvboZIna|bGO1tiunFXYz&nkd#V-Iu^*ho ziBaDNdH#-!oo5k@oeT6C#XVY9-X^{%f7^#SL?yr`+wlGVyFUEzrgJhj*-XkfCNi_c zzS{;vlo@L&L(3z|4BG2hY!_^2-#P`w#Xd77N9Hz;(c6yuz8$Q$9jfrFJ-`;)A%H>$ z`{i~h$D16uP7;~kV6C<=<5zeL#F--+n>V2pJSL+LhzkJwlgKcnMAgjJ)_f$_k=(7^C`JV71V zBpy?X^_YTqlTo{?&IpDS0_HNq2$2CAU(LV(I{pGJF>hgr+Ff7s^R5FjfdBHI6N^zir1i16_jbI1=?92VN`z`hqzv z>NZ9UjjtC|YGdod!et;T8vE`j)Uwq>P%cq!n>i&>ZgY~qpDFg}7&>1VgA$6o zc@E!Ev^2Bl&q1~SfH^4}*=vO9=AFLCemn*T@H*9nCTwcC z((Y++J6*X2ujil3JDyvgGvQc-r|lEAiEBn9JA42t2y*s!`(bwlsp564$gB`SxIBq9P>wO^(QaH9Z0?5rnB;JioKMRDi=`9Lw26bO zaW+#Z8yzqXQlofK#$G>wC#$kil(DXZ@IblMrg0(TlQAPkjet3m*$=^fO5w$Qm_k%4 z*w`Uln4nr+Y;4M?GGe_^MXlhO8*}&I>8NZ4MeXCs9`hW*Q*7Bj%0YJP2<*Sz z{;rHi#%wo+`gmq8vA2#wkVZLi6xzkF-|`_SRq)=ZctQA&QFPMCBfuQT5VSTj@?n2@ z9{j}zGj9HIhV)*e*(k$W@g$MGe+)5${Lrodo=_-n5Rb-jS%DBZ&OUn_ zr#4LtnW-Rknj+uDf>l2!G9Uxd`OYwr-nAL0OjWrtMJqZy1Z7LTP)Dh1Iw4ZlcLJ(u zi6?&^6THz!4c&s|qp{z|mzSMCf#1@v9fsnf6`2BAsbchS^uS8HPvXQXI0?I{=6^Vh zX+_H*`^UquGR6CH>2M}rgk9>sR7_!b>?$wsA=dJnqX8P@Pz*D2VInpj4J0OJBoy}~ z==R@#>4%6u85>(%meZM>N&Ew9U?{}|f=FpkqLuZO_sSt_}&+Qb7Pe<>lD!18ePAO`W&PmTpk4Zm} zjz~91`=#gu)xI@SK3ZQET3F&J(1qoblij_;TjLW`Ba;)0%8Cl=h0fIAkVJgQJZvBItrCJqy{2 z@vA2vcz4rXReKGGO^~E{g7dP=CgS_!Q1OAnfX)+FuH=dKfW{L$J9%P3K;?-mR`5jR zS6E`P^MSu5eLmwfyT!+)f0OQ(Zk2LUN{UJwrKM6>s*sAk?|EPH{>J-r@978X4juHS z?QN~$_6BbX>Ieve;B0JYh{XcFakwrg*B!0lj*e)r-#0ds(irZD?Acf1OY)R<@M+kw z+viKnq%?-xBU`#8-zZO^&8*+-^^MG=G=|$E>DB8*-!M;VZ40+GbguGeoN;Ijw?-B(clic*N~AU1+R(nJ*w;Uk(io0Jn%kVdKAzIj8jdv7H9357p3>YJ zZiy)2BHxx6PikrmH%H2Xg}z>%(%2eqYVeiXeLXWNjp4?KTPpB%^OT0xaAQM}3wfg? zrC5N6aQ@~JU%e`wm)@1$lwOt27XQ)tXXio3GmdD{HwwRQ|Gj->!35eZG})iFcNE-) z-at!)r}7Jrtg#2|C8haqe^=2(SHxW6wh1;ts2A%?0`^iZ)Yz~_K6@xHH#VKU{_xrB zZ-xYHZbg9}n@yTaUCYEWa1DTK(;D)-=3uIO+Gg{9Omip13CQ(pXAgZ%{_MX@U8X^# zKAELvM#(0ui-mjs-8FMd0OzIN*Z6v7MZP-k1HiS@*QJFlgUWXiImR-qDeI0S{X z-cXC>^~50<#ahvo|HXI4;iu#W?p&YmIbN!4$d0f==2|mI3aPy_RYM`D911;k=LYyy z=XV@!)p>JfAUo`#9X6$@YAC4mh8kIGPaJ|W$B#zfC+GclmDSCp(~g?0*&(aVMzfq0 zHF|hQY0(pR8To7Os?49b>oVoj40e;Y?4VWl8nbK^%IZC9@acLKw$q}c$JG3iV{(4< z*h+z=&wb($Yqc{wV3q7QB^#8~SVz?)j zu{vn5S%$3I1*D-1GFZLyvprV5s#!swLwBeN>N;`n8u%G``Gj26#9WPRx8)i%%jvLp zvti$JVpV?6iJJT)CqngRFdMbJs>~8P=ux=K6buh}`N)G4@^rj=xK4D1n#rSBeA1{8 zU8JL*=OsgaZ5zo2Z`zAYNp>HrKeNHSj6R*{w#n4sa9kcC?=N{QnM{owfch_ZW`z)a z(AJ|TYf)DsfB(r+EQC?{yj1e@O3E-|pab0d?URAIFqCWv_GYiL8W+r3`V^vh>AM<> z3O?r>p{dGK3S=lnq_RetT+U`SjAr**!!u~^rQ;nX!dL|3(E#P1I5l6`mOp)JHc1p@ za=mh!+!OChrs8y_Oe9m2^5CSLoSLi)HPAX!*(=#3PxjKM65R;{EKkruA>-UY?tT4z zmCYBX%d3;gm<-jzw}X}o1M0ad!0Ma?Sd+aDP|ha4cmH#@2c4H04!a-;&Xqc|of@+% zRA!ee4{jEQ=cNs_u&6*lI_e-sL+!+QJ5G7Eg;4~A!2kipBN3*5~HS(L9BvwVhp4aqa&3V4JpK^C`gQgYKSpV zH8DDpiP2CMF)FGgMnUt4G07cmMdCdNQcVszvnMngr!sHl(_ z1=-WY8mNF69U)>gBoLz_8!?LT2{DH7F)_OE5iy$ZAu+1(cVZObJTZpw0WrGpJ~5i` z9xY&(8v5jIY#R$a~ip><8C^k}Tpjc0_j$)YN0*WDu^C{L+G$`s6 zHHvDQ8iitzVhzP=iZaD2ij@@SQJhP$g5n&CvngIev7F*8ie(f_DF!I|DV9+56{gAE zOZ37^QKaah=%(nRSWMAL(Lu3@Vj)F4#R7_mqCn9`Q22!6#}q%J_#wr=7Z#);`#in) zfa3cU-=p|9ihrf}F2#2!{)OV(6wgt7i{hIU-=O$9#n&kQnc|-)zDn^GiZ25$L&8h+ z;*S*nK=CZa7b%{h_yWb}DLzN>Sr@3`KCvG=4hgwSo4g^f;+^N6<(0fnuOOZm-w|II zUlN}epAw%G9~J*Y{IU3b@sxN>yj{FaJS_f;m=&)Tr^EzQ@MKgl@R_LKdg%Q_!WK_j zv3<{GL*G|yr@@?3Z1;mX7PNgA%nWm02O&w>N zI?gn8jA`l^)6_AhsbeTjMRkmcYLba+l8I`PiE5H*YLaPcf@x}kX=;LLYJzEMf@$g~ z)6`L>siRC&N13LMGEE&}nmU3isA7#UQ5|8TI>JPCn2G8zqMAC)GM+yPA*QKA zOjC!LrVcSp9b%d~$TW43Y3d-;)Ip}HgP^HIu?Co^4lq$2V4^y}M0Eh!iGcJoP3>oz z+Rrq#pJ{48)6_nuseMdS`JaK?qT0BX>q^r{t2$TMW|?e)^o%Y^BC^e-SG_)H3eQ~k8TV2AB`D^8*mJ#Sy~pSNt@}27ge3gXbFF8Y$L@Z_eG`8277|mp zkeIqf*zbZdwU-2KCv7(SjRJ6A&~^fNn()#Zo4bho@rj%U1QzEyL>K28gc#>4#2M!b z1RLiDL>%WjgdOJ^#GX5CHK0NS@(cySkaGiKk#ijal5-8Bl5=$og*jIsJUKTYMmg6Z zP&wBiS~*Xv5VQP3fxzY5fav91hY;pmgE;0~g<$4ffr#eZfUxFVhuG#^g8=7Tg(wG} zro*d1tn&;50-kdnqMmaNBA;^=!k=>mG=Oshgn)A$)PQphB!P1kbb)haIE@1Q!T^cj zTnC-tTm!M-Tm{A8TmjkO+yL$1Tn7Q+Tmu#1Tm>oNTme0y9Oj?X08!x?Iw%Y08psRh zDrgMn3J4A72B;0^I!F%Z8t4w^Du@r~3Mdfg2FOsFU+5r4oNJ&+oU0&9oGYMBoEsoe zoa>-coNFLeoU0&LoGYMNoEsoq7Ee2MkT0I0frfFef{<~pfSPe`fTVG*gRXI|fw*z5 zg2HjGfXs1jfYx!YgW$P~(k#9;kUgHEg7$H)fB)G4(|p@-^@2yb}P0$sN( zw!@+&S+Y#I?XXsU?6kugB&l!G#_76Zuf@rVVh`hVEwS6;jp2q! zlzdSIq)wakp7e(F2k9B<3F*J2A4{jDvg6XXq&zHOr==-rSn8H`N*kokhwAQ!#pU#t zt{Ny;TnOu@f-@FNSuEN+Zm~W4#w@mDchX{8x)K&!zj@SRtJjTKtaH_{#a65sve@F~ zgBELFG+?pjw*KOktl_$*J}XfP$1PSCj9JWA+G{bl)MK$CSGTiJD27fKMj!l{@Cp1a z{os>=U7jBhnrzaI-hUN;j8$i2mN=RaCgJ#E^B(l6F9zgWZkVh!`t#Z9)R_Qe|J z#jBtHZ`Uxz&#Ym>s%F)^2DUIP!i%BAHf@tG@%qFT&lT>B>tUA@JAJ78p=lpFTx>Vn zbA=Ci=Ad^A4dU#xows>kgTFh5Ja@bQ-QD22zW9LiI>)vmXW`}c&Vp4aXghz{{GZZ{ zn!V0i^sP3BFD2UuefZ}gRE}?`MH9HB83pk@&1gMIn_HTx${w`5E6h0Y*8A{V?cjZT z7>(hpTab<~uScu!xpp)Qk9DBJQu9FeT5C;s%3R(7MYKMgT!>~N(S<)~K@XCP3N=%Z zy~c7&o4e?)QXk*QF<@7znd~azjjgB_za2qK3G2h>ROD>5a#^q}%znwLC1RG4T9m#E z_oi4a3))aMcD13HaZw(vBeObtz`DmybK4XdkvQJG2({qiTGWqko{xIj<+(+0dl@}@ zwdEZ&n~Ap`$M@BvR@@jyT?AAd>OgfQtwmv-oX$>Lot!XtRgn>i^Nzsg1iqmI4dCL1 zsK3VAuwy$E8FzNSmu>eInr>5oa%vnmEJX{+NWQfYY53uVXbxdZN%FEwlp`!_thb}1-3Wn9w^%ZWo<3V zrmPA!nT{YCgxCdc(8-@HMp~J?Dk-|A2V-%$cM5g}60oKpucP}LXkvuAtOpm^k@~EM zJTY=XWvs!=v*T8SmzmXcg2ni#lPN^^I`O_GXdM9!dzK<4f9PnLQJ)>N1|@Vb0{v8G zial|N-jxAcx@2Y3cPxcR0IWUCE5Fc-_5?P0lB_2g(Ts=5IQQaG10BL2)}oR8 zxx32dLI9J00{j%~r-Iesr$01TMlVTN@S&^yS!)B>TxV9m19QkbabpRL{}CTLRg$f@ z`dxnz!p>2(6JJ$=j`_27R^zIG>BC7CrlLM>Pfpb8uf zpb!1o1y;6TE?x`S)jROAGE|9gDn-xuvmq;Uw%J88Mp!|HhQ|iA$Q@^HK-If%t5#Mv#RA4F^ikQtEvmX*n|T3Tn+k$KdV@o z8_Z%l9^3K99u&prgDB?D2Cd9(W?(gGcRNlh=rMn`=CkS5+ld&>!?hBK-zgRK`?J-S zN4nR9>FtoW<3}}^Tw@wK;m^ue<~}n7D(a}(j>RUl2oF@D+n{EfO)UDd&HH!pJD#t* z_q)DYjGebRZg7k`b{74<=%+{<$$)w}SE>(Oj%??a{< z$t`4?d-S*jtXloU;}T63xen$^k4rdpaJ}HT#DQEp%caL9oLUNwOAwim1F-Q;qqMn! zsSnSHUYtQ=GmvXz^@t+b098{ob#|_mr3OSQqoj8D&!X9#i_F~q0g`QuLmueCB}3?H z0wMhCA$ahwCy;ZFc}=c`d7IN_e;Ih|2934SzgzdxWt!UQlXs*dBy2mWsOYVm^wqz7_V*m|5 zHVTW-Ru|5Uq8hf7o2ke(T6a}$Zh^3HD&u(Lc2q^)HPs0eBOv2@6KDlVR_0`ibNnz$ zp*clv%oww3El#5S7(mA-l4wIENo6C~m8-Yf*kbl4p@=@VhCVcZsL^_A9qge&qaSXD z&Ys$g))F=fd(!0T)?Mt#Di(5<#+e7rBzYtn=?hdMZiUzJ$t`FjVJS$X{v_bMwv^5L zidPbw-OsqbSG?T0#&KEEEc529CuJ8h}MW&_^78C8~M!a1&ywdNK&sAKreon(X4 zh40x4BLkq~_HAeherqSv=ddm<%Pp{8-j|x>ECLMZ7_QufR+D1+ZO|jM9lo+i%B{Om zp<=GfS#oNwHiP5^Yb0lGmcMa+w;f*LcW*}vFtGJ)e9&k%Yc`nUl3QgS2$3O9hFaO& zX`LC`Uv(h`_jjS~7}zwPdnGCjL~@oKXBy3Eh+C(Y#3!yqvq^VHcYqgwf&Dw7<0r2~ zm*8c4$;VJAXX!K?E*T}I)Fl4oDu{OYbn@e`>_l_W-~?WB1$6trcfu$ggmEi?Qxha< z)vfzbWgwm7ihhk*4>IIblK7c@sD`xri!0Dtm*^Vk9@!s{)z*>gYWz>Tkm0bB@YDN{ zp_x-T&6@V(=34rGP4Hm9AYkV0MrwInj-`@g6EYmGfD;yD6Y&Z7$bhEy?uH=&k6^j{ zxh^vGK%odd%T<~g%~`LQ%xK2FkBoByZ(f0FNw+Ka!1j{7A>IxA*LxssRT3xnAk9I~ z&P*k!gPl0wU+qEj3*dVOkMBWBfed+2Nqr!gv))meDsvkd6fJ=dbh1HNc_oVDhp&Wr z_oYsxm1cJ2tXEcMPo`usc?1a}xTRh2yy4q#+ax(mGMSuA(qlm+fgUd*eW59!r38waxX~P^H(7s?f5*>aXOpLD{`N{b|orG3H;VdCQ7&M13lUbGX(=%fzPc( zl_6GYdyc<~H<`6`I*gKc2YV~b1i|l@fk54{3??%IKry z0nH-cTZQYwUP#mG6M zzIeQ6gnbK_7EdHc~XK`|)#E!n=Oa2ITdRKyc2q6iZGg=%@N@5NlEZ$=h)C z22>jukuUO+*Rapl4+P{3jO9wL=p8m|;XBf)4&z@UR7pbV#tmo_fm$5g2(N?35egc5 zE?|AP1gGft`7l?wW_Y-?+i>g>3SYMoMF>>lzidS1i!2$u(232LtvSEdfemJeD(}$D zeBsKwY!ljy0Tle7n^0)}XY*+_say&BW=M9^*F4lgPUn67Mx;2VCVEHW6Lr9ARN1;9 z*{uD&IiJ;j+?-8jnKpz=ir62_OW+WQUET$E^O%6)U=)*VB=5qH3aA3fqqJBd Ht@VEaY`zsP diff --git a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs index 238775f..4cbaf30 100644 --- a/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs +++ b/src/RolemasterDb.ImportTool/CriticalImportCommandRunner.cs @@ -5,7 +5,8 @@ namespace RolemasterDb.ImportTool; public sealed class CriticalImportCommandRunner { private readonly CriticalImportManifestLoader manifestLoader = new(); - private readonly PdfTextExtractor pdfTextExtractor = new(); + private readonly ImportArtifactWriter artifactWriter = new(); + private readonly PdfXmlExtractor pdfXmlExtractor = new(); private readonly StandardCriticalTableParser standardParser = new(); public async Task RunAsync(ResetOptions options) @@ -26,8 +27,8 @@ public sealed class CriticalImportCommandRunner { var entry = GetManifestEntry(options.Table); var artifactPaths = CreateArtifactPaths(entry.Slug); - await pdfTextExtractor.ExtractAsync(ResolveRepositoryPath(entry.PdfPath), artifactPaths.ExtractedTextPath); - Console.WriteLine($"Extracted {entry.Slug} to {artifactPaths.ExtractedTextPath}"); + await pdfXmlExtractor.ExtractAsync(ResolveRepositoryPath(entry.PdfPath), artifactPaths.XmlPath); + Console.WriteLine($"Extracted {entry.Slug} to {artifactPaths.XmlPath}"); return 0; } @@ -36,16 +37,24 @@ public sealed class CriticalImportCommandRunner var entry = GetManifestEntry(options.Table); var artifactPaths = CreateArtifactPaths(entry.Slug); - if (!File.Exists(artifactPaths.ExtractedTextPath)) + if (!File.Exists(artifactPaths.XmlPath)) { - Console.Error.WriteLine($"Missing extracted text artifact: {artifactPaths.ExtractedTextPath}"); + Console.Error.WriteLine($"Missing XML artifact: {artifactPaths.XmlPath}"); return 1; } - var extractedText = await File.ReadAllTextAsync(artifactPaths.ExtractedTextPath); - var parsedTable = Parse(entry, extractedText); + var xmlContent = await File.ReadAllTextAsync(artifactPaths.XmlPath); + var parseResult = Parse(entry, xmlContent); + await artifactWriter.WriteAsync(artifactPaths, parseResult, CancellationToken.None); + + if (!parseResult.ValidationReport.IsValid) + { + throw new InvalidOperationException( + $"Validation failed for '{entry.Slug}'. See {artifactPaths.ValidationReportPath} for details."); + } + var loader = new CriticalImportLoader(ResolveDatabasePath(options.DatabasePath)); - var result = await loader.LoadAsync(parsedTable); + var result = await loader.LoadAsync(parseResult.Table); Console.WriteLine( $"Loaded {result.TableSlug}: {result.ColumnCount} columns, {result.RollBandCount} roll bands, {result.ResultCount} results."); @@ -82,14 +91,14 @@ public sealed class CriticalImportCommandRunner ?? throw new InvalidOperationException($"No enabled manifest entry was found for '{tableSlug}'."); } - private ParsedCriticalTable Parse(CriticalImportManifestEntry entry, string extractedText) + private StandardCriticalTableParseResult Parse(CriticalImportManifestEntry entry, string xmlContent) { if (!string.Equals(entry.Family, "standard", StringComparison.OrdinalIgnoreCase)) { - throw new InvalidOperationException($"Family '{entry.Family}' is not supported by phase 1."); + throw new InvalidOperationException($"Family '{entry.Family}' is not supported by phase 2."); } - return standardParser.Parse(entry, extractedText); + return standardParser.Parse(entry, xmlContent); } private static ImportArtifactPaths CreateArtifactPaths(string slug) => diff --git a/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs b/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs index 7965855..8b45346 100644 --- a/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs +++ b/src/RolemasterDb.ImportTool/ImportArtifactPaths.cs @@ -2,18 +2,34 @@ namespace RolemasterDb.ImportTool; public sealed class ImportArtifactPaths { - private ImportArtifactPaths(string directoryPath, string extractedTextPath) + private ImportArtifactPaths( + string directoryPath, + string xmlPath, + string fragmentsJsonPath, + string parsedCellsJsonPath, + string validationReportPath) { DirectoryPath = directoryPath; - ExtractedTextPath = extractedTextPath; + XmlPath = xmlPath; + FragmentsJsonPath = fragmentsJsonPath; + ParsedCellsJsonPath = parsedCellsJsonPath; + ValidationReportPath = validationReportPath; } public string DirectoryPath { get; } - public string ExtractedTextPath { get; } + public string XmlPath { get; } + public string FragmentsJsonPath { get; } + public string ParsedCellsJsonPath { get; } + public string ValidationReportPath { get; } public static ImportArtifactPaths Create(string artifactsRootPath, string tableSlug) { var directoryPath = Path.Combine(artifactsRootPath, tableSlug); - return new ImportArtifactPaths(directoryPath, Path.Combine(directoryPath, "extracted.txt")); + return new ImportArtifactPaths( + directoryPath, + Path.Combine(directoryPath, "source.xml"), + Path.Combine(directoryPath, "fragments.json"), + Path.Combine(directoryPath, "parsed-cells.json"), + Path.Combine(directoryPath, "validation-report.json")); } } diff --git a/src/RolemasterDb.ImportTool/ImportArtifactWriter.cs b/src/RolemasterDb.ImportTool/ImportArtifactWriter.cs new file mode 100644 index 0000000..53c3cc4 --- /dev/null +++ b/src/RolemasterDb.ImportTool/ImportArtifactWriter.cs @@ -0,0 +1,33 @@ +using System.Text.Json; + +using RolemasterDb.ImportTool.Parsing; + +namespace RolemasterDb.ImportTool; + +public sealed class ImportArtifactWriter +{ + private static readonly JsonSerializerOptions JsonOptions = new() + { + WriteIndented = true + }; + + public async Task WriteAsync(ImportArtifactPaths artifactPaths, StandardCriticalTableParseResult parseResult, CancellationToken cancellationToken = default) + { + Directory.CreateDirectory(artifactPaths.DirectoryPath); + + await File.WriteAllTextAsync( + artifactPaths.FragmentsJsonPath, + JsonSerializer.Serialize(parseResult.Fragments, JsonOptions), + cancellationToken); + + await File.WriteAllTextAsync( + artifactPaths.ParsedCellsJsonPath, + JsonSerializer.Serialize(parseResult.Cells, JsonOptions), + cancellationToken); + + await File.WriteAllTextAsync( + artifactPaths.ValidationReportPath, + JsonSerializer.Serialize(parseResult.ValidationReport, JsonOptions), + cancellationToken); + } +} diff --git a/src/RolemasterDb.ImportTool/Parsing/ImportValidationReport.cs b/src/RolemasterDb.ImportTool/Parsing/ImportValidationReport.cs new file mode 100644 index 0000000..f9a4c73 --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/ImportValidationReport.cs @@ -0,0 +1,13 @@ +namespace RolemasterDb.ImportTool.Parsing; + +public sealed class ImportValidationReport( + bool isValid, + IReadOnlyList errors, + int rowCount, + int cellCount) +{ + public bool IsValid { get; } = isValid; + public IReadOnlyList Errors { get; } = errors; + public int RowCount { get; } = rowCount; + public int CellCount { get; } = cellCount; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs new file mode 100644 index 0000000..76475a3 --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/ParsedCriticalCellArtifact.cs @@ -0,0 +1,17 @@ +namespace RolemasterDb.ImportTool.Parsing; + +public sealed class ParsedCriticalCellArtifact( + string rollBandLabel, + string columnKey, + IReadOnlyList lines, + string rawCellText, + string descriptionText, + string? rawAffixText) +{ + public string RollBandLabel { get; } = rollBandLabel; + public string ColumnKey { get; } = columnKey; + public IReadOnlyList Lines { get; } = lines; + public string RawCellText { get; } = rawCellText; + public string DescriptionText { get; } = descriptionText; + public string? RawAffixText { get; } = rawAffixText; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParseResult.cs b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParseResult.cs new file mode 100644 index 0000000..0b5182a --- /dev/null +++ b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParseResult.cs @@ -0,0 +1,13 @@ +namespace RolemasterDb.ImportTool.Parsing; + +public sealed class StandardCriticalTableParseResult( + ParsedCriticalTable table, + IReadOnlyList fragments, + IReadOnlyList cells, + ImportValidationReport validationReport) +{ + public ParsedCriticalTable Table { get; } = table; + public IReadOnlyList Fragments { get; } = fragments; + public IReadOnlyList Cells { get; } = cells; + public ImportValidationReport ValidationReport { get; } = validationReport; +} diff --git a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs index 7e52e80..876c9cd 100644 --- a/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs +++ b/src/RolemasterDb.ImportTool/Parsing/StandardCriticalTableParser.cs @@ -1,208 +1,206 @@ using System.Text.RegularExpressions; +using System.Xml; +using System.Xml.Linq; namespace RolemasterDb.ImportTool.Parsing; public sealed class StandardCriticalTableParser { - private static readonly Regex ColumnRegex = new(@"\b([A-E])\b", RegexOptions.IgnoreCase | RegexOptions.Compiled); - private static readonly Regex RollBandRegex = new(@"^\s*(?