From ca283d3ad99d35eb776617c58f66d6cd7eb2f7f7 Mon Sep 17 00:00:00 2001 From: Pip Liggins Date: Thu, 1 Aug 2024 14:42:19 +0100 Subject: [PATCH] Add validate CLI --- fhirflat/__main__.py | 6 +- fhirflat/ingest.py | 65 ++++++++++++++++++ fhirflat/resources/base.py | 7 +- fhirflat/util.py | 9 ++- .../invalid_flat_bundle/condition.parquet | Bin 0 -> 14360 bytes .../invalid_flat_bundle/encounter.parquet | Bin 0 -> 18309 bytes .../data/valid_flat_bundle/condition.parquet | Bin 0 -> 14975 bytes .../data/valid_flat_bundle/encounter.parquet | Bin 0 -> 26792 bytes tests/data/valid_flat_bundle/patient.parquet | Bin 0 -> 3911 bytes tests/test_ingest.py | 29 ++++++++ 10 files changed, 111 insertions(+), 5 deletions(-) create mode 100644 tests/data/invalid_flat_bundle/condition.parquet create mode 100644 tests/data/invalid_flat_bundle/encounter.parquet create mode 100644 tests/data/valid_flat_bundle/condition.parquet create mode 100644 tests/data/valid_flat_bundle/encounter.parquet create mode 100644 tests/data/valid_flat_bundle/patient.parquet diff --git a/fhirflat/__main__.py b/fhirflat/__main__.py index 7832ca3..7dcec77 100644 --- a/fhirflat/__main__.py +++ b/fhirflat/__main__.py @@ -1,6 +1,7 @@ import sys from .ingest import main as ingest_to_flat +from .ingest import validate_cli as validate def main(): @@ -10,16 +11,19 @@ def main(): Available subcommands: transform - Convert raw data into FHIRflat files + validate - Validate FHIRflat files against FHIR schemas """ ) sys.exit(1) subcommand = sys.argv[1] - if subcommand not in ["transform"]: + if subcommand not in ["transform", "validate"]: print("fhirflat: unrecognised subcommand", subcommand) sys.exit(1) sys.argv = sys.argv[1:] if subcommand == "transform": ingest_to_flat() + elif subcommand == "validate": + validate() else: pass diff --git a/fhirflat/ingest.py b/fhirflat/ingest.py index 5e93bcc..875066a 100644 --- a/fhirflat/ingest.py +++ b/fhirflat/ingest.py @@ -580,6 +580,49 @@ def convert_data_to_flat( shutil.rmtree(folder_name) +def validate(folder_name: str, compress_format: str | None = None): + """ + Takes a folder containing (optionally compressed) FHIRflat files and validates them + against the FHIR. File names **must** correspond to the FHIR resource types they + represent. E.g. a file containing Patient resources must be named "patient.parquet". + """ + + if compress_format: + shutil.unpack_archive(folder_name, compress_format, folder_name) + directory = Path(folder_name).parents + else: + directory = folder_name + + for file in Path(directory).glob("*.parquet"): + df = pd.read_parquet(file) + resource = file.stem + resource_type = get_local_resource(resource, case_insensitive=True) + + valid_flat, errors = resource_type.validate_fhirflat(df, return_files=True) + + if errors is not None: + + valid_flat.to_parquet(os.path.join(directory, f"{resource}_valid.parquet")) + errors.to_csv( + os.path.join(directory, f"{resource}_errors.csv"), index=False + ) + error_length = len(errors) + print( + f"{error_length} rows in {file.name} have validation errors. " + f"Errors saved to {resource}_errors.csv. " + f"Valid rows saved to {resource}_valid.parquet" + ) + else: + print(f"{file.name} is valid") + print("Validation complete") + + if compress_format: + new_directory = directory + "_validated" + shutil.make_archive(new_directory, compress_format, new_directory) + shutil.rmtree(directory) + print(f"Validated files saved as {new_directory}.{compress_format}") + + def main(): parser = argparse.ArgumentParser( description="Convert data to FHIRflat parquet files", @@ -637,5 +680,27 @@ def main(): ) +def validate_cli(): + parser = argparse.ArgumentParser( + description="Validate FHIRflat parquet files against the FHIR schema", + prog="fhirflat validate", + ) + parser.add_argument("folder", help="File path to folder containing FHIRflat files") + + parser.add_argument( + "-c", + "--compress_format", + help="Format the folder is compressed in", + choices=["zip", "tar", "gztar", "bztar", "xztar"], + ) + + args = parser.parse_args() + + validate( + args.folder, + compress_format=args.compress_format, + ) + + if __name__ == "__main__": main() diff --git a/fhirflat/resources/base.py b/fhirflat/resources/base.py index 59690e4..8d1f47d 100644 --- a/fhirflat/resources/base.py +++ b/fhirflat/resources/base.py @@ -88,7 +88,7 @@ def create_fhir_resource( @classmethod def validate_fhirflat( - cls, df: pd.DataFrame + cls, df: pd.DataFrame, return_files: bool = False ) -> tuple[FHIRFlatBase | list[FHIRFlatBase], pd.Series | None]: """ Takes a FHIRflat dataframe and validates the data against the FHIR @@ -100,6 +100,9 @@ def validate_fhirflat( ---------- df: pd.DataFrame Pandas dataframe containing the FHIRflat data + return_files: bool + If True, returns the valid FHIR resources & errors as a parquet file, + even if only one row is present in the dataframe. Returns ------- @@ -115,7 +118,7 @@ def validate_fhirflat( lambda row: row.to_json(date_format="iso", date_unit="s"), axis=1 ).apply(lambda x: cls.create_fhir_resource(x)) - if len(flat_df) == 1: + if len(flat_df) == 1 and return_files is False: resource = flat_df["fhir"].iloc[0] if isinstance(resource, ValidationError): raise resource diff --git a/fhirflat/util.py b/fhirflat/util.py index f778bde..f62c23e 100644 --- a/fhirflat/util.py +++ b/fhirflat/util.py @@ -70,8 +70,13 @@ def get_local_extension_type(t: str): raise AttributeError(f"Could not find {t} in fhirflat extensions") from ae -def get_local_resource(t: str): - return getattr(fhirflat, t) +def get_local_resource(t: str, case_insensitive: bool = False): + if case_insensitive is False: + return getattr(fhirflat, t) + else: + for a in dir(fhirflat): + if a.lower() == t.lower(): + return getattr(fhirflat, a) def find_data_class(data_class, k): diff --git a/tests/data/invalid_flat_bundle/condition.parquet b/tests/data/invalid_flat_bundle/condition.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f92c847aca2ed9fa029452739b5897f118b8f962 GIT binary patch literal 14360 zcmdU0U2G%O6&`y{b{DeCvh29d5|-5L4oj1b$99~J5j>2Y*s)2ni8t{dMUiJbPMnO# z#vaFU0I8~~3RS2=RaK!rRI3VARaGG#D6J~A>H|-R$7-K=KovrLM186dLOth>XFMK{ ze**EcHLNpp@44rG=l7m-v%AhEytIR6>A4l!PE!{t%Jt(*6h-B#<+_}o;wsgmG!>kg z^08h&P0<%==M2zYpvUO3$$9GfB`Z3Y{Q1;XWPM;C|&DDr&@_3)E1 zT@MTA_;S8^yQqk>LOCxwq+(4uElNZ&L?!Rqogdhl_Z-Z}r%)+__8SbyQdN`ml%}fl za}NXP#%@^>rx2Tk2Rz)Kbs3&vnbF#xoYkdtwWUJrdy{47;}re$mmx*0mWpy&D(^IBccq)NBxd zj{f90WjkeO?mVh%df}V~-O`^N&UYNn-#eJE#+h#(Mt{1m*ez}8q%Ui(_sI3rAk(_4 za!~XXeHG)nm3i#fPq(IA+5=Eucpmj|%6tDv>3QVmHs>#G%o~@PdlM)}Cl^aZSLy2-qKk{?VW%ZofRhix*HeHq&jQ-{P70Ul z-30dTlLoM&#+xL)4{gqmY|PzB=Bpl7e#xAVRKLtXTRD-dv^T!VBvTd9gQP=v_w_zgPNsq>f++jFt+t$T$d3 zvCtDF8Zw0$hdfwHedpRxQd?ypyYqK8=O1j$dsmsSo<^y)joLd}$UxEO>5DU1ypf{J zWwXKmPU<@O2@p@aNU>qFG54OCq^XH(W7LG}iB5Y|M4?zHiusMYSZxyLTz76VKYiwT z6SoOq0RJIF$i-*qaV>Tn<9~Sp#!LmG%YmcnHy|>+0Rv-5|KdlJw7pdfo-4~WQCWmT zce7X$37EHR&bMvMZ=YrUbA7T8PYdOkK>pPdB#ycq_>CajZzhocoN0p`uZlvsniunk z(}3~wY39Rez}RMDVrhx-GJQo;^%Y#Y{!53@$+sq1I;Nmu*P1oclI-3eU)wN|cI z1#z=kQK70{c6XNf*(`cln~Ra5CD38hjSowdIqNdISGv*H^bBR40;KIuH^1`hGB2E~ z;ki;&ntij&T`zNw?U!Bj4Tpeasjmz4CC#vBCLI8KwJ7Dmrsfpvz;PG(wv_GTc`Mnh zbI6-E=dW$d7Xjvj0Lo@$Js2bGPCjP8bDhy<+E3uw7NbCCjO@F?N!rnh?8mmZi*jB( z+?K#P(l)!jO(N%SHs?QV%)1NBH#aBS+}lq*_OTLX;%j$+a*fr5U9ps--4s23&EC~r zUZYkf0cF5$=tQtKf)qNo@;uHsT%fY8S89f3@kfJ?qOLX7JZ>w%?yVH z7PjlUVRs_}v@0j0SZDPTr7hBcvwPTG0csyk#}S&Qz$pDrFgT%rqVpiRHohW(ZOKeW z_SPoQ(jB?7t%1&Tatwjn3y&qA1E6#V3+1#=Oiq0L*nPb^dC=}Bm{Hv%{2w>Yqi%AU z2da%^POkrBM%kS)4}Sl{-0x=PWLpqsRBZz@qEeq3NnOr7aFs7JvU-)dpY^2kqe-Wo zIpe(hB>YBXV#YOA$;tU#&2{hbSKVqME0m>rNv^q{n|jIJRouE?o|$s16l&co62e#A za;_x861gr(KmmQXl;7?gQ#aaHpko6-Gf$9coxn8O?^@%dSCkhIVmqaC(&nY<| zf7OjeTpoUpf@`-nBR3Y@ufN^~(}}`u`rQjM>wsiM@SX6X+^^37%$CgVj^BE8@830T z+8wXsUuvo7P`e%Cz@)w7lMLbxYhab;7&$y^XbiGLYVc??gPZ^NteK zq2C#6w&*m|VEWOc?6^rCq{uE}6F$CNmXJqh($_`}DZeYBbljnK!|0keZ44*Lu%}Je z*2bIB?w!rEC%SR_p3`dM&RNsO-!Y6k?i;%|@0vF4ej?$bZ(iWKXeyH z-5IoQ_59R@km9!nyWUk;Hflo)oQtc%-{|ff8~Uu-oPw7 z%etZN&<+p7q;J-TbCheGOC)Zko>T7+U2i|@!H;ieo#VI-_=BGpIc|;%?Qju}+u_1H zWGocwm~Y_VQhUAuYw#UDgbSIEaa@Cgb?_6JEbfP44&FI#Efa^Qo3WBq&qkATY2Vrb zznoNfd9`?}7~*46u#igy_Ot2T0v}EKvZ=$uY9v^QNo*|^j_>ilLoc5SXf%@0jLpX) z3h)c;@~JK4QO~CiYb#-{m@P>XA59eaQbft#2D)g#%SRi?r;&j@GU>HKepzaexp3Ue zmn7hykYdHg4)F8x=};i;PXvT0^5u%WFaAn4wZ^LRUMU}q0FT6OY_Gf{HscasT06)_ zK`z7;jwh2#8Rb?u9xNm~upc3P%3mw$xWe(>Oew-ESSeV{m6Ch;Fqv1fAP?|hGrGPGgDYXQ zm!@B?m?;JK#pGHgAKj`Oc$adiWGx#B)-xc7$~%6LU6wX;>4daGbhAZdvckznrK5aA z(QS|(5#BubTuavec;$p7E^qY2I}*M6&3peIUsaKjG!1A}mYBoN|yU zRiunx^&>u&WHYG**ltPBQ-#|N%pHc9!5p$xAD~Y&&57qTkfp{iL*82M$WtW8f!u61 z9qW~!x@fH8UDw?S#yOlB5I=#VTq;;!kweXFdJVBi8>#pajPGS{_qYAA*57x(fV>q= z)&-w*z!w7zp_Ei*+KLnfUlQ`gAfGwbdMkgb<cVnd68s@4t>;UmkE)qn%61Ce%RR z>sf8Y+xKiS;{D#v>OM7mI9^EmwBF$mbz4gxg$n5=sb8)9v%e{6fv=$RpNV*^2>PJP2J<6k8?Dm*Fvi??G&^VjIwFg*LF4THsbWF3v5& zBMa>o^jGk{wh@M>eQdje*p{szX=o0YZjd&u0q@}&^4r8O(zJzkrlA#tmk_s02FX0> z*T}27f7%;_z69#xNU)YoXC*$e&`5h1kb?Aku<^3m?~z3z(hk13dDG)Tnx$1}Dq(9f z+U^%aq+eXasV*PI!T27m_hdADA)*N`gHr~4gdg@9JYm1c5&HEbj|bUrG`5f)8c+BT z0C`qi?su6G|xPIXA9BnSN@JGld&>X`@#g9*_{-761D&#eE1oT0{ zv##p1g_kTc-~~g0yl{r|{$w-bOKxF5M%FLk`sr0|eOO)8YyBSd0Q=cgVhM9FS-*BaRv#5Fr8lh!RqFUmA%eX)GYs1t%KzeK4@A3Heg2%E~bP?#McTN%y>4`zN z6~r^Wc|1I;M@YZw0oOy7wERbaXQRIpQ({dE{)4?j6X&OKzoR{HeG+dieu-N685ID_kby8Of0tAX@~99|Bp`JsUa4oQZZku`rc;6Vxw?-;pEDvtH80)ESG z`4dgJ=abRDKyHJ+UU-3WO;vK$SL&j2L#R}4kPFYL)`jMjdesSkQ4Ic;<8l9|7N3QG JbOZk;@PEJvi~axr literal 0 HcmV?d00001 diff --git a/tests/data/invalid_flat_bundle/encounter.parquet b/tests/data/invalid_flat_bundle/encounter.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c743369b5d0faec6707d1b7f0aed5790d57a031d GIT binary patch literal 18309 zcmeHPeP|=+nIFlH*SpzlHrpLV&N*Afw>$B6ZEHrdYEd@A;Zk zuQY2#Y5kxo(QePM=U-f&AG(+bGwjT6WCCDOw3~v|g0+1j~?`3?(v5yV0 z9QhO=*C3EzroA0_-CWS?=2##iubSZ%#0nO4I&jI67qfxEj^!w{-r7Ve4vbtIE*NcsB1!uW%ILNZBkJ`RQc>~ko z5X;U2(kt{>2h!&b<>2)>lIUy!$CF{ACPl6Tu7L~8`)5pD!{>^MP_IuFE21>0$o0k< zNs-Eu+8_paWx(@$H}lzJ%*TUhfMWq9gn8p5wjJpQo)Q|Lc5@B^%Oy zqt}vuc6&Z`Gj|_nK01q*d~h_jW=ztN1mq>g;PWJLS2YXzxv3|p=^&=E@npIuHQeK8 z%|WmEk%Ku}q6|FY9=J*|zr1Lw>^)O&7Ph6L7A(JUdER$1zk8DT-wUHNPRwj!|Jp^r z1>VsYHVD^|XMLebj$P+wZw4dJ zQtN?9c4m?btp_+ORga?7_z1AKC-$CeOExvvOT^DFw>7^c6dO%JS(R#XMVzWPgqmiA z&)l95-OM{*VgB?K`uT{0Y^|cu-!yD8^UYxZ!}&vgM5o(_*o1rbLKJKe(w`Bf0<4Y7 zsFyzK9X2?1Yxu>}Z4EDWXeg;#N&Lj+`5zZ^_i5&J59(*n)mEL~yXbF&leOqn?;chm z1G6m4sn@8h)7%_8H^;JbK6TkzEq(OWqg6|j!I7%v#TTsA(mimOV%`}wtrGrgv))XJ za=lm*YFkoNu2(B{xdBDUp^knq;Q6zg`S2O$rz3sq=)FL-HS*6~^p8v<=ieOz2kSZQ zGvSIpL6LW%mTtD43E)^5{}9WKLrfk-fAf=C-nfAQSWEp1r7(s1vB=x_t&0pTk#p_Mv{vCajGGg!P~1Pp)NUuDHsph{ZFqkKfc;GOz#PytvO$F zVR6$|OABv*6~nY9O`5so8-EFktGp>oHQ%nFG$qpWZ>c_Ea}1Y&-7BT99x~f|1?RR` z3f4^#Yys>y696xJPdWQ`LF#mF50ImZUU60|fpZwx+%02KZ#k|Lw z7J2tcQ5LpTD0}rgA_Q3H;_+;;0;L5DV#iA83_LDM)y5F4HX#%{#s+Z(3X9J3ogqjKv ze06zCtv@?XJwG!}4Og0t!VQeCZZkcvGs3Ar)H&P2-uqOKL}zRME_8C%JOdh672$vw z^xXl^f4iC2rkT$IXwU~y#@6mL^i5;C_*bQ;L1BkY2^f~4zt$0dPdku$od$sWR~Ea2 z6StV3-aI*L4%H9p4XI3w`5m|Cdv4~vIp&XHG^P_yX9a9c=%q&u6P};IY!1(H&_dXd zjm?IQF|YZl$0~2WbB;o*y!m&gRo?EKu>;Mn6z-p+KXQ3~=3;*Ex6DU>gH|wsSRmQz zdDlh%BdkxfvxUWv<555!lU3NqndO9}leVzB?i+`SfkhU&fzK@3@YwvUw+9GT1nct+*NvBCcJ7MV_uej;dvV{=w!9qu`3 zYNI&HY0FXJfhi7ScQ4ukyFPpCdjRFYzM zaHlU251TopMHt!MS-oy7R;uVJ6)%5%>N%YX>SfOU#e4{M{pv;oX#T;<}Z7b@3|ZB3^T)?Z?l&vYGlGY zR25WFsCz$r>g6%`qF&m+Q>-Y>vRWUz>HGROCVXSs?Hw{Ebv-4wFOR80SpxDg>nnF- zFcrR7Rm3|^qnhA54R}8fCpT3YznjB(YO`EDFb68+-5-2jmKuU6Gz1`5n~L)H6TY@= zS?ralJv&}G?f2Mv<#mpA$;G$czOF0zjr-M7lYl!hC+Wu6Nl4cZU3x`Z2D;=vpjSIy z4kE8V^$NGAIYAuolF}h4bc$ofV7yr1{7R4wtfNipLV7H*YQ6|zkJkv)&LVNdNhg8V z`4yI4v-YQbcNCR*=z|vW zHd5moo_^S@G$)V72$asu>_lwP92Xk0(x^#7qf&$J{_>b27ivQ702c<;1EE%{?4f;j zr5e6thEKWB6g$O^!5Uyp$6y%@Oa*X^@}B2Ysau(w+7}w$Hz)n%hhO7)@)v&bFY^2} zAKBt#Jio=yZ{Z_866qM(){vvA+XOC zG9m3*ur3$Mu}1#((hQ6p|LF}u8-Pp3k~_=cRA=jD7^Q3~38E#KX&#_WAq9CTF@|-M`)7#=a8E@pXsS@yDbBHG%;tKKo zxx_r`8!8nt8=xaE3z;=1T-ChBXD^ot?c}n_O>t4#({yQH2O$gJd^Mlh-^m8rcyodMQZcyF zwAjH8UtP#1*-S9q6f((O@U3_PIN5x*r0eI5Q-rwE{qeAIw#1Cp2fY}J!L&7A?C_n0 zpG&!5ipyp8*YerrY9X-JECy4hqUsojed!-op5!2w#Z0We5l%uC#dN6q9pnS)1^B#I02_#LGS<#}HBy3}P%6T|P-xfSeXv)mVAkq1 zOX@4syhd?RvDVPtT)C>`f?8Y@GHEuKNtML-0`?xL?fdmj=s(iIvL&vdmc-Ld9V7UU zgPtU=0nS$bcK^DGwJHyNiuhawpJkxuaS@1#q>_LhXT*SAd=|jhVyt1-g6uK&pT4+M z=(mfGbpggwNC;XB2xDrcDPqp{&y5a$cGLHXtz*e?ecgPJ&&1e5a0%-ftU33qIdvKO z{0>=nLM?>P%e&$MS*r$MEkJ4$S;uZKaEWccxiY^D>s4NE%%^Q=NI*Q49or~F8!DIMHpF%z~@;$$Meg49PW3JHi7r6Yp}PN=OZvb%5UI~1NIU; zi+h7L{J4TlA{Dr3`-PR(wBH|wyM+~)2vbognWODDaAU#qxA6k`Q_2TI*k@Y_HI4O| z#6Cz`xUuT@M^;t8AK9;Mkevzu!y9tN3)!DQ4(2l~c{aPc?e`zFLJuIHtgC_9&W%mK zKYx$%*O7j@OMbLV|0sb<9(L%Ddnj0g==~FLgtqh92&>01nWP~9xphOo7&=08s%~FG zh4zU6pLB!VCJ>K5jQZd}6deir;f2^wn_9$s5)W}p4d7rO1^qC;7Tv@7E2}l#A1nk0 z*(}19@FQMLAuo+Tfd;irDQNE6y0Po`=gj$g5J)@*8vGCf0)8H16 zbblsEJYk?(@Y9w@_+gCm5%fE(6Tl2FMnRsR;KhMxipaCri{Pub zIT3zc`bIZUery#k{o)s+U@;kjwGgqBAMv(Yv%0{WOl#`(MEe3tmr-oD7s z5|1x#173f{z#B~wc_jWU@ksa)SI03gNN?pil()Yg?#OTZcp%VGUtI410NxMvWNo!& z#4nMD2O+w?Br6Mes|BGZAMY*oC5iKcrhEtgka%iIu(u|4Db@nKH;O19xH8!BD|!AF;aB(T^`l>W zS@L}Y>C>x0-QHS!Vdhxl3l6WqBG@`2eb=c*+!B7F^|}7}(G_otEug;h4 zi{@~W~jm@Sn`k9Yo64bBk$jI zTd{~oH2L?7pLYMuB7LX@sY&8Ti|=`opIZDNJd&i5p7RFi6QoA+%TY}2wJv*D>L)VM z`hU-PftqAt{|(JNO6(bd9Ozp=(RPbTPTwQ)vxxrya=;q@^F&@d{>dT;&rP97>gQx+ zAB3z>9%s&X$nUQBT3ClkHx}SR{IACsR(5NDCH<15UOZWc{bWnuPl8Y~pEc$mEx+cA zNFVI7kJitKzfin(lp9yT_E8?`!3a>-ws6S+^@fD}TDwUs*!!E%MY_*v33~_yVA+;O td3{x(_EJ-7OctxvNpkS*(+{?N+L1TBRts<7-}fKtkAC=9(C{Dd{2ve3Nw@$2 literal 0 HcmV?d00001 diff --git a/tests/data/valid_flat_bundle/condition.parquet b/tests/data/valid_flat_bundle/condition.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f213484721ce977ee26bc82d686915dabc7ac32c GIT binary patch literal 14975 zcmdU0TWs6r6{Zx`Nm?h(vQU##H}#x^^%}>z*m9&SeW2t>mTD`uZ0pt)hIp|}o4QmN z%W_*_7=~dOilG>aVc5eE48<@E!_bGWD~2N2!ybmBk3*mOPz*(}hdmU-Fcd{m?3|w> zMN-tojg@9fLXrH>fBx@W|8ssjD{RtBJ7^C*w?x}%>O4iczITD5sC=bVFA5WExgyCE z!RZN~$2&_?^m*Dj4Rq(|5qe~No|?K~Mdz}73rtT>*x_oo+3XbascoDd9rIHo=f+%* z=ec}MEQrP0TCE}#w`sf6?)=E+{HKk{FsF{X&e${vMA3E%F6ZEjqTo3~$5jC@|M)3* zw^OT?Z@S&pVyPesGg5J@NCaFj{sJEJU}!+k?RT6 z!}mutJp|5hC84<{)x;UTB!~`Ks@6`5GEods$sg>_x9!ZE4(7cRsFZ%|^#^3Bs&RTk zRn^(KM*(zWrzDFLh|R(S9&L}hboVe#Z|(O_Yf`$}Qr5(S8WHRboAdWJ=EGCWU1Dt= z9|KQIxc{)xAKI4Cr|8R{hH7G^AQek;X}dYIBj1=QRkqz> zQ2Sc53cl#(OGQDdNu}a-9(;4VRB66ku9P^i#&xvIb+A%FbrMHAAmS}2IR@@s&8Nzt z`O^tI`jg{??S!4V`?#j*z!?>~r9U~GZ#bO4b}*lhGG9K5{`5exTiVh|UsPT1v8j_F z(~6>UQ1k?S1>?GvdF)dsTSG4O2B^>7LOqZ6=I7_V;C`yt=+fv!zd5eH6tHk-a{2SO+R~rK zoMPz+cIR7m=eu_16Po$*0y;w9#Rh<5sVmq%R9&5(cmyCv#C@@1kyRcc11{wY==8bd zY z;~>16g`ObMkRi-C_*S_BBHa@((1-L-o*Ad@tzz&@saO?j z3$W>~O9hdDdClhhg^l^;cbI=qjrZVbp&S#)KUsprw=M@>5oG(x1oH3GZIGiCkuOyQ zQ9ztJj2~ZP-n|AG+e}O>EiqoCFR6+=<2en>hSlQ3tkHRD>a;fKYTufm{?4sAp$ni^ zOZ5sbt~bjHRK?5g%`iWlK`(1_(KECJI%vA_VTm$lT}F3HH`<)8p{!AWw4LeZXS15j zfiqRySEO3AXLh;gWj^rq$}ZZ113fb4uHKZ$pYBaoPr(r-bJpKvb{HN zC7X2)dDZ58-Nt<4XWsUsYVB9Zbu7$K z+6(5PE8v--mEFv6N@M0hmafkKJn6JEr=0)21V1ktn|6(q^F<+Fb^YzaOOr~?%$MYP zp;(=~Ir02tM;$%+!t}(XLZOt{A|ZTfvY0Q3Fr`?RWuSml30c@QzEc+3tU*5ugp>86 zgthP_P#5cka#I^9k&Xl37sOg#$k*~f{?a7Yivs+V3FmICN3I7ZUwNes#)x9l^ph!M zW`JZxa9{XP?pLM(W=rNt!_R~|*Y6lMt&Tsec)q2g7Y5W(cMMn)_Q0SHn=l;BLLIAX zZH8)QqXjyKO{*V^2DC*~5xZbehlheeJ;3j@0BVMub>Ps@)>efx$&PEQL#DOw)ACwt z*D0}K=$KdSc~)%!r7yh+-?4b4@dOvtp`EC!wlJEhKmG7g8qS&fDYApugpVtgWaME? z`s!O<%I`=hh7;ya7#+i=g<&TdbhqhPT7Pcbxw3imSQl=e0$VL?95pR`pJ8k`jqO~# zW7xF%v4o4Zh;?*y3=ai^at3Olf%brD-TTu#x?e}d(pidi?io_CHa~|rjlre8Hb)n! zpDJR$R;rL1OP07wzS6`JtK7_2Dy0T8gEM*j5g@+x`e!^do=K>5)a^<)=$rB3808ve zlgT^jo66y+>tD~f@zdB|Wmy*g;paJ)onu4WY=mXE+3+@b7YZ518~F0JHXg${_zWMy zg^b5ow!y+Y_(mq7Eb@Y9pxg%f(2ZC@uIHkuxr{Hd&n>2ETya^tBZat_9NfyM{d>90 z&K4I<`Eu!lt>s8?D<*rYv2c8s^Bs7(v|pu>`DScB7O4S0{|=YlKpu4=eNbHrvr?`g z%Um?M#T6p8+#1kD{a!BGKt7EutdY$mwuD8wLB_&yFISL(e^QP~jcwrP)$i`^#Bha1Ufexj{GH zhL{R^#7sy=9I--B$`?|*LYR!#av%@z@MJZ89R!!cXfIX2tduPT_rz49EJQczI^KnR zI#ta@g7qxOq419H=N9Gld?qO`5#4MMnXGVfQTb4a)HEAphJ;rDpKHn58*ecwi;JsW z@eW0=KJ)=$pD*t9(Q9G{tpU$_< zG{w*kS4i&gUVY3Ql=;~tr};q2L;Qp*rfU46tj{U?*+N;)&MJPyrBj}4ItjMBt>vk$ zwFc%6UCdw(*{Ju?riIPKXiE4h`(!u25*L%O_a(}lZ?4WS#$|psBxijx=IFhR zOiG5F*wx<#oonTXu$S#~%HC(OH^%olUqgxQfnqlZ_DOk?uz%)K$uia>UH0&OkX06L zV=hQ-};kEqc6P<6h0rru22G zg?}cz`n~nR*u6(@5sq(Vd}>c~fHtpY4ntd+CaJ5f;-<_MSAB`Dxks)G3t7uvLx~%& zfN_zXg?P>+_jw6=4&1Awvcwl6O%dyN=ySw$q{$b8vr2x+K^`SJmh>~r>l^ju@LVHS zB>hPnBlxSb&mfOjb7?mQbnzgzZ!DXIf1wubGTcIo2TfiGdcP_*fQt?61$pfC_E{K@ zvI}tAfIsko=QtaNn{8|h=df*jZWTyFWa=G!PLO_a4yo7iqBNCr*PvB}hY-6%URLo+ z`bqN0Ch;rd4MN`pb$cXO&1G^j7YQ^n-T)4feiRm7RQgeT#Uf;Qb9lq;Mw;6Z(!@gZ ziw}5FK0JJd4u!PR`?1k=pue`K;#p9*>>;g!hfKvu>;f*ZsEdCkp5SFTi}QUe`_O;G z7SgN%fV5-JshYoO5p-#a^Lpq%R#O9;JVWcwGlP?!BYoSy)E;g%xl; zoxejc!l$ioVoK08*xw81ATp1kxPtNr=G+h}@DytKD~T^%{Kt_K@$oh6O?F7Y#Bn{} zI)oaD?^e7ZrS7~+iFf^c(jOvG3g`Q^`Cak@;t#=DNUDZ&AfLc`*8(cuzWE{A(HjGk za0BNDO#14VAACrEunXqV%S-fYnIE#c{$0xtEX24wxC|G)zDdyN^li=$9;6o_#`X5? zh;M`v)tw2cKF)^raDG{vufz{AiY|T>e$hTodJd q<;A*KyUv%(*U2I7MC*`tLOH&L-{ON`OZ^Iesn4E)-!q2)dip;D@h>v~ literal 0 HcmV?d00001 diff --git a/tests/data/valid_flat_bundle/encounter.parquet b/tests/data/valid_flat_bundle/encounter.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a1cdf7a9f8f827680d85e0cfc67d93aefe31d848 GIT binary patch literal 26792 zcmeHQU2Gdyb|z_6v0d3w5|g1_HOmUlGz;6}k3^fyx?2xPiK3z(ind6R+0-RDBXXpf zA;TX@WD;NzEEdKh2=Y)EMiFcmi^U=j!6FYq5EOYRieM2GMIRPLk%yot7C|41pa_C( zABsHmoIC%+ONz2Z$%~eV$DFz6p7Wh^&pqefbB9iv%>>LYpAAH#flwe20yUp=PaF2W?s&m>P#^Ds6d?G?2pJm(xt_ui`tpe>_k=P+ zkxNBR5}Hk+CePrv*6mumQRZjLHI5$>g=XssUgRZSRt)l?%kzngd1st??+>Pq2p`pW zx7-QMc)r)CK!>;}$U?a&`qt!fO{`Tqz64)~VIs(S4L3E2-aD|BYOr5V9B8l>zu)RL z*t;&zFI>!jKj)APQQ`QS@V}t$nGY0HQ19q(;R0P?C z_@_@ZzkTY^h#?RygGDhsVnDj&e$JSw@aSB?G{>9{<~_F_UCcAA!L#~oqtCD(cuLHd z7nomP_%1E{^$d4|CzH0tbCmFL)@b#bz60Feknj_)ZoPhh=^p-p)AO;DdHY%BHy&ct zJ~F9paMOKD^WNuLmjKsozOgHm`3(PR8@$agbsBRa<(iMo6SCn z>;F#D71%jZl&wUvKk+K8?*^_o+?|I`1_5psAH&4O!)EHTDjGcC#axtHM zS6B7klblej$hD@>yvy-&)4I$!Q`W#(lIfWSzr{CTD0t%=#Pl)Ge>**2Ihl`MV7~ZM zG|j>As3!5cr!C)bqhLUC;Lh>UpYdQ+yoq0R%gX z)?=6>h?;LZJ@=i=-(F?jyozdS7^uqp!2K7RGBd=N^_=6=E$@7GF(EX|)ncQ<8&4_P zK0_h%K11P1x5vl9qqN5poY1U`#SSsaeV6B57jr+ryz3{EVrZaLqdjCq)(v{CQ4@KJ z{f3NeRzAZJWz9*R>da z@h{zAjrqFJgbB2^6)!e;Kfhm;>ST=U%VIZ`&`W;|e9g;;|GBA~*L#jP3j`avJWAyL z*Y7cJeGhF!XW35@Rm(~D1x?G-xHE#!g$MFK?DbShKd2nmWeOBGO zUxpnG#AnW1U{iWJP4TAR=@r}c+OiXytaEA#?RLcjN@3qS=%CUQnB9WY38reh||WThba~Nu=8p* zIv1IX9vS=K%JiP^tPfaFn@p&O+d|-cr^6C{I(HK7sz;>G=mI^Q%qfs~oCr7Rp?8n z`#+qTN{z2yl>yV8D(};Fa!sC>n!Z-e*KBXWZo;?Ks7bz(0I7{{Kxwfq^KGdn3q@a1 z=6tZ(fMi%<$`!s21KD7G}e z#mtZjv?njDH7Z3}==QHQpSMzYZTB|Z_qGk(Pd1e*V7gJ;6*#E7Aao@`+!L5js|el3 zt%o~RiJrAXS}E~hYQtKGWeE*JCd}<#%bgAWk0k^C6U}yMn=dO)u2x4y=AU*DwuOg{ zBNg(nIV~8Ojo@1GFsDd*pS}r1rvP<(bK2K+UFpb~7Wmk}eD-dQ`A|k1^jjM0!iIoD zwN=ldQA)4j!I56aw^eKSWI={{N3qkSq_?R-^xlE3RQr6;w%doE_>?oEubRwTO|;KI zK?lH~+QqW61f=9+cXxds4xJ%IMWZ?R5^sA z=Uj^l!hXs`RRs;FsUm*TqllTup%u~2Q4DmZ!vHE8x?$B`hmjA%VXwQ=1Ol0+yn77C z(5O2fMD8NwdOccm3!^S=XpKeQq1dU#--Bq!6%Qn3VY6Fkby4#(wNht(WUWtWEoZ?I zhAQ!ciO`o<3SAb|7QSwPKO-56)on$ zC<^w>=*#KMXiDD9V^vgU9*Yt#bDSks&-*WWT+C_DJ2x*lz+bUUB6G#2_f6kx(@Jd@ zet*%Hb=viT=h*W6iO+WCxov}%SDNYE^=EGfnrPGMeeTknIh;e9wY$a zbQ^x}0WWV&1GU_i>K(nIMow4oxx}}MT(MOI^4F%}fR?;KiG4TjEL@9Bzww3v#)@Lv z@_QfnRT6TuV4_mk5Q6Ao;i1}ZTm_V>%2xK|2S*1$&}ohmg!bM{&o6%{U-Vy)P^K8G#VL||$#Hl^K_F?gHeg3G z{S0%OCDNzkVA@fj6DW%@<(W;|BhCQj<pgHMCunZQh78aAn#cbghRGa+nqcqiCQW-a%k^7@tc z-;#d(Tw#kW%dW#8{Jg}nb8M`_F0gEcjaSICSj^JChY#oV_Cy8y!aaNl7t)?$**zBe z!QX_*IugJ=d{RIayOEN_b|IOa+YGJlmKL+ElDs0U39(X2jBXWk;hn-}b*q%jh6=g; zt(Ar7R!R&sQ}LzkQfNO|%7rx=vD`^rPc5{7U$|PzZ6J>}m)mbH$628uiDD_4*(ym3 zt-@`fONN7`I@H`^3+ zT=Au9el`=#=l0hNn@jalXuVwke2Sbx9c-|(8PfD)g*;&3atqCkWVTbxMR%5^Y`YLo z_0tP6K9o)+Iom8P0`|qhSj)4SYFSHY^6V;%$y__1Lq3^1rP*vJAKGZ=L(v@-C(WOlATyAg&9;j<^ouO_ zFn-l<3_OFjME`C%B<_}kFxW0w=N49Xxy_~R!tHPX=AGi- za;#dCU|fS{+wX%dG9@k-t`ps}0b)16*eWlI=DD<+hhJ;vXBD3)<+1^=U6o7D>+^H# z_8!ieez;)%uLd9vKp(_OeBmHnXnkno@<{W1nDHphF4YSmvE6_En*A{fbC|2EI-UsL zvXqTk{K@d^gKXX@Y*_rsh@S)ervP&r;_k@igI%9P%m=$ZCpaILs^w62=T2y@Uu+T} ze~E?psN!cdd&^qvQR3Ut%+u8-$n72wH@8a=)61RkQ2sEQc%tHzGa|p(H#ccGPjH+w z^?Z(NmmtSLtlfdU-&J!8W$yK*rNs>;|Af?hxp0)RI=ctC4Rcz$u(Mh%OMUaFSWb#v zTvOc%6%zT)C1o9QY`zM4Fqv(uxvm52D9k6JLN+FrC2_Y94;#-m^feU4?hnO>HYQRj z8Le`Oyt;<$U1Juc{oVZLdT8|!>l@73=r?42Bf~lr)7I5kA0Y_b^Vup8$XR1aqkw@8EoD#(Q!(72+Ml1Ki()U{4){xd7{s@OEJ{ z7FZsPAM(KhwQ#PitRybHCR6cR<{S#IMliyteIfl z8SSWb!e%kI)Pc2dnAEf6SQx%{O0#7$XJD<>QP!(iLrm?j-k#fA-ev`?mzXeyV9t^C z9^tW_YOe@$dnq{p_0>H5iK#YU!LL|!jbJS|!tP*2b_wn`;!uSRVQuy$tkJT8I9xhx znPr#u7O_%`-NMLgsRE_j*bdfi7qGy)vYvqZJd}6Y8`yPa9lFNz$Z8GmS1UjY_dr^} zvT%ZwedqE1Rp7zS6Fj6|oJp7be!N}BnlQ^Q<3%YVlW+6*6l%Y;dN2}zU{J7vSWyS% zvw?rbQ_)?&AHKvAF~XV@p_TeG5sgM-(FW|I$*1vQ`c4Rn;&WR6SOy;wqpJ05a!;5* z{uWDwJzI`KDSb}YXOEyzKoJ5WI!U_17_c!?&s1|i-mhT)`Sg}*j}6jEp;rW8SA~fP zYkeGNmgSJXyFKgoN8oOLfq;VIJ>K9&`M{X6u`ZWWYS-*i7wDrEl!pzfekS{nUTM5S z{hi&l;GYnWpx(@`VE-%Y2lR(=f&NmqtJF%f8(6=^XKWh#`&nE6CN?SKgWte) z;Dhr9@?U#dE>z)+fUNJK#x9l@*OeHAM2YP=;1|*OspE}CR{0TApb^*vnsU*!_EOdY zNZGuS_WM`T+lR8hi1a~}Pwc+}hbF31na)F$`L~KqR>uKIBmG_? zY|#&9KYSE=?5Z3R74e~UA8Om1X|`VM0>3DG1zZ=p45@E?2kDCh5A<4B<`;1?C0-Cy z!QmBftl?$go{H420ecwvUEkL9>ob2)$ygn7UJKc?Z1+0&+q#CI-6s#{@M;<_3Gc1; z7;0Y*@w24jNwJIRhU$+iWRTX7sVP2UAZM}Gf0!Bv_F-A!iV`2=Y;qg)F~tuEs_-%S z1I8Mtm)Pn!NEAeTVZHw#|0nST&c>>RP&NUBDY%$NoU92tN{vlj@lu$Z4azS?o{jy-MVfB&6CKu_W=LaH7gz z!~R$HH2HnDnIELi zi!#~d9i*?JJh;>BV;WClA0z*Qec+r4<`P{rO2JR24&)aJ)=_wFOL9{L``^&}tMLGhVxA8qf5Sep05!xc#BnmzWk1BH zIbtuMR^t;r|C9J+sXK$2RexS|_a83)t#m=&m371ma{9$@;@Wu$>QFgYmKxukaP3U;=f zPm%ox2wzQO=r|<)g)U5;NX4(jKg~bMAe-hd4icM0e}prBs4;5irS|h7)$ikFWe`><&`1$Cq3YwK^onE Oj{izI9BKG}6#Xy5HKd~e literal 0 HcmV?d00001 diff --git a/tests/data/valid_flat_bundle/patient.parquet b/tests/data/valid_flat_bundle/patient.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c27c24b17f8a8e3918050c6daacb690e2b2e141c GIT binary patch literal 3911 zcmcgv&2QsW5O+*AZ6QFlT1N@Wp^DUK58Wk6n`N^CBqwcBhqN?qYbRz=<*z1gY$t0w zNz>IzZ~}2c91!A&I3oTCjvP4h2k^epZZokhP^~i&CX#$W-CY^0;*$kfuWisk=}?HK z(m#ZfOf-?FBI}8G)Y08;-%<6GQBP;+7#;g26#G7u{xH3|+*M)=z)%cL0a=F^MFGFg z+?jw|O@={sp2=jFAPb{BPUsw_Uthb%L>n4G@>wYMWhniePX9t9mf+@>Lt)9Lz6=G* z<6)$8D1N_njUv+0ZB2IqNd69|e+v^N0S8NwtTNF_KElzTHz7xJRf>mE>eCn;?V6iI z^w&y~p~tmaQw+yz@5-KEsINn@??UOXR?>e&lK5!9+yE*6So}Mr$VSqd+B+VxyYVkpo~8`AOqXYB*X&!in|ieI z$8bHbd6k9xStDsgvciPl56Moe}5y*zK&B zb3-?^vssxB>&%0Zhw=Nip>{PNLEq|)ri!j|p{pJuvvg0^WKRb3rzzbAbHi@o-qVxv zy@#o10Wh;DQo;F(o@VeYXkQ5#=Iz-Q034q-WoU~Lec8>%@h9bCtZVtr#+%N;#gH?q zxu{UUmo4%_D(8mZV&EkPbKw=3o4C)O>9L74&|xGv>261ty{+p}y zM=ZO;7F%qYWm{~iMP$X|jDCnh*fZAGhPyrwlz1U}j%9}|?1MKtDV+j~JV;cqFTTfF zW?!lZJB{4oK-m{O#Xc}THj0Wqsyecm?{F907H3!6ilv#HF>G;GzO9I-Xw%okOSe{H z4RxDut9Bh^>~PEv)DriA+ud`yQuRu!2t)L9(8y}2Z{`ZFCT9DMoF7|iOk?(&V%9`k z#THzpgz==FBwltJxg~KcWx!s(YHag)wSt&fLlg5o@ZXV+IgAY$vqN1dWb{Un_~opE zAzQ+QRw8;&YVd9Fk!cfQ0{xQI&@eLw0==e3g0DwWGZt%Ut{oi8aT zVDF7ygl9(ozvZXeQ*FM>Lr;u*$dX0DmCA)c4i{{OO}Hz~#$m8F!1I$klT{kUe6UZI zT=j`09%lTrXHBhAM*H@(_xCd{_M>8zy>YEOU~fVUQ`)Nzl-#A+6n9A7CCfCG3V;4G zw##bUv~7U9@ZmeTP1!p zOBks+bD$X5!`$WJ@y@Vz!5Y}BU^}@7IjK>-lEp%w__*Nu2gc5jvokmsS$2PHhw}{Q z-64{a3WOzOmkdpusqEj zP4~B!?k}OV66lFKTfun?IN?2hYA4|7AuxMo0*V|0=lA4k!lG!y{OL#yKv=!P&?*vC z6nSEIij>T6!m=fN!M$bKgI%zESjS|55aOyVej%XoG)VoF;IE=Qcz$H}tEQSOWL4|X z6mY%}OWX=BQSUz=Q1OkP>^$k~-aWO~yGOnRH^yIb8~)c{;xYV`2!5!~qwrH1enS5P D@VFzY literal 0 HcmV?d00001 diff --git a/tests/test_ingest.py b/tests/test_ingest.py index b3eea81..aaa2cbe 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -9,6 +9,7 @@ write_metadata, checksum, main, + validate, ) from fhirflat.resources.encounter import Encounter from fhirflat.resources.observation import Observation @@ -1225,3 +1226,31 @@ def test_convert_data_to_flat_local_mapping_errors(): ) shutil.rmtree(output_folder) + + +def test_validate_valid(capsys): + folder = "tests/data/valid_flat_bundle" + + validate(folder) + + captured = capsys.readouterr() + assert "encounter.parquet is valid" in captured.out + assert "condition.parquet is valid" in captured.out + assert "patient.parquet is valid" in captured.out + assert "Validation complete" in captured.out + + +def test_validate_invalid(capsys): + folder = "tests/data/invalid_flat_bundle" + + validate(folder) + + captured = capsys.readouterr() + assert "encounter.parquet have validation errors" in captured.out + assert "condition.parquet have validation errors" in captured.out + assert "Validation complete" in captured.out + + Path.unlink(os.path.join(folder, "encounter_errors.csv")) + Path.unlink(os.path.join(folder, "encounter_valid.parquet")) + Path.unlink(os.path.join(folder, "condition_errors.csv")) + Path.unlink(os.path.join(folder, "condition_valid.parquet"))