From 69051959498e4c9888d5d9aff8e76dbfa8abee1d Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:31:22 -0700 Subject: [PATCH] Fixes an issue with merging PDFs and the ordering --- .docker/docker-compose.ci-test.yml | 6 +-- .github/workflows/ci.yml | 9 +++- CHANGELOG.md | 6 +++ src/gotenberg_client/_base.py | 7 ++- src/gotenberg_client/_merge.py | 11 +++- src/gotenberg_client/_types_compat.py | 4 +- src/gotenberg_client/_utils.py | 2 +- tests/samples/a_merge_second.pdf | Bin 0 -> 6723 bytes tests/samples/sample1.pdf | Bin tests/samples/z_first_merge.pdf | Bin 0 -> 6714 bytes tests/test_merge.py | 69 ++++++++++++++++++-------- tests/utils.py | 12 ++--- 12 files changed, 87 insertions(+), 39 deletions(-) create mode 100644 tests/samples/a_merge_second.pdf mode change 100755 => 100644 tests/samples/sample1.pdf create mode 100644 tests/samples/z_first_merge.pdf diff --git a/.docker/docker-compose.ci-test.yml b/.docker/docker-compose.ci-test.yml index e47ea4a..c01b639 100644 --- a/.docker/docker-compose.ci-test.yml +++ b/.docker/docker-compose.ci-test.yml @@ -4,9 +4,9 @@ version: "3" services: - gotenberg: + gotenberg-client-test-server: image: docker.io/gotenberg/gotenberg:7.9.2 - hostname: gotenberg - container_name: gotenberg + hostname: gotenberg-client-test-server + container_name: gotenberg-client-test-server network_mode: host restart: unless-stopped diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d166e59..6977b96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,6 +64,11 @@ jobs: docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml up --detach echo "Wait for container to be started" sleep 5 + - + name: Install poppler-utils + run: | + sudo apt-get update + sudo apt-get install --yes --no-install-recommends poppler-utils - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 @@ -72,7 +77,9 @@ jobs: 
cache: 'pip' - name: Install Hatch - run: pip install --upgrade hatch + run: | + python3 -m pip install --upgrade pip + pip install --upgrade hatch - name: Run tests run: hatch run cov diff --git a/CHANGELOG.md b/CHANGELOG.md index de445a1..97c805c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- An issue with the sorting of merging PDFs. Expanded testing to cover the merged ordering + ## [0.2.0] - 2023-10-16 ### Added diff --git a/src/gotenberg_client/_base.py b/src/gotenberg_client/_base.py index 41a69e8..4beb317 100644 --- a/src/gotenberg_client/_base.py +++ b/src/gotenberg_client/_base.py @@ -79,23 +79,22 @@ def get_files(self) -> RequestFiles: files = {} for filename in self._file_map: file_path = self._file_map[filename] - # Gotenberg requires these to have the specific name - filepath_name = filename if filename in {"index.html", "header.html", "footer.html"} else file_path.name # Helpful but not necessary to provide the mime type when possible mime_type = guess_mime_type(file_path) if mime_type is not None: files.update( - {filepath_name: (filepath_name, self._stack.enter_context(file_path.open("rb")), mime_type)}, + {filename: (filename, self._stack.enter_context(file_path.open("rb")), mime_type)}, ) else: # pragma: no cover - files.update({filepath_name: (filepath_name, self._stack.enter_context(file_path.open("rb")))}) # type: ignore + files.update({filename: (filename, self._stack.enter_context(file_path.open("rb")))}) # type: ignore return files def _add_file_map(self, filepath: Path, name: Optional[str] = None) -> None: """ Small helper to handle bookkeeping of files for later opening. 
The name is optional to support those things which are required to have a certain name + generally for ordering or just to be found at all """ if name is None: name = filepath.name diff --git a/src/gotenberg_client/_merge.py b/src/gotenberg_client/_merge.py index 52848a1..70ff94e 100644 --- a/src/gotenberg_client/_merge.py +++ b/src/gotenberg_client/_merge.py @@ -4,6 +4,8 @@ from pathlib import Path from typing import List +from httpx import Client + from gotenberg_client._base import BaseApi from gotenberg_client._base import BaseRoute @@ -13,15 +15,20 @@ class MergeRoute(BaseRoute): Handles the merging of a given set of files """ + def __init__(self, client: Client, api_route: str) -> None: + super().__init__(client, api_route) + self._next = 1 + def merge(self, files: List[Path]) -> "MergeRoute": """ Adds the given files into the file mapping. This method will maintain the ordering of the list. Calling this method multiple times may not merge in the expected ordering """ - for idx, filepath in enumerate(files): + for filepath in files: # Include index to enforce ordering - self._add_file_map(filepath, f"{idx}_{filepath.name}") + self._add_file_map(filepath, f"{self._next}_{filepath.name}") + self._next += 1 return self diff --git a/src/gotenberg_client/_types_compat.py b/src/gotenberg_client/_types_compat.py index 91bfbc1..8ccf339 100644 --- a/src/gotenberg_client/_types_compat.py +++ b/src/gotenberg_client/_types_compat.py @@ -4,7 +4,7 @@ import sys -if sys.version_info >= (3, 11): +if sys.version_info >= (3, 11): # pragma: no cover from typing import Self -else: +else: # pragma: no cover from typing_extensions import Self # noqa: F401 diff --git a/src/gotenberg_client/_utils.py b/src/gotenberg_client/_utils.py index eec25ff..5c80146 100644 --- a/src/gotenberg_client/_utils.py +++ b/src/gotenberg_client/_utils.py @@ -19,7 +19,7 @@ def optional_to_form(value: Optional[Union[bool, int, float, str]], name: str) - return {name: str(value).lower()} -def 
guess_mime_type_stdlib(url: Path) -> Optional[str]: +def guess_mime_type_stdlib(url: Path) -> Optional[str]: # pragma: no cover """ Uses the standard library to guess a mimetype """ diff --git a/tests/samples/a_merge_second.pdf b/tests/samples/a_merge_second.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2955c8d5d133267d97e5279dc7a30886fdd95fb9 GIT binary patch literal 6723 zcmb7}2{@GB+s74!K`47fcWsN+zW7!)dY6r!XA z!4n;5tHXbf!q)>2NUPzfI5!djTntCRQvd{LVEo$znZG7z_)xV>sNjMSX%iKIO7^1N z=mStGC>$bfj{{&*P%sOX%~l0#0yyH_D0CYf#TAe?AQADu;z4e?w+5G|Qt@OWAgzw~ zcDBbGYpL4)2paWQ476Bj5tH#m>Q>0oY9t~R>{0+(dJ59UcnZmjY!BuHgKmzf5Q!x4 zIgGXy#t>{(p*aaEFyinmI)4^6`Ef7pOpX|(@ZtNyy#Y%XaN1w z$kOnv3*Mgo05tklP*wM42w=A{aC%x)>%3L#8F5JWOY2np6u)a9=^?@8{pi*^KnvN1-y=H!KEWu@S7I0~E!0d=%FC<#Y^&Jh?{83Z`0hV#(II}@D#!`Ot1cQ*%Q z5Kg$9y zo|k5_FzRNWxLG9^DG`Q{E0Ab?FnRx5Y4()dRHLkX?EAaXlOxdg(XFwQCu-CEXVwK- zohKO&rVdIk`L=#mn5p~}Bz%<%9y2A(f1zyUZlbQTW-xTA8>l@5eCR)5#XT5R&=N)onW~9lqKOLjpo=iA3)A)@G~+E9)fQ+w64nWbGI} zbWq=BrE?FDY+Y<{nd?;J#h3$jpFdCc`rHX_-!sQ%Q3RRqPvuQ?;1diAZ&i5sITYzW zKb-ykU;fkMLgL3tzGumN79H>GADEfFeO0Q^!ygqQ70a+Y=kkk?+Pg0~GjzBFsP}uC zm&qC(WbHTO7XN;>@trJG9;$r5bmDG#W!{Rab(QbCXBS=j1jlCm_6r`RRxJ*0%r^Od zrc^l*gL=o!*mye%zmWPK4-8rVd!D!T^t!p&*cl;WW0PW_X~qQX3N)Ud_?G+L<_Dy@!S|--SDI7wUl3wjS+$Q3`qRH&kM*W zSi%NH(iA?p-K5q|*Bl6Du(>+LR85J-9FFU0BPE;hcI&)#S;;wO;(4j~*6zgmM_5EE zY$!7Qs#s!7`Pwy$kE14Xabr(*j7yJ*nuc1CGbe(R(KWOtJuq9kfAaj zJ$`XV#f0i9lY!34BV`u{J(i)8l9TaMr%%gyeESk_yykMRAGh@2fO%4)@jk18;l-W< z1B0E5Z{Xp}OxlmQ7hmcbGm!a&w|_K#@gnE!sl4F}U978p7E8#N==K5sqJTwIV2gXH zq?`MK;5Ugmg_xqTG4{ZH19CxnIv<)PcQ#c#s2kcbu@>qlZ}duZCHi6LfudX|ZliSh zR4nJ!C6nXR0o%UsuHW-%>`lQ_1`gAKimy^V59b#WE-7B}w^R0==R0>2?cQ6iW9Q*} z)oN12Xl7?)JO6-9PrXr;w4-_*TIcE4(=`dc-+gYbE2;rG7rKoXQ`z@~A&A+?t_4imFpJb_13Cp9k?<4B3St!*}z4qVj z=jck)n)t`u9A;_S%c4iZyp1SGLMSDOy1JOf&z6Jzr08eol6Qaz{;N@Z7`Ji4?-qMh@b+?12KS z%E9S!`L;&gu8m~yFd#Vd3FX4%5$ zqiLkZ6wNri{&7klty3l?mbb;5HlgCmZf19oOXjYDEa#Kp>Lk+4b>zr$W-kHQ`g_iv~Wn-tR 
zy1FN3<_m>vD$fq;^BTHtEH1{5c5*e%hfow0e?`MpIL9;C!Rfkn2$OLeuK1MXt~EH@n5$K2=ZL4ZRp-Q_<=qQb zyLHEt&^C`LU5y(hwQGIKZ6-|BS9fRk?Qku5c1iXAliSDjEtd9+Xk#}B@EeC1jatvKWO*!nUiHJINJNA3xWvO7N-P=Zv6^u_ z_L?`AgcZW~AillqRp0;Q=JsA>Fgl)ZI67x%PyYq{_G$x_p3<9AD%F059SH3sE0!G` zA4F3QOt+N3d{O^i>`2;6^NdLeR^YAiKyWCEcbM%C*e0fOU*lQqR{@whz;*B$N% zgb0YL-gIE*7ZrYExKmqT_OkICw#xJDme;O~e&EerIn%|`JbpyQ35j!!WXdEkmrZ8v zjVR}Ks?T)42;Vz^zXfqg^$28azEs;N+q`SH^3)gdnAf%lyB^`_??8aVo1!;SxZ^BC zDwd@j>D!Wf*9p^}^Z&4W6Y(SMSOdcb%Zl9t$KxZX0~yW#r+`6b@tJ6>fl( zY)fx?XQID4Y=w0h?x;>P%~TH)-&^}|yZMWE?ab~ur7YTewx6<*?>}?%U*~w{=+axOfepHXr7C_L}>PFq=_ih`RZ{PI374 z1)dt^9R#$@AL=Vg?7ge>y**8BFwt-zp zuk%~Z2Ug?GH?ytfGd@#CSJh@gK zXKdjdv|A;xsi#R1xKMTZy*QtI37=CegO>(pUcq&-x%E3qD({L`7)!;qGPr7z05 zkPo(li=aJRpc_>il)3MzPgbu+O{&$@WwyeJ^2{j){XMIXQmT)I`~O3*NWJhjCBoCU za%olUe(U{Xf$WMzmb|qqXL;(8D>)@O8ZU?R1Z4Yma_2FY#P~&K?27JF@+k^6Kk!_I z?}qlSW=b=(R;;eYDNZW-atoAnuujV$`(S!un;oZObVam#7rFMEdFG2QIpB^}m_VR* zImbjVbmpQRXMp^3g^Fjp6elxVCJYpWWNXYVU%W5TeeM)n#<+?%d*)y&^&uCSQ!O|+ zhQ9pTFLPSek3ARzxIFVKh$>~h0>7RdyAQ9~zAI9p=j@}&VSkS1IpGbztJkyLHTYA` z+-Mp(!4zGp@v`v~Y@Hg#?RY5s6{Ct%v&&c>d)y(jnQm^p@N=K5#Jnhlh{Pq4>%o8*pr)yur9OSFm#SQv5EYAMHE zwyupW+*P*sfFxgqn-<2n(mP8`&cj?$XUatJ7VD$#%eE!tcH%yDwPs+`A^$ zd+0_K{IP+h+}^C{Q}?vepf*a0rDtE5HtG=TxR#Cf?CN+FJ=hj&_emw#{_#7t{U!uq z<>!(f`4W#|`_W0~vZf22Dn?cM6N{Us} z-H-XOW{;pqJ)Nd-u~&JPoHf2oA!@S?Nm3ha^H5mCWQN62C$YwY$JT<0N~hz+T9QPQ zR{b0d_E3zxYYj0GZ)7~&54RC?V6*mk%kk+3@i`xuo{yu_-OEBl&v`xnx$$ngR{V*R zb-WC+_f$^8!DB(wc3j4X_SXvXos3)ZxTSYzTUyJ4UH$D?5xyen(X=a~CmF85{m19( zgOclS@wbg;>aZ_lFEPESy7D14iMKu{9xL>IJ65m`(NBs6t{dhny}=#lTNHfmm4&^z zaz9c*<@sKNn+5aFP2)@#btlHTc|4_7^qGU%c#`J})elccefXCrCF!eT!fLF;o|&b{ zU6YYL%uzcko;r!g)SX`@!o{n*IJ+XF+MD9Ik-0@}{b}~;0e9HXTuAyVFAClHIze=C zkCg4h9>v#Hx2UHQy;9oyzf{e1?$gvCz1!uTT9RP-5t{o^u=enPe?qQL(s(_lvz0Tuil$7^f!q8tK`~Iax!qx zs_1h0s$Pf|POO#VPIJ58LJgDCxr2c-s9w@Bg{1BhyCZAE#s2E+ff@~djL8{u0?-`C 
zBj3J7=3Z|Qk98r))$9oO4F8_ar}o&Ugu7TI@!BFWopSm1>yHj%rV}mDs85QY;86sr8|S(9p3CEMNghq$RXLX|z6947(_;*j+3y7WkDxUU@YJa*k&*^4o9KEMKX>Ef=&e*mQ(^1G{K8r zg#UOB-<);U@lW%uS@eeP0|xp4$uQ{38f?)BdKZP~%qMuy%%G4uvWivbFi z*&LKX(R=949tuwHq0~1Aw|Z!L4-Pt`v&bI-g4;+h^w09xdy#1*m%bbHqS1=sOvL}( zFb|RkZS(0L+Si|mk4B|7$<_a&Z(4;xDE+?)TZ0G|lqTArqLbAAf!-W&L@MntPylNf z9npf|&XiWYh0p93Z*hKLVXOCJpEd|KZ$v`(R8WQ2`o=rbAc?6-TDhw+RK81rhI&BN~R9 GkpBUk(v literal 0 HcmV?d00001 diff --git a/tests/samples/sample1.pdf b/tests/samples/sample1.pdf old mode 100755 new mode 100644 diff --git a/tests/samples/z_first_merge.pdf b/tests/samples/z_first_merge.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4f74613f97a8cf297ce4c453e346b34c54a5364c GIT binary patch literal 6714 zcmb7}2{@Ep`@k&~O`*t69$C`DEM|tW4`YzXE<_E+G?p2Sv4&KNl9VOc${Jr7E2Zi3!Q}wzT8=eEYABNP#qePK&R3GIq=?uMs@afB7zT~ zs3|A}u%K8JSm4Ze^bcV&m4YMCL3=nA1Pp~UVt()1gII&i|S2t0%L;1)@QI33Kd+3GfH6w zff^B=$pmexA7IB=M`2J3Fy;j)BM}?7_P=FnCNgY!(ZMG4 zfcPb+L=Qvifn8y9PtB7BTk7P+ah*LqOPzw@y5~<^f4?fJRKfXrAZG-#@^FqX_V^%I z*jE3Up_heaKETa=I1%NigLByBnJ(9+=fw|$Lqk|q-o0Kq&hCCSR(T0J(#jpjF(eWz zvdT{6z4Kb{=2y;HzbgX%+H0E{@{Z*@N?n#tls+B3OB!>n@mgba=Hm~YzN(~5OX<6c z&lH5oi(iq#WO?1pqxx^+p?wcI`2Cit%-06;C6M}U!G z9MQ{(M)m|x-g?H32_E3#Fg3!0*8zQklbgI5k>u@8U@W^6NM2yQ)&;eh9VCxLqZE`8 zFh$V3JOXSbBpe1up#d}+u7F0sVHiaKql{5dLZTVB_7JE6cyWz!@j%_fyVX|FcEl;IUCcGF;ne8TfkhLI1Cwd17R$>dzfi}Fppz#)BUtOdOcYF)L|HB(-0-%nAs z5`Wb3zN5TAwWN@;b~3wcx--99QzfNiimsy4LC;o9DYpwhJzM$twDgvu{2BcR$7dJR zf+q&No~~-kNjh)o(=+2F&)?jCNb|t*mVn)1&t51MmH58BDp-{koU&u^QgB^w@4~f` zlxw9KouuiotHKc|DE6zz$>DB^E3-$}ZeOWLIPhMIn&%&gIU@kt1Ir zT|pXQ$!j-E&uXjXYtiK;?~DHP_^51lO`+bjx&zR^@eCxiWNonoUHl1=s0!QO5UOgVq zORq=TTUlYP&en$4N>t$t$*21zGX7D^RI*x6`%k<`aG!n*`kjNJPBFR7Bq90DEJfER12y_1kBVnrjk;162vZ2FJ7R ze2kR1b7Q2%x(y|2&+M^1Ze12Y?cUbW{BYz#3ADy@JR&d8 z(y|Mms9O;=*0oaSkyYwgnO#tg^zGPV-B3I1?eo04{8Fu>opVcD>#drPx#pjJ&u-!w z7V7O_nP#cK8*;&t2R40RY5TE}ttYZ~t-0x_vM;LzVSMUZAFQUA=P%hu&T-mTD}2h8 
z;(MfLZK1YRD5fN&$t4w;L{KXXX-!JWGVJE(XWhjghb|Bv%K)nDQ~W5@M;47Yq<#r{PO9}?pm=JotW>_NeW_D;TXi{N@VTz& z=REk>8P$4sPkuRHwzmma6mlYiy-_mA`!;WGf0?Pe8{g4*kR*EyXkaa}Vm~@fDGtv- z9btF1^fbMEVX_m$s`BK9TgGX*LB)Wj67`IX3~QbT$;Q2BA0SsQ9$oWAuJRzjm63;G_C}p+qW6q49^8n=>K)S^Sev0z^q+E$5_)j==XEsJ<9P zuh!IiG(mP)O?8`XU1(mKUX{AlbNMaCrVrD8z*hZ{s>hA$&BBwD!rML`EblOm@E|w@ zY{?Ry^xsApdn{=`-gsQ~Mx04WS$+nzvm%Sye$gjlX*o;glSq7S&(PRCn{64T&V8?3 ztz!m9U2WZ4X1q8Dv#Lryq(C{CLpSFEBx>t7g#cFL~ zUFv@pv!mZHCu$B%(Bk6EY;9+0RJQgl4~r$QZ5q4DQN{O(_ds7j(DmIy8CjkgjuD2e z`6Kr+pH{=pxSaVIlK-)kg1=uvPSfz%qMm|wN+^L_7bankkgNW|^N3GzHEP@v>Qei% zEiA*!`+AK}Z==JL^QzO+Xfs<&3p-83eZfv;f!tmZ>=voJF4s9q%h+~|yO$=!c|>Vb zlyfhLMjPBTC-?=fS)mD3`!YiHbOK`95{Gir>P&wbFDiKv`V=KRsKA$-c#M*9sc1o{Epd8cC}3pv#qr{X<#Ej- zH_i{KxV^p)6H8AVeYL~PY;Cg4DN~|tpE8kzg=??!#)hTskRzpuh0-ho z>sX0Ou|TYo37+sINo%iYx;C`$$k4gG5tS!dT#Fhj(yN8d_k_CwV;v31x*Y*#>^JzL z2N!(>{e?SD+`I5V@1?=h>0Plkz?4Y%TZ!-h$Xy^_*WPfv`>^Se3;I^FQ!0q#-N2M$ zi?&gG&Qob(;$ZqMtSd5GH%Q==TfwCjd;jKLNjXnttF~*jm}*NO_KB9{1%fhf<0Q|> zNveBViD=z;>%7@!D~ji_$VH5LBTv4HI~T9S;`>k^RB3tXxgxBVp3W3h#N%8UQG>axf&;soR)@;S@DeV7SD^^Z=TPv9yl4|8w2u5pdb z@=E7a@gqZr%@&y`%l$!F8aixQK4Su``?R$hSVV*_$mX`l`W1vkZB}#HY*Jj$T^kjf zfoIR*kMZWuyIh|w_-cE}j+1;WwqhwWMbGs2@64b*e|fP?($DrRPi}Afm-{{3eg!+0 zw&WFt*4vhSa5mF3eMTD*8s@GqG3hB)!* zA8R9O{|Y}3q`@!T{p`?g+`*9ABzNDy;lc4HxXU0*fe`M9lX_sB#HwC&-quf+qJdR! 
zT(TQTDk-j`m75=wu@5FKiC}q@IK|F~dswtQ{B|sj58qf>x!rmYmSzHN z1CX7$t@-|6->78#!(GYI6%nW067-M6qi=ied`^o!U(az=ZLZ0*d2eV}4u@3{XBS5h zu{gd6rP^iam-kjoTMMsC24yHDB4mn($+u~YkO~T*Z_20$z&t+MS(qWbsfx0MO>wUqz+SOby+tq=P zs)%+|Ss!PfPO&Xhyxf;^6ZS5wX7^xU7kptak&CAb)!aFJVQ(%+TE~OMHjlbC+cpK4 z`<2_5xLVA7GY$s2;L3(&c;sVd2-jJ~0pE^&T6Vx5U zNQGN@9!lD@`%QrUC%1N3*~D)>u)Ui_(S--cBvjm zbJLca6|8Thvb!KpT~1O%cjNm*JbGvkzqzC~^{4>ZHeq7w`eooBbJ)`|9JBdl)` zw&Y^dp%NQA*$8ut6*f}UA8Qtv9%LDwK5>g<**c@+*ejRkY23|~3f!?Fr_d1lWwnb( zs)Pt=$B&hxQsg-987{W3R>GP-_zISZNd3Cq_&!(kqe!bUMK_cEcci*|BIR`tQ1+?1 zHkc66ITr%>VgrP$tO98^HwUF%UEd3RbU&6TGifb<%LiAm*=|6@$Gt2q|K-VqBI1Fm z>n&yM>Ro3`?wLA8y_fWUS7US%+p3C*5{Q#dmMF=$6===nGJn6*SXX2iIG7@r#%fxK zKYa7p?t~KOCS02trCIs|z{}k@6f)F#*bfCY9;$#J!G2A9Je63)Bl^CHSB7^bH3~9X0M=^ zvcrxyR(??u|0aKp+E&4vmU-ayu$i1g^XJ2_Rih~bbCPj+4UQ7}0x!8U&Ps(W1mK3f~dd{zIvwE|)v%uc*v&vQ(?KKtIzDnI$z*srCMqZ%pSxF7d4y*V%=MIOp; zyxy>7%u`k=npOSNsB6gyS4MJZcNXkU>>%aL z`R^QSM~Mm?;3e;y8^=`W^CGiV(s-#C32Jkg_?>xrx-RvL=D|8!ui0;{7)=Z9e@;hV zqbG2m`Myp2=s*@*IIMkdL-E5P#PXr;BmsXi0U%i;kwMPl56EFb zs7TxgK#$Ozi8L~W1jrdUgBXKM_Xj7`4G;)} zrtH8Gjb=WgG0Z^;u|6oS4@l+!TVF#lHiyBG>yyY0Jv?&~y{?Bwtv}(`2c`94T>+u1 zv5p{rI0%-JndqPCaq^}y04}o{%%m}rL8cIY7R;0C$tXVa#d!M@?lE}OIR<&Rzh-4zdpEz>RgN-ryO6Gk*_Yb~Q-Lad`*WMU)ekay`G_bc|09+ZuXomF*K%-EKD8L2yImZZwaRHQ{b8sXGPXC-k zg9!ECb1*mtM9Y85!4M$d{P&!qB1qW(ltW>Z|K1mk`b!Rm0X_SdE)0nVYx1W(82ax$ zU>Nvcalql2zv4h4kblL2LZZO_`g0F79Pw9dCUx2M;>!>0N5$YAV6h) kaBlqQU`D0V0S1C*;#V&^fktPR35`$!iL#80?h!oXznpN;>Hq)$ literal 0 HcmV?d00001 diff --git a/tests/test_merge.py b/tests/test_merge.py index fc869fc..3239dba 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -1,7 +1,7 @@ import shutil +import subprocess import tempfile from pathlib import Path -from typing import List import pikepdf import pytest @@ -15,18 +15,27 @@ from tests.utils import call_run_with_server_error_handling -@pytest.fixture() -def create_files(): +def extract_text(pdf_path: Path) -> str: """ - Creates 2 files in a temporary directory and cleans them up - 
after their use + Using pdftotext from poppler, extracts the text of a PDF into a file, + then reads the file contents and returns it """ - temp_dir = Path(tempfile.mkdtemp()) - test_file = SAMPLE_DIR / "sample1.pdf" - other_test_file = temp_dir / "sample2.pdf" - other_test_file.write_bytes(test_file.read_bytes()) - yield [test_file, other_test_file] - shutil.rmtree(temp_dir, ignore_errors=True) + with tempfile.NamedTemporaryFile( + mode="w+", + ) as tmp: + subprocess.run( + [ # noqa: S603 + shutil.which("pdftotext"), + "-q", + "-layout", + "-enc", + "UTF-8", + str(pdf_path), + tmp.name, + ], + check=True, + ) + return tmp.read() class TestMergePdfs: @@ -37,12 +46,15 @@ class TestMergePdfs: def test_merge_files_pdf_a( self, client: GotenbergClient, - create_files: List[Path], gt_format: PdfAFormat, pike_format: str, ): with client.merge.merge() as route: - resp = call_run_with_server_error_handling(route.merge(create_files).pdf_format(gt_format)) + resp = call_run_with_server_error_handling( + route.merge([SAMPLE_DIR / "z_first_merge.pdf", SAMPLE_DIR / "a_merge_second.pdf"]).pdf_format( + gt_format, + ), + ) assert resp.status_code == codes.OK assert "Content-Type" in resp.headers @@ -58,14 +70,31 @@ def test_merge_files_pdf_a( if SAVE_OUTPUTS: (SAVE_DIR / f"test_libre_office_convert_xlsx_format_{pike_format}.pdf").write_bytes(resp.content) - def test_pdf_a_multiple_file( + def test_merge_multiple_file( self, client: GotenbergClient, - create_files: List[Path], ): - with client.merge.merge() as route: - resp = call_run_with_server_error_handling(route.merge(create_files)) + if shutil.which("pdftotext") is None: + pytest.skip("No pdftotext executable found") + else: + with client.merge.merge() as route: + # By default, these would not merge correctly + route.merge([SAMPLE_DIR / "z_first_merge.pdf", SAMPLE_DIR / "a_merge_second.pdf"]) + resp = call_run_with_server_error_handling(route) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + 
assert resp.headers["Content-Type"] == "application/pdf" + + with tempfile.NamedTemporaryFile(mode="wb") as tmp: + tmp.write(resp.content) + + text = extract_text(tmp.name) + lines = text.split("\n") + # Extra is empty line + assert len(lines) == 3 + assert "first PDF to be merged." in lines[0] + assert "second PDF to be merged." in lines[1] - assert resp.status_code == codes.OK - assert "Content-Type" in resp.headers - assert resp.headers["Content-Type"] == "application/pdf" + if SAVE_OUTPUTS: + (SAVE_DIR / "test_pdf_a_multiple_file.pdf").write_bytes(resp.content) diff --git a/tests/utils.py b/tests/utils.py index 0db432b..29f97d7 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -33,16 +33,16 @@ def call_run_with_server_error_handling(route: BaseRoute) -> Response: one attempt to parse. This will wait the following: - - Attempt 1 - 20s following failure - - Attempt 2 - 40s following failure - - Attempt 3 - 80s following failure - - Attempt 4 - 160s - - Attempt 5 - 320s + - Attempt 1 - 5s following failure + - Attempt 2 - 10s following failure + - Attempt 3 - 20s following failure + - Attempt 4 - 40s following failure + - Attempt 5 - 80s following failure """ result = None succeeded = False - retry_time = 20.0 + retry_time = 5.0 retry_count = 0 max_retry_count = 5