From aebaa8748c733d8493210f5106b0efd9b7adab42 Mon Sep 17 00:00:00 2001
From: Thomas Steen Rasmussen
Date: Sun, 10 Nov 2024 09:23:23 +0100
Subject: [PATCH] refactor a bit, introduce setuptools-scm and linting,
 prepare to publish package

---
 .github/workflows/pypi.yml |  31 ++++++++
 .pre-commit-config.yaml    |  19 +++++
 pyproject.toml             |  11 ++-
 src/.bma_client.py.swp     | Bin 24576 -> 0 bytes
 src/bma_client.py          | 145 ++++++++++++++++++++++++-------------
 5 files changed, 152 insertions(+), 54 deletions(-)
 create mode 100644 .github/workflows/pypi.yml
 create mode 100644 .pre-commit-config.yaml
 delete mode 100644 src/.bma_client.py.swp

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 0000000..d8deb26
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,31 @@
+---
+name: "Publish PyPi package when a new tag is pushed"
+
+on:  # yamllint disable-line rule:truthy
+  push:
+    tags:
+      - 'v*'
+
+# https://docs.pypi.org/trusted-publishers/using-a-publisher/
+jobs:
+  pypi-publish:
+    name: "upload release to PyPI"
+    runs-on: "ubuntu-latest"
+    environment: "pypi-publish"
+    permissions:
+      id-token: "write"
+    steps:
+      # https://github.com/pypa/sampleproject/blob/main/.github/workflows/release.yml
+      - name: "Checkout"
+        uses: "actions/checkout@v3"
+      - name: "Set up Python"
+        uses: "actions/setup-python@v4"
+        with:
+          python-version: '3.11'
+      - name: "Install build dependencies"
+        run: "python -m pip install -U setuptools wheel build"
+      - name: "Build"
+        run: "python -m build ."
+      - name: "Publish package distributions to PyPI"
+        uses: "pypa/gh-action-pypi-publish@release/v1"
+...
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..889b153
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,19 @@
+---
+default_language_version:
+  python: "python3"
+repos:
+  - repo: "https://github.com/astral-sh/ruff-pre-commit"
+    rev: "v0.7.3"
+    hooks:
+      - id: "ruff"
+        args: ["--fix"]
+      - id: "ruff-format"
+  - repo: "https://github.com/pre-commit/mirrors-mypy"
+    rev: 'v1.13.0'
+    hooks:
+      - id: "mypy"
+        additional_dependencies:
+          - "exifread"
+          - "httpx"
+          - "pillow"
+...
diff --git a/pyproject.toml b/pyproject.toml
index 9b38b9d..43dea79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,8 @@
 [build-system]
-requires = ["setuptools"]
+requires = ["setuptools>=64", "setuptools_scm>=8"]
 build-backend = "setuptools.build_meta"
 
+
 [project]
 authors = [
     {email = "thomas@gibfest.dk"},
@@ -16,11 +17,11 @@ dependencies = [
     "httpx==0.27.2",
     "pillow==11.0.0",
 ]
-description = "BornHack Media Archive Python Client Library"
 name = "bma-client"
-version = "0.1"
+description = "BornHack Media Archive Python Client Library"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.11"
+dynamic = ["version"]
 
 [project.optional-dependencies]
 dev = [
@@ -30,6 +31,8 @@ dev = [
 
 [project.urls]
 homepage = "https://github.com/bornhack/bma-client-python"
 
+[tool.setuptools_scm]
+
 [tool.setuptools]
 package-dir = {"" = "src"}
diff --git a/src/.bma_client.py.swp b/src/.bma_client.py.swp
deleted file mode 100644
index 2ee11f4016aa33621eed06034b2f31c0f105cfcb..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

diff --git a/src/bma_client.py b/src/bma_client.py
--- a/src/bma_client.py
+++ b/src/bma_client.py
 list[dict[str, str]]:
         """Get a filtered list of the jobs this user has access to."""
         r = self.client.get(self.base_url + f"/api/v1/json/jobs/{job_filter}").raise_for_status()
         response = r.json()["bma_response"]
-        logger.debug(f"Returning {len(response)} jobs")
+        logger.debug(f"Returning {len(response)} jobs with filter {job_filter}")
         return response
 
     def get_file_info(self, file_uuid: uuid.UUID) -> dict[str, str]:
@@ -99,12 +101,12 @@ def get_file_info(self, file_uuid: uuid.UUID) -> dict[str, str]:
         r = self.client.get(self.base_url + f"/api/v1/json/files/{file_uuid}/").raise_for_status()
         return r.json()["bma_response"]
 
-    def download(self, file_uuid: uuid.UUID) -> bytes:
+    def download(self, file_uuid: uuid.UUID) -> dict[str, str]:
         """Download a file from BMA."""
         info = self.get_file_info(file_uuid=file_uuid)
         path = self.path / info["filename"]
         if not path.exists():
-            url = self.base_url + info["links"]["downloads"]["original"]
+            url = self.base_url + info["links"]["downloads"]["original"]  # type: ignore[index]
             logger.debug(f"Downloading file {url} ...")
             r = self.client.get(url).raise_for_status()
             logger.debug(f"Done downloading {len(r.content)} bytes, saving to {path}")
@@ -119,39 +121,44 @@ def get_job_assignment(self, file_uuid: uuid.UUID | None = None) -> list[dict[st
             url += f"?file_uuid={file_uuid}"
         data = {"client_uuid": self.uuid}
         try:
-            r = self.client.post(url, data=json.dumps(data)).raise_for_status()
+            r = self.client.post(url, json=data).raise_for_status()
             response = r.json()["bma_response"]
         except httpx.HTTPStatusError as e:
             if e.response.status_code == HTTPStatus.NOT_FOUND:
                 response = []
             else:
                 raise
-        logger.debug(f"Returning {len(response)} jobs")
+        logger.debug(f"Returning {len(response)} assigned jobs")
         return response
 
     def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[str, dict[str, str]]:
         """Upload a file."""
-        # is this an image?
-        extension = path.suffix[1:]
-        for extensions in self.settings["filetypes"]["images"].values():
-            if extension.lower() in extensions:
-                # this file has the extension of a supported image
-                logger.debug(f"Extension {extension} is supported...")
+        # get mimetype
+        with path.open("rb") as fh:
+            mimetype = magic.from_buffer(fh.read(2048), mime=True)
+
+        # find filetype (image, video, audio or document) from mimetype
+        for filetype in self.settings["filetypes"]:
+            if mimetype in self.settings["filetypes"][filetype]:
                 break
         else:
-            # file type not supported
-            raise ValueError(f"{path.suffix}")
-
-        # get image dimensions
-        with Image.open(path) as image:
-            rotated = ImageOps.exif_transpose(image)  # creates a copy with rotation normalised
-            logger.debug(
-                f"Image has exif rotation info, using post-rotate size {rotated.size} instead of raw size {image.size}"
+            # unsupported mimetype
+            logger.error(
+                f"Mimetype {mimetype} is not supported by this BMA server. Supported types {self.settings['filetypes']}"
             )
-            width, height = rotated.size
-
-        with path.open("rb") as fh:
-            mimetype = magic.from_buffer(fh.read(2048), mime=True)
+            raise ValueError(mimetype)
+
+        if filetype == "image":
+            # get image dimensions
+            with Image.open(path) as image:
+                rotated = ImageOps.exif_transpose(image)  # creates a copy with rotation normalised
+                if rotated is None:
+                    raise ValueError("Rotation")
+                logger.debug(
+                    f"Image has exif rotation info, using post-rotate size {rotated.size}"
+                    f"instead of raw size {image.size}"
+                )
+                width, height = rotated.size
 
         # open file
         with path.open("rb") as fh:
@@ -160,10 +167,15 @@ def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[s
         data = {
             "attribution": attribution,
             "license": file_license,
-            "width": width,
-            "height": height,
             "mimetype": mimetype,
         }
+        if filetype == "image":
+            data.update(
+                {
+                    "width": width,
+                    "height": height,
+                }
+            )
         # doit
         r = self.client.post(
             self.base_url + "/api/v1/json/files/upload/",
@@ -172,18 +184,47 @@ def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[s
         )
         return r.json()
 
-    def handle_job(self, job: dict[str, str], orig: Path) -> tuple[Image.Image, Image.Exif]:
-        """Do the thing and return the result."""
+    def handle_job(self, job: dict[str, str], orig: Path) -> None:
+        """Do the thing and upload the result."""
+        result: JobResult
+        # get the result of the job
         if job["job_type"] == "ImageConversionJob":
-            return self.handle_image_conversion_job(job=job, orig=orig)
-        if job["job_type"] == "ImageExifExtractionJob":
-            return self.get_exif(orig)
-        logger.error(f"Unsupported job type {job['job_type']}")
-        return None
+            result = self.handle_image_conversion_job(job=job, orig=orig)
+            filename = job["job_uuid"] + "." + job["filetype"].lower()
+ job["filetype"].lower() + elif job["job_type"] == "ImageExifExtractionJob": + result = self.get_exif(fname=orig) + filename = "exif.json" + else: + logger.error(f"Unsupported job type {job['job_type']}") + + self.write_and_upload_result(job=job, result=result, filename=filename) + + def write_and_upload_result(self, job: dict[str, str], result: JobResult, filename: str) -> None: + """Encode and write the job result to a buffer, then upload.""" + with BytesIO() as buf: + if job["job_type"] == "ImageConversionJob": + image, exif = result + if not isinstance(image, Image.Image) or not isinstance(exif, Image.Exif): + raise ValueError("Fuck") + # apply format specific encoding options + kwargs = {} + if job["mimetype"] in self.settings["encoding"]["images"]: + # this format has custom encoding options, like quality/lossless, apply them + kwargs.update(self.settings["encoding"]["images"][job["mimetype"]]) + logger.debug(f"Format {job['mimetype']} has custom encoding settings, kwargs is now: {kwargs}") + else: + logger.debug(f"No custom settings for format {job['mimetype']}") + image.save(buf, format=job["filetype"], exif=exif, **kwargs) + elif job["job_type"] == "ImageExifExtractionJob": + logger.debug(f"Got exif data {result}") + buf.write(json.dumps(result).encode()) + else: + logger.error("Unsupported job type") + raise RuntimeError(job["job_type"]) + self.upload_job_result(job_uuid=uuid.UUID(job["job_uuid"]), buf=buf, filename=filename) - def handle_image_conversion_job(self, job: dict[str, str], orig: Path) -> tuple[Image.Image, Image.Exif]: + def handle_image_conversion_job(self, job: dict[str, str], orig: Path) -> ImageConversionJobResult: """Handle image conversion job.""" - # load original image start = time.time() logger.debug(f"Opening original image {orig}...") image = Image.open(orig) @@ -193,29 +234,33 @@ def handle_image_conversion_job(self, job: dict[str, str], orig: Path) -> tuple[ logger.debug("Rotating image (if needed)...") start = time.time() - image = ImageOps.exif_transpose(image) # creates a copy with rotation normalised + ImageOps.exif_transpose(image, in_place=True) # creates a copy with rotation normalised + if image is None: + raise ValueError("NoImage") orig_ar = Fraction(*image.size) - logger.debug(f"Rotating image took {time.time() - start} seconds, image is now {image.size} original AR is {orig_ar}") + logger.debug( + f"Rotating image took {time.time() - start} seconds, image is now {image.size} original AR is {orig_ar}" + ) logger.debug("Getting exif metadata from image...") start = time.time() exif = image.getexif() logger.debug(f"Getting exif data took {time.time() - start} seconds") - size = job["width"], job["height"] + size = int(job["width"]), int(job["height"]) ratio = Fraction(*size) - if job['custom_aspect_ratio']: - orig = "custom" + if job["custom_aspect_ratio"]: + orig_str = "custom" else: - orig = "original" + orig_str = "original" if orig_ar != ratio: - orig += "(ish)" - logger.debug(f"Desired image size is {size}, aspect ratio: {ratio} ({orig}), converting image...") + orig_str += "(ish)" + logger.debug(f"Desired image size is {size}, aspect ratio: {ratio} ({orig_str}), converting image...") start = time.time() # custom AR or not? 
-        if job['custom_aspect_ratio']:
-            image = ImageOps.fit(image, size)
+        if job["custom_aspect_ratio"]:
+            image = ImageOps.fit(image, size)  # type: ignore[assignment]
         else:
             image.thumbnail(size)
         logger.debug(f"Converting image size and AR took {time.time() - start} seconds")
@@ -243,7 +288,7 @@ def upload_job_result(self, job_uuid: uuid.UUID, buf: "BytesIO", filename: str)
         logger.debug(f"Done, it took {t} seconds to upload {size} bytes, speed {round(size/t)} bytes/sec")
         return r.json()
 
-    def get_exif(self, fname: Path) -> dict[str, dict[str, str]]:
+    def get_exif(self, fname: Path) -> ExifExtractionJobResult:
         """Return a dict with exif data as read by exifread from the file.
 
         exifread returns a flat dict of key: value pairs where the key
@@ -253,7 +298,7 @@ def get_exif(self, fname: Path) -> ExifExtractionJobResult:
         """
         with fname.open("rb") as f:
             tags = exifread.process_file(f, details=True)
-        grouped = {}
+        grouped: dict[str, dict[str, str]] = {}
         for tag, value in tags.items():
             if tag in SKIP_EXIF_TAGS:
                 logger.debug(f"Skipping exif tag {tag}")