From 396b2f1bf08a09f24b6f17cda2c99556ad1519b9 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Tue, 23 Apr 2024 11:37:21 -0400 Subject: [PATCH] refactor: simplify parsing input_text in web_api /assemble --- readalongs/web_api.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/readalongs/web_api.py b/readalongs/web_api.py index 8403f33d..9e30bc63 100644 --- a/readalongs/web_api.py +++ b/readalongs/web_api.py @@ -28,7 +28,6 @@ """ -import io import os import tempfile from enum import Enum @@ -48,6 +47,7 @@ from readalongs.text.convert_xml import convert_xml from readalongs.text.make_dict import make_dict_list from readalongs.text.tokenize_xml import tokenize_xml +from readalongs.text.util import parse_xml from readalongs.util import get_langs # Create the app @@ -191,10 +191,7 @@ async def assemble( with capture_logs() as captured_logs: if request.mime_type == InputFormat.RAS: try: - parsed = etree.fromstring( - bytes(request.input_text or "", encoding="utf-8"), - parser=etree.XMLParser(resolve_entities=False), - ) + parsed = parse_xml(request.input_text or "") except etree.ParseError as e: raise HTTPException( status_code=422, detail="XML provided is not well-formed" @@ -208,13 +205,11 @@ async def assemble( ), ) elif request.mime_type == InputFormat.TEXT: - parsed = io.StringIO(request.input_text).readlines() - parsed = etree.fromstring( - bytes( - create_ras_from_text(parsed, text_languages=request.text_languages), - encoding="utf-8", - ), - parser=etree.XMLParser(resolve_entities=False), + parsed = parse_xml( + create_ras_from_text( + (request.input_text or "").splitlines(keepends=True), + text_languages=request.text_languages, + ) ) else: # pragma: no cover @@ -391,10 +386,7 @@ async def convert_alignment( # noqa: C901 Returns: a file in the format requested """ try: - parsed_xml = etree.fromstring( - bytes(request.ras, encoding="utf-8"), - parser=etree.XMLParser(resolve_entities=False), - ) + parsed_xml = parse_xml(request.ras) except etree.XMLSyntaxError as e: raise HTTPException( status_code=422, detail="ReadAlong provided is not well formed"