diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f1031ef..80cc959 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -43,7 +43,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7] + python-version: [3.11] poetry-version: ['1.2.2'] os: [ubuntu-20.04] runs-on: ${{ matrix.os }} diff --git a/Dockerfile b/Dockerfile index fd11183..1db82b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7-slim +FROM python:3.11-slim WORKDIR /labelu diff --git a/README.md b/README.md index a4bb078..d05ff88 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ LabelU offers a variety of annotation tools and features, supporting image, vide 2. After the installation is complete, run the following command in the terminal (you can choose the default 'y' for prompts during the process): ```bash -conda create -n labelu python=3.7 +conda create -n labelu python=3.11 ``` > **Note:** For Windows platform, you can run the above command in Anaconda Prompt. @@ -80,8 +80,8 @@ labelu # Download and Install miniconda # https://docs.conda.io/en/latest/miniconda.html -# Create virtual environment(python = 3.7) -conda create -n labelu python=3.7 +# Create virtual environment(python = 3.11) +conda create -n labelu python=3.11 # Activate virtual environment conda activate labelu @@ -143,7 +143,9 @@ Welcome to the OpenDataLab official WeChat group! ## Links -- [LabelU-kit](https://github.com/opendatalab/labelU-Kit) (LabelU is developed using LabelU-kit.) 
+- [LabelU-kit](https://github.com/opendatalab/labelU-Kit) Web front-end annotation kit (LabelU is based on this JavaScript kit) +- [LabelLLM](https://github.com/opendatalab/LabelLLM) An Open-source LLM Dialogue Annotation Platform +- [MinerU](https://github.com/opendatalab/MinerU) A One-stop Open-source High-quality Data Extraction Tool ## License diff --git a/README_zh-CN.md b/README_zh-CN.md index e10026a..7a92bbd 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -47,7 +47,7 @@ LabelU 提供了多种标注工具和功能,支持图像、视频、音频标 2. 安装完毕后,在终端运行以下命令(过程中的提示选择默认 `y` 即可): ```bash -conda create -n labelu python=3.7 +conda create -n labelu python=3.11 ``` > **注:** Windows 平台可在 Anaconda Prompt 程序中运行以上命令。 @@ -80,8 +80,8 @@ labelu # 安装miniconda # https://docs.conda.io/en/latest/miniconda.html -# 创建虚拟环境(python = 3.7) -conda create -n labelu python=3.7 +# 创建虚拟环境(python = 3.11) +conda create -n labelu python=3.11 # 激活虚拟环境 conda activate labelu @@ -143,7 +143,9 @@ git submodule update --remote --merge ## 友情链接 -- [LabelU-kit](https://github.com/opendatalab/labelU-Kit)(本工具都是通过 LabelU-kit 进行开发) +- [LabelU-kit](https://github.com/opendatalab/labelU-Kit) Web 前端标注套件(LabelU基于此套件开发) +- [LabelLLM](https://github.com/opendatalab/LabelLLM) 开源LLM对话标注平台 +- [MinerU](https://github.com/opendatalab/MinerU) 一站式高质量数据提取工具 ## 开源许可证 diff --git a/labelu/alembic_labelu/versions/bc8fcb35b66b_add_media_and_pre_annotation.py b/labelu/alembic_labelu/versions/bc8fcb35b66b_add_media_and_pre_annotation.py index 94dc625..416c5c0 100644 --- a/labelu/alembic_labelu/versions/bc8fcb35b66b_add_media_and_pre_annotation.py +++ b/labelu/alembic_labelu/versions/bc8fcb35b66b_add_media_and_pre_annotation.py @@ -43,119 +43,127 @@ def upgrade() -> None: Session = sessionmaker(bind=bind) session = Session() - with context.begin_transaction(): - # Create a new table task_pre_annotation - if not alembic_labelu_tools.table_exist("task_pre_annotation"): - op.create_table( - "task_pre_annotation", - sa.Column("id", sa.Integer, 
primary_key=True, autoincrement=True, index=True), - sa.Column("task_id", sa.Integer, sa.ForeignKey("task.id"), index=True), - sa.Column("file_id", sa.Integer, sa.ForeignKey("task_attachment.id"), index=True), - # json 字符串 - sa.Column("data", sa.Text, comment="task sample pre annotation result"), - sa.Column("created_by", sa.Integer, sa.ForeignKey("user.id"), index=True), - sa.Column("updated_by", sa.Integer, sa.ForeignKey("user.id")), - sa.Column( - "created_at", - sa.DateTime, - default=sa.func.now(), - comment="Time a task sample result was created", - ), - sa.Column( - "updated_at", - sa.DateTime, - default=sa.func.now(), - onupdate=sa.func.now(), - comment="Last time a task sample result was updated", - ), - sa.Column( - "deleted_at", - sa.DateTime, - index=True, - comment="Task delete time", - ), - ) - # Update the task_sample table - if not alembic_labelu_tools.column_exist_in_table( - "task_sample", "file_id" - ): - with op.batch_alter_table('task_sample', recreate="always") as batch_op: - batch_op.add_column( + try: + + with context.begin_transaction(): + # Create a new table task_pre_annotation + if not alembic_labelu_tools.table_exist("task_pre_annotation"): + op.create_table( + "task_pre_annotation", + sa.Column("id", sa.Integer, primary_key=True, autoincrement=True, index=True), + sa.Column("task_id", sa.Integer, sa.ForeignKey("task.id"), index=True), + sa.Column("file_id", sa.Integer, sa.ForeignKey("task_attachment.id"), index=True), + # json 字符串 + sa.Column("data", sa.Text, comment="task sample pre annotation result"), + sa.Column("created_by", sa.Integer, sa.ForeignKey("user.id"), index=True), + sa.Column("updated_by", sa.Integer, sa.ForeignKey("user.id")), sa.Column( - "file_id", - sa.Integer(), - sa.ForeignKey("task_attachment.id", name="fk_file_id"), - index=True, - comment="file id", + "created_at", + sa.DateTime, + default=sa.func.now(), + comment="Time a task sample result was created", ), - ) - - # Update the task_attachment table - if not 
alembic_labelu_tools.column_exist_in_table("task_attachment", "filename"): - with op.batch_alter_table("task_attachment", recreate="always") as batch_op_task_attachment: - batch_op_task_attachment.add_column( sa.Column( - "filename", - sa.String(256), - comment="file name", + "updated_at", + sa.DateTime, + default=sa.func.now(), + onupdate=sa.func.now(), + comment="Last time a task sample result was updated", ), - ) - batch_op_task_attachment.add_column( sa.Column( - "url", - sa.String(256), - comment="file url", + "deleted_at", + sa.DateTime, + index=True, + comment="Task delete time", ), ) - - # Update existing data in the task_sample table - task_items = session.execute( - 'SELECT id, config FROM task' - ) - - # Update the task_attachment table - attachments = session.execute( - 'SELECT id, path FROM task_attachment' - ) - - for attachment in attachments: - attachment_id = attachment[0] - attachment_path = attachment[1] - filename = os.path.basename(attachment_path) - url = f"{settings.API_V1_STR}/tasks/attachment/{attachment_path}" + # Update the task_sample table + if not alembic_labelu_tools.column_exist_in_table( + "task_sample", "file_id" + ): + with op.batch_alter_table('task_sample', recreate="always") as batch_op: + batch_op.add_column( + sa.Column( + "file_id", + sa.Integer(), + sa.ForeignKey("task_attachment.id", name="fk_file_id"), + index=True, + comment="file id", + ), + ) - if filename: - session.execute( - f"UPDATE task_attachment SET filename='{filename}', url='{url}' WHERE id={attachment_id}" - ) - - for task_item in task_items: - task_id = task_item[0] - task_samples = session.execute( - f"SELECT id, task_attachment_ids FROM task_sample WHERE task_id={task_id}" + # Update the task_attachment table + if not alembic_labelu_tools.column_exist_in_table("task_attachment", "filename"): + with op.batch_alter_table("task_attachment", recreate="always") as batch_op_task_attachment: + batch_op_task_attachment.add_column( + sa.Column( + "filename", + 
sa.String(256), + comment="file name", + ), + ) + batch_op_task_attachment.add_column( + sa.Column( + "url", + sa.String(256), + comment="file url", + ), + ) + + # Update existing data in the task_sample table + task_items = session.execute( + 'SELECT id, config FROM task' ) - - for task_sample in task_samples: - task_sample_id = task_sample[0] - attachment_ids = json.loads(task_sample[1]) - # attachment_ids 存储的是字符串[id1, id2, id3],需要转换成数组 - file_id = attachment_ids[0] - - if not file_id: - continue - - attachment = session.execute( - f"SELECT id, path FROM task_attachment WHERE id={file_id}" - ) - attachment_path = list(attachment)[0][1] + + # Update the task_attachment table + attachments = session.execute( + 'SELECT id, path FROM task_attachment' + ) + + for attachment in attachments: + attachment_id = attachment[0] + attachment_path = attachment[1] + filename = os.path.basename(attachment_path) + url = f"{settings.API_V1_STR}/tasks/attachment/{attachment_path}" - if attachment_path: - # Update the task_sample table + if filename: session.execute( - f"UPDATE task_sample SET file_id={file_id} WHERE id={task_sample_id}" + f"UPDATE task_attachment SET filename='{filename}', url='{url}' WHERE id={attachment_id}" + ) + + for task_item in task_items: + task_id = task_item[0] + task_samples = session.execute( + f"SELECT id, task_attachment_ids FROM task_sample WHERE task_id={task_id}" + ) + + for task_sample in task_samples: + task_sample_id = task_sample[0] + attachment_ids = json.loads(task_sample[1]) + # attachment_ids 存储的是字符串[id1, id2, id3],需要转换成数组 + file_id = attachment_ids[0] + + if not file_id: + continue + + attachment = session.execute( + f"SELECT id, path FROM task_attachment WHERE id={file_id}" ) - + attachment_path = list(attachment)[0][1] + + if attachment_path: + # Update the task_sample table + session.execute( + f"UPDATE task_sample SET file_id={file_id} WHERE id={task_sample_id}" + ) + + session.commit() + except Exception as e: + session.rollback() + 
raise e + finally: + session.close() def downgrade() -> None: op.drop_table("task_pre_annotation") diff --git a/labelu/internal/application/service/pre_annotation.py b/labelu/internal/application/service/pre_annotation.py index 7c7310c..26783b1 100644 --- a/labelu/internal/application/service/pre_annotation.py +++ b/labelu/internal/application/service/pre_annotation.py @@ -112,17 +112,25 @@ async def list_by( sorting: Optional[str], current_user: User, ) -> Tuple[List[PreAnnotationResponse], int]: + + pre_annotations = [] + + # 指定 sample_name 查询时,需要对所有的 pre_annotation 进行查询 + # TODO 优化查询逻辑:将上传的jsonl中的每个预标注提取并存储到单独的表(PreAnnotationDetail)中 + if sample_name: + pre_annotations = db.query(TaskPreAnnotation).all() + else: + pre_annotations = crud_pre_annotation.list_by( + db=db, + task_id=task_id, + owner_id=current_user.id, + after=after, + before=before, + pageNo=pageNo, + pageSize=None if sample_name else pageSize, + sorting=sorting, + ) - pre_annotations = crud_pre_annotation.list_by( - db=db, - task_id=task_id, - owner_id=current_user.id, - after=after, - before=before, - pageNo=pageNo, - pageSize=pageSize, - sorting=sorting, - ) total = crud_pre_annotation.count(db=db, task_id=task_id, owner_id=current_user.id) diff --git a/pyproject.toml b/pyproject.toml index ec7306d..048cce2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "labelu" -version = '1.0.5' +version = '1.0.6-alpha.3' description = "" license = "Apache-2.0" authors = ["pengjinhu "] @@ -12,7 +12,7 @@ packages = [{include = "labelu"}] labelu = "labelu.main:cli" [tool.poetry.dependencies] -python = "^3.7" +python = "^3.11" fastapi = "^0.86.0" loguru = "^0.6.0" sqlalchemy = "^1.4.43"