Skip to content

Commit

Permalink
unimernet 0.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
grimoire committed Sep 13, 2024
2 parents 85498a1 + 09fa3bb commit b6eb2a3
Show file tree
Hide file tree
Showing 143 changed files with 10,044 additions and 14,215 deletions.
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ body:
-
- "0.6.x"
- "0.7.x"
- "0.8.x"
validations:
required: true

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/cla.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
path-to-document: 'https://github.com/opendatalab/MinerU/blob/master/MinerU_CLA.md' # e.g. a CLA or a DCO document
# branch should not be protected
branch: 'master'
allowlist: myhloli,dt-yy,Focusshang,renpengli01,icecraft,drunkpig,wangbinDL,qiangqiang199,GDDGCZ518,papayalove,conghui,quyuan
allowlist: myhloli,dt-yy,Focusshang,renpengli01,icecraft,drunkpig,wangbinDL,qiangqiang199,GDDGCZ518,papayalove,conghui,quyuan,LollipopsAndWine

# the following are optional inputs - if an optional input is not given, its default value will be used
#remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
Expand Down
44 changes: 20 additions & 24 deletions .github/workflows/cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,24 @@ on:
push:
branches:
- "master"
- "dev"
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
pull_request:
branches:
- "master"
- "dev"
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
workflow_dispatch:
jobs:
cli-test:
runs-on: ubuntu-latest
timeout-minutes: 40
runs-on: pdf
timeout-minutes: 120
strategy:
fail-fast: true

Expand All @@ -28,27 +32,23 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 2

- name: check-requirements
run: |
pip install -r requirements.txt
pip install -r requirements-qa.txt
pip install magic-pdf
- name: test_cli

- name: install
run: |
cp magic-pdf.template.json ~/magic-pdf.json
echo $GITHUB_WORKSPACE
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && pytest -s -v tests/test_unit.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
- name: benchmark
echo $GITHUB_WORKSPACE && sh tests/retry_env.sh
- name: unit test
run: |
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && coverage run -m pytest tests/unittest --cov=magic_pdf/ --cov-report term-missing --cov-report html
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
- name: cli test
run: |
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_bench.py
source ~/.bashrc && cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
needs: [cli-test]
runs-on: ubuntu-latest
needs: cli-test
runs-on: pdf
steps:
- name: get_actor
run: |
Expand All @@ -67,9 +67,5 @@ jobs:
- name: notify
run: |
curl ${{ secrets.WEBHOOK_URL }} -H 'Content-Type: application/json' -d '{
"msgtype": "text",
"text": {
"mentioned_list": ["${{ env.METIONS }}"] , "content": "'${{ github.repository }}' GitHubAction Failed!\n 细节请查看:https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"
}
}'
echo ${{ secrets.USER_ID }}
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ tmp/
tmp
.vscode
.vscode/
/tests/
ocr_demo

/app/common/__init__.py
/magic_pdf/config/__init__.py
source.dev.env

tmp
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
args: ["--max-line-length=120", "--ignore=E131,E125,W503,W504,E203"]
- repo: https://github.com/PyCQA/isort
rev: 5.11.5
hooks:
Expand All @@ -11,6 +12,7 @@ repos:
rev: v0.32.0
hooks:
- id: yapf
args: ["--style={based_on_style: google, column_limit: 120, indent_width: 4}"]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.1
hooks:
Expand Down Expand Up @@ -41,4 +43,4 @@ repos:
rev: v1.3.1
hooks:
- id: docformatter
args: ["--in-place", "--wrap-descriptions", "79"]
args: ["--in-place", "--wrap-descriptions", "119"]
30 changes: 18 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Use the official Ubuntu base image
FROM ubuntu:latest
FROM ubuntu:22.04

# Set environment variables to non-interactive to avoid prompts during installation
ENV DEBIAN_FRONTEND=noninteractive
Expand Down Expand Up @@ -29,17 +29,23 @@ RUN python3 -m venv /opt/mineru_venv

# Activate the virtual environment and install necessary Python packages
RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
pip install --upgrade pip && \
pip install magic-pdf[full-cpu] detectron2 --extra-index-url https://myhloli.github.io/wheels/"

# Copy the configuration file template and set up the model directory
COPY magic-pdf.template.json /root/magic-pdf.json

# Set the models directory in the configuration file (adjust the path as needed)
RUN sed -i 's|/tmp/models|/opt/models|g' /root/magic-pdf.json

# Create the models directory
RUN mkdir -p /opt/models
pip3 install --upgrade pip && \
wget https://gitee.com/myhloli/MinerU/raw/master/requirements-docker.txt && \
pip3 install -r requirements-docker.txt --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple && \
pip3 install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/"

# Download the configuration file template, copy it to /root/magic-pdf.json, and install the latest magic-pdf
RUN /bin/bash -c "wget https://gitee.com/myhloli/MinerU/raw/master/magic-pdf.template.json && \
cp magic-pdf.template.json /root/magic-pdf.json && \
source /opt/mineru_venv/bin/activate && \
pip3 install -U magic-pdf"

# Download models and update the configuration file
RUN /bin/bash -c "pip3 install modelscope && \
wget https://gitee.com/myhloli/MinerU/raw/master/docs/download_models.py && \
python3 download_models.py && \
sed -i 's|/tmp/models|/root/.cache/modelscope/hub/opendatalab/PDF-Extract-Kit/models|g' /root/magic-pdf.json && \
sed -i 's|cpu|cuda|g' /root/magic-pdf.json"

# Set the entry point to activate the virtual environment and run the command line tool
ENTRYPOINT ["/bin/bash", "-c", "source /opt/mineru_venv/bin/activate && exec \"$@\"", "--"]
1 change: 1 addition & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
Expand Up @@ -659,3 +659,4 @@ specific requirements.
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

Loading

0 comments on commit b6eb2a3

Please sign in to comment.