Skip to content

Commit

Permalink
Merge branch 'master' into frontier
Browse files Browse the repository at this point in the history
  • Loading branch information
binary-husky committed Dec 15, 2023
2 parents c0a36e3 + f4127a9 commit d3f7267
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 7 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/build-with-all-capacity-beta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Publishes a Docker image built from docs/GithubAction+AllCapacityBeta to the
# GitHub Container Registry (ghcr.io) on every push to master.
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: build-with-all-capacity-beta

on:
  push:
    branches:
      - 'master'

env:
  REGISTRY: ghcr.io
  # Derived from the repository, e.g. ghcr.io/<owner>/<repo>_with_all_capacity_beta
  IMAGE_NAME: ${{ github.repository }}_with_all_capacity_beta

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    permissions:
      contents: read   # checkout only needs read access
      packages: write  # required to push the image to ghcr.io

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Log in to the Container registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          # GITHUB_TOKEN is provided automatically by Actions; no PAT needed.
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          # The Dockerfile lives under docs/ with a non-standard name.
          file: docs/GithubAction+AllCapacityBeta
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
2 changes: 1 addition & 1 deletion crazy_functions/latex_fns/latex_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍后 ...', chatbot, history) # 刷新Gradio前端界面
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
if os.path.exists(pj(work_folder, '..', 'translation')):
Expand Down
53 changes: 53 additions & 0 deletions docs/GithubAction+AllCapacityBeta
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# All-capacity (beta) image for gpt_academic: CUDA 11.3 runtime + TeX Live base
# with the optional LLM backends and the knowledge-base plugin pre-installed.
#
# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 .
# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta --network=host .
# docker run -it --net=host gpt-academic-all-capacity bash

# Base image from an NVIDIA source so the GPU can be used
# (the CUDA version reported by the host's nvidia-smi must be >= 11.3)
FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest

# use python3 as the system default python
WORKDIR /gpt
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8

# # Optional step: switch pip to a mirror (the following three lines can be deleted)
# RUN echo '[global]' > /etc/pip.conf && \
# echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
# echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf

# Install PyTorch (CUDA 11.3 wheels)
RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
# Prepare pip dependencies
RUN python3 -m pip install openai numpy arxiv rich
RUN python3 -m pip install colorama Markdown pygments pymupdf
RUN python3 -m pip install python-docx moviepy pdfminer
RUN python3 -m pip install zh_langchain==0.2.1 pypinyin
RUN python3 -m pip install rarfile py7zr
RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
# Clone the repository (shallow) plus the MOSS backend
WORKDIR /gpt
RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
WORKDIR /gpt/gpt_academic
RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss

RUN python3 -m pip install -r requirements.txt
RUN python3 -m pip install -r request_llms/requirements_moss.txt
RUN python3 -m pip install -r request_llms/requirements_qwen.txt
RUN python3 -m pip install -r request_llms/requirements_chatglm.txt
RUN python3 -m pip install -r request_llms/requirements_newbing.txt
RUN python3 -m pip install nougat-ocr

# Warm up the tiktoken module (pre-downloads encodings into the image)
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'

# Install the extra dependencies for the knowledge-base (vector DB) plugin
RUN apt-get update && apt-get install libgl1 -y
RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
RUN pip3 install unstructured[all-docs] --upgrade
RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests


# COPY .cache /root/.cache
# COPY config_private.py config_private.py
# Launch
CMD ["python3", "-u", "main.py"]
2 changes: 1 addition & 1 deletion docs/GithubAction+NoLocal+Vectordb
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ RUN apt-get update && apt-get install libgl1 -y
RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
RUN pip3 install unstructured[all-docs] --upgrade
RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'

# 可选步骤,用于预热模块
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'

# 启动
CMD ["python3", "-u", "main.py"]
2 changes: 0 additions & 2 deletions request_llms/requirements_chatglm_onnx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,3 @@ sentencepiece
numpy
onnxruntime
sentencepiece
streamlit
streamlit-chat
1 change: 0 additions & 1 deletion request_llms/requirements_moss.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@ accelerate
matplotlib
huggingface_hub
triton
streamlit

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pypdf2==2.12.1
tiktoken>=0.3.3
requests[socks]
pydantic==1.10.11
protobuf==3.18
transformers>=4.27.1
scipdf_parser>=0.52
python-markdown-math
Expand Down
9 changes: 7 additions & 2 deletions toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,14 +1007,19 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
def get_token_num(txt):
return len(tokenizer.encode(txt, disallowed_special=()))
input_token_num = get_token_num(inputs)

if max_token_limit < 5000: output_token_expect = 256 # 4k & 2k models
elif max_token_limit < 9000: output_token_expect = 512 # 8k models
else: output_token_expect = 1024 # 16k & 32k models

if input_token_num < max_token_limit * 3 / 4:
# 当输入部分的token占比小于限制的3/4时,裁剪时
# 1. 把input的余量留出来
max_token_limit = max_token_limit - input_token_num
# 2. 把输出用的余量留出来
max_token_limit = max_token_limit - 128
max_token_limit = max_token_limit - output_token_expect
# 3. 如果余量太小了,直接清除历史
if max_token_limit < 128:
if max_token_limit < output_token_expect:
history = []
return history
else:
Expand Down

0 comments on commit d3f7267

Please sign in to comment.