[MiniCPM/Qwen2_VL]
1. rename --tokenizer to --tokenizer_path
2. add run_compile.sh file in MiniCPM/Qwen2
3. fix some precision bugs in MiniCPM, such as dynamic input image size
   and multi-image patches (not just single)
4. load eager model in Qwen2_VL (without modifying the source code)
chuxiaoyi2023 committed Dec 1, 2024
1 parent 8b5b8b4 commit e19dab9
Showing 22 changed files with 584 additions and 189 deletions.
11 changes: 10 additions & 1 deletion docs/FAQ.md
@@ -241,4 +241,13 @@ cd build && cmake .. && make

``` shell
pip3 install git+https://github.com/huggingface/transformers
```

### Q18 `torch.onnx.errors.UnsupportedOperatorError`

The full error is: `torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::_convolution_mode' to ONNX opset version 15 is not supported. Please feel free to request support or submit a pull request on PyTorch GitHub: https://github.com/pytorch/pytorch/issues`

This usually occurs on the Conv operators of multimodal models when the installed torch version is too old; upgrade torch:
``` shell
pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
```
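After upgrading, it helps to confirm the installed version actually satisfies the minimum before re-running the export. A small sketch of such a check (the `version_ge` helper is hypothetical, and it relies on GNU `sort -V` for version ordering):

```shell
# Hypothetical helper: succeed when version $1 >= version $2.
version_ge() {
    # sort -V orders version strings; if the minimum sorts first, $1 is new enough
    [ "$(printf '%s\n' "$2" "$1" | sort -V | head -n 1)" = "$2" ]
}

installed="2.5.1"   # e.g. obtained from: python3 -c 'import torch; print(torch.__version__)'
if version_ge "$installed" "2.0.1"; then
    echo "torch is new enough"
fi
```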
4 changes: 2 additions & 2 deletions models/MiniCPM-V-2_6/README.md
@@ -81,9 +81,9 @@ cd build && cmake .. && make && cp *cpython* .. && cd ..
Run the program as follows:

```
- python3 pipeline.py --model_path minicpmv26_bm1684x_int4.bmodel --tokenizer ../support/token_config --devid 0
+ python3 pipeline.py --model_path minicpmv26_bm1684x_int4.bmodel --tokenizer_path ../support/token_config --devid 0
```
- model is the actual model storage path; tokenizer_path is the storage path of the tokenizer configuration
+ model_path is the actual model storage path; tokenizer_path is the storage path of the tokenizer configuration

* Running result

6 changes: 4 additions & 2 deletions models/MiniCPM-V-2_6/compile/README.md
@@ -4,7 +4,9 @@

```shell
pip install transformers_stream_generator einops tiktoken accelerate torch==2.0.1+cpu torchvision==0.15.2 transformers==4.40.0
- cp files/openbmb-MiniCPM-V-2_6/modeling_qwen2.py /usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/
+ cp files/MiniCPM-V-2_6/modeling_qwen2.py /usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/
+ cp files/MiniCPM-V-2_6/resampler.py your_torch_model
+ cp files/MiniCPM-V-2_6/modeling_navit_siglip.py your_torch_model
```
your_torch_model is the path where your model is stored
```shell
python3 export_onnx.py --model_path your_torch_model --seq_length 512 --device c
```
## Compile bmodel
Use io_alone
```
- ./compile.sh --mode int4 --name qwen2-7b --addr_mode io_alone --seq_length 512
+ ./compile.sh --mode int4 --name minicpmv26 --seq_length 512
```
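The `--seq_length` option this commit adds is consumed by the same `while`/`case`/`shift` loop compile.sh already uses for `--mode` and `--name`. A self-contained sketch of that parsing pattern, with example arguments baked in via `set --`:

```shell
# Simulate a command line like: ./compile.sh --mode int4 --name minicpmv26 --seq_length 512
set -- --mode int4 --name minicpmv26 --seq_length 512

while [ $# -gt 0 ]; do
    case "$1" in
        --mode) mode="$2"; shift 2 ;;
        --name) name="$2"; shift 2 ;;
        --seq_length) seq_length="$2"; shift 2 ;;
        *) echo "Invalid option: $1" >&2; exit 1 ;;
    esac
done

echo "mode=$mode name=$name seq_length=$seq_length"
```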

### Download the pre-converted model
55 changes: 44 additions & 11 deletions models/MiniCPM-V-2_6/compile/compile.sh
@@ -6,7 +6,7 @@ quantize_args=""
name="minicpmv26"

chip="bm1684x"
- num_layers=28
+ num_layers=
out_model=$name.bmodel

while [[ $# -gt 0 ]]; do
@@ -21,6 +21,10 @@ while [[ $# -gt 0 ]]; do
name="$2"
shift 2
;;
--seq_length)
seq_length="$2"
shift 2
;;
*)
echo "Invalid option: $key" >&2
exit 1
@@ -32,6 +36,15 @@
esac
done

if [ "$name" = "minicpmv26" ]; then
num_layers=28
hidden_size=3584
echo "Compile MiniCPM-V-2_6"
else
echo -e "Error: Invalid name $name, the input name must be \033[31mminicpmv26\033[0m"
exit 1
fi

if [ x$mode == x"int8" ]; then
quantize_args="--quantize W8BF16"
elif [ x$mode == x"bf16" ]; then
@@ -45,14 +58,17 @@ fi

onnx_dir=$PWD/tmp/onnx
folder='tmp/'$name'_'$chip'_'$mode
- out_model=$name'_'$chip'_'$mode'.bmodel'
+ out_model=$name'_'$chip'_'$mode'_seq'${seq_length}'.bmodel'

# Convert block
outdir=${folder}/block
mkdir -p $outdir
pushd $outdir

- for ((i = 0; i < $num_layers; i++)); do
+ process_block()
+ {
+     i=$1

model_transform.py \
--model_name block_$i \
--model_def ${onnx_dir}/block_$i.onnx \
@@ -64,6 +80,7 @@ for ((i = 0; i < $num_layers; i++)); do
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--model block_$i.bmodel

model_transform.py \
@@ -77,14 +94,23 @@
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--addr_mode io_alone \
--model block_cache_$i.bmodel

rm *.npz *.onnx -f
}

- models=${models}${outdir}'/block_'$i'.bmodel '$outdir'/block_cache_'$i'.bmodel '

# Process each block
for ((i=0; i<$num_layers; i++)); do
process_block $i &
models="${models}${outdir}/block_${i}.bmodel ${outdir}/block_cache_${i}.bmodel "
sleep 60
done

wait # Wait for all background processes to finish
rm *.npz *.onnx -f

popd
echo $models

@@ -95,7 +121,9 @@ pushd $outdir

model_transform.py \
--model_name embedding \
- --model_def ${onnx_dir}/embedding.onnx \
+ --model_def ${onnx_dir}/embedding.pt \
+ --input_shapes "[[1,$seq_length]]" \
+ --input_types "int32" \
--mlir embedding.mlir

model_deploy.py \
@@ -104,12 +132,14 @@ model_deploy.py \
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--model embedding.bmodel

model_transform.py \
--model_name embedding_cache \
- --model_def ${onnx_dir}/embedding.onnx \
- --input_shapes [[1,1]] \
+ --model_def ${onnx_dir}/embedding.pt \
+ --input_shapes "[[1,1]]" \
+ --input_types "int32" \
--mlir embedding_cache.mlir

model_deploy.py \
@@ -118,9 +148,10 @@ model_deploy.py \
--quant_input \
--quant_output \
--chip ${chip} \
$device_args \
--model embedding_cache.bmodel

- rm *.npz *.onnx -f
+ rm *.npz *.onnx *.pt -f

models=$models' '$outdir'/embedding.bmodel '$outdir'/embedding_cache.bmodel '

@@ -134,17 +165,19 @@ pushd $outdir

model_transform.py \
--model_name lm_head \
- --model_def ${onnx_dir}/lm_head.onnx \
+ --model_def ${onnx_dir}/lm_head.pt \
+ --input_shapes "[[1,${hidden_size}]]" \
--mlir lm_head.mlir

model_deploy.py \
--mlir lm_head.mlir \
$quantize_args \
--quant_input \
--chip ${chip} \
$device_args \
--model lm_head.bmodel

- rm *.npz *.onnx -f
+ rm *.npz *.onnx *.pt -f

models=${models}${outdir}'/lm_head.bmodel '
popd
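The block section above switches from a serial loop to launching one `process_block` per layer in the background, staggering the launches, and joining with `wait`. A minimal sketch of that pattern, with a trivial job standing in for the compile and a 1-second stagger instead of compile.sh's 60:

```shell
outdir=$(mktemp -d)

for i in 0 1 2; do
    # stand-in for: process_block $i &
    ( echo "block_$i done" > "$outdir/block_$i.log" ) &
    sleep 1   # stagger launches so jobs don't all peak at once (compile.sh uses 60s)
done

wait   # block until every background job has finished
cat "$outdir"/block_*.log
```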