Merge branch 'main' into iluvatar_bert_hf

FlagOpen · Jan 3, 2024 · 3dd2d6b · 3dd2d6b
2 parents c275554 + 9f821a5
commit 3dd2d6b
Show file tree

Hide file tree

Showing 7 changed files with 47 additions and 11 deletions.
diff --git a/training/kunlunxin/efficientnet-pytorch/README.md b/training/kunlunxin/efficientnet-pytorch/README.md
@@ -20,16 +20,34 @@
 
 
 ### 运行情况
-| 训练资源 | 配置文件        | 运行时长(s) | 目标精度 | 收敛精度 | Steps数 | 性能 (samples/s) |
-| -------- | --------------- | ----------- | -------- | -------- | ------- | ---------------- |
-| 单机1卡  | config_R300x1x1 | /           |          | /        |         |                  |
-| 单机2卡  | config_R300x1x2 |             |          |          |         |                  |
-| 单机4卡  | config_R300x1x4 |             |          |          |         |                  |
-| 单机8卡  | config_R300x1x8 |             | 82.672   | 72.666   | 868540  |                  |
-| 两机8卡  | config_R300x2x8 | /           |          | /        |         |                  |
-
-### 收敛曲线
-![acc](acc.png)
+* 通用指标
+
+| 指标名称       | 指标值                                        | 特殊说明                                    |
+| -------------- | --------------------------------------------- | ------------------------------------------- |
+| 任务类别       | Image Classification  |                                             |
+| 模型           | EfficientNet                            |                                             |
+| 数据集         | Imagenet2012 1K                               |                                             |
+| 数据精度       | precision,见“性能指标”  | 可选fp32/amp/fp16                           |
+| 超参修改       | fix_hp,见“性能指标”     | 跑满硬件设备评测吞吐量所需特殊超参          |
+| 硬件设备简称   | R300                    |                                             |
+| 硬件存储使用   | mem,见“性能指标”        | 通常称为“显存”,单位为GiB                    |
+| 端到端时间     | e2e_time,见“性能指标”   | 总时间+Perf初始化等时间                     |
+| 总吞吐量       | p_whole,见“性能指标”    | 实际训练图片数除以总时间(performance_whole) |
+| 训练吞吐量     | p_train,见“性能指标”    | 不包含每个epoch末尾的评估部分耗时           |
+| **计算吞吐量** | **p_core,见“性能指标”** | 不包含数据IO部分的耗时(p_core>p_train>p_whole) |
+| 训练结果       | acc,见“性能指标”        | 单位为top1分类准确率(acc1)                  |
+| 额外修改项     | 无                      |                                             |
+
+
+
+* 性能指标
+
+| 配置                | precision | fix_hp         | e2e_time | p_whole | p_train | p_core | acc    | mem       |
+| ------------------- | --------- | -------------- | -------- | ------- | ------- | ------ | ------ | --------- |
+| R300单机单卡（1x1） | fp32      | /              | /        |         |         |        | /      | 22.1/32.0 |
+| R300单机8卡（1x8）  | fp32      | /              |          |         |         |        |        | 22.3/32.0 |
+| R300两机8卡（2x8）  | fp32      | /              | /        |         |         |        | /      | 22.3/32.0 |
+
 
 ### 许可证
 

diff --git a/training/kunlunxin/efficientnet-pytorch/config/config_R300x1x1.py b/training/kunlunxin/efficientnet-pytorch/config/config_R300x1x1.py
@@ -0,0 +1,5 @@
+from config_common import *
+
+train_batch_size = 64
+eval_batch_size = 128
+# epochs = 1
diff --git a/training/kunlunxin/efficientnet-pytorch/config/config_R300x2x8.py b/training/kunlunxin/efficientnet-pytorch/config/config_R300x2x8.py
@@ -0,0 +1,5 @@
+from config_common import *
+
+train_batch_size = 64
+eval_batch_size = 128
+# epochs = 4
diff --git a/training/kunlunxin/efficientnet-pytorch/config/environment_variables.sh b/training/kunlunxin/efficientnet-pytorch/config/environment_variables.sh
@@ -0,0 +1,4 @@
+export XACC=1
+export BKCL_PCIE_RING=1
+export BKCL_TIMEOUT=1800
+export XMLIR_D_XPU_L3_SIZE=66060288
diff --git a/training/kunlunxin/efficientnet-pytorch/config/requirements.txt b/training/kunlunxin/efficientnet-pytorch/config/requirements.txt
@@ -0,0 +1,2 @@
+https://download.pytorch.org/whl/cpu/torchvision-0.13.1%2Bcpu-cp38-cp38-linux_x86_64.whl
+tabulate
diff --git a/training/nvidia/vit-pytorch/README.md b/training/nvidia/vit-pytorch/README.md
@@ -33,7 +33,7 @@
 | 总吞吐量       | p_whole,见“性能指标”                          | 实际训练样本数除以总时间(performance_whole) |
 | 训练吞吐量     | p_train,见“性能指标”                          | 不包含每个epoch末尾的评估部分耗时           |
 | **计算吞吐量** | **p_core,见“性能指标”**                       | 不包含数据IO部分的耗时(p_core>p_train>p_whole) |
-| 训练结果       | val_loss,见“性能指标”                         | 验证loss                                    |
+| 训练结果       | final_acc1,见“性能指标”                       | 单位为top1分类准确率(acc1)                  |
 | 额外修改项     | 无                                            |                                             |
 
 * 性能指标

diff --git a/training/run_benchmarks/config/test_conf.py b/training/run_benchmarks/config/test_conf.py
@@ -128,6 +128,8 @@
     # "tacotron2:pytorch:R300:1:8:1": "/raid/dataset/tacotron2/LJSpeech/",
     # "transformer:pytorch:R300:1:8:1": "/raid/dataset/transformer/wmt14_en_de_joined_dict",
     # "bigtransfer:pytorch:R300:1:8:1": "/raid/dataset/ImageNet_1k_2012/",
+    # "efficientnet:pytorch:R300:1:8:1": "/raid/dataset/ImageNet_1k_2012/",
+
 
     # iluvatar cases
     # "bigtransfer:pytorch:BI-V100:1:8:1": "/raid/dataset/ImageNet_1k_2012/",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		https://download.pytorch.org/whl/cpu/torchvision-0.13.1%2Bcpu-cp38-cp38-linux_x86_64.whl
		tabulate