From 054a54cf1b53fd5a29152e5c7b2bb840967508fa Mon Sep 17 00:00:00 2001
From: ZeYi Lin <944270057@qq.com>
Date: Sun, 19 May 2024 15:05:17 +0800
Subject: [PATCH] update: v0.3.2
---
.vitepress/config.mts | 2 +-
zh/api/py-Audio.md | 10 +-
zh/examples/mnist.md | 145 +++++++++++-------
zh/guide_cloud/community/paper-cite.md | 1 +
.../experiment_track/log-experiment-metric.md | 8 +-
zh/guide_cloud/general/changelog.md | 19 ++-
zh/guide_cloud/general/organization.md | 3 +-
.../integration/integration-tensorboard.md | 46 +++++-
8 files changed, 166 insertions(+), 68 deletions(-)
diff --git a/.vitepress/config.mts b/.vitepress/config.mts
index 81c10aa..4f75a91 100644
--- a/.vitepress/config.mts
+++ b/.vitepress/config.mts
@@ -40,7 +40,7 @@ export default defineConfig({
link: base_path_api + '/api-index',
activeMatch: '/zh/api/',
},
- { text: 'v0.3.0', items: [
+ { text: 'v0.3.2', items: [
{ text: '更新日志', link: base_path_guide_cloud + '/general/changelog' },
{ text: '参与贡献', link: 'https://github.com/SwanHubX/SwanLab/blob/main/CONTRIBUTING.md' },
{ text: '建议反馈', link: 'https://geektechstudio.feishu.cn/share/base/form/shrcnyBlK8OMD0eweoFcc2SvWKc'}
diff --git a/zh/api/py-Audio.md b/zh/api/py-Audio.md
index 5147d40..21a9ce8 100644
--- a/zh/api/py-Audio.md
+++ b/zh/api/py-Audio.md
@@ -5,7 +5,7 @@
```python
Audio(
data_or_path: Union[str, np.ndarray, List["Audio"]],
- sample_rate: int = None,
+ sample_rate: int = 44100,
caption: str = None,
) -> None
```
@@ -13,7 +13,7 @@ Audio(
| 参数 | 描述 |
|-------------|--------------------------------------------------------------------------------------------------------|
| data_or_path | (Union[str, np.ndarray, List["Audio"]]) 接收音频文件路径、numpy数组。Audio类将判断接收的数据类型做相应的转换。 |
-| sample_rate | (int) 音频的采样率。当传入音频数据的原始numpy数组时必须传入该参数。 |
+| sample_rate | (int) 音频的采样率,默认为44100。 |
| caption | (str) 音频的标签。用于在实验看板中展示音频时进行标记。 |
## 介绍
@@ -35,7 +35,7 @@ run = swanlab.init()
# 创建一个numpy array类型的音频
white_noise = np.random.randn(2, 100000)
# 传入swanlab.Audio,设置采样率
-audio = swanlab.Audio(white_noise, sample_rate=44100, caption="white_noise")
+audio = swanlab.Audio(white_noise, caption="white_noise")
run.log({"examples": audio})
```
@@ -51,8 +51,8 @@ run = swanlab.init()
# 创建一个列表
examples = []
for i in range(3):
- white_noise = np.random.randn(2, 100000)
- audio = swanlab.Audio(white_noise, sample_rate=44100, caption="audio_{i}")
+ white_noise = np.random.randn(100000)
+ audio = swanlab.Audio(white_noise, caption=f"audio_{i}")
# 列表中添加swanlab.Audio类型对象
examples.append(audio)
diff --git a/zh/examples/mnist.md b/zh/examples/mnist.md
index 790fb00..b636581 100644
--- a/zh/examples/mnist.md
+++ b/zh/examples/mnist.md
@@ -4,6 +4,8 @@
图像分类、机器学习入门、灰度图像
:::
+[在线实验Demo](https://swanlab.cn/@ZeyiLin/MNIST-example/runs/4plp6w0qehoqpt0uq2tcy/chart)
+
## 概述
MNIST手写体识别是深度学习最经典的入门任务之一,由 LeCun 等人提出。
@@ -37,8 +39,10 @@ import os
import torch
from torch import nn, optim, utils
import torch.nn.functional as F
+import torchvision
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
+from torchvision.models import ResNet18_Weights
import swanlab
# CNN网络构建
@@ -82,78 +86,113 @@ def log_images(loader, num_images=16):
if images_logged >= num_images:
break
swanlab.log({"MNIST-Preview": logged_images})
-
+
+
+def train(model, device, train_dataloader, optimizer, criterion, epoch, num_epochs):
+ model.train()
+ # 1. 循环调用train_dataloader,每次取出1个batch_size的图像和标签
+ for iter, (inputs, labels) in enumerate(train_dataloader):
+ inputs, labels = inputs.to(device), labels.to(device)
+ optimizer.zero_grad()
+ # 2. 传入到CNN模型中得到预测结果
+ outputs = model(inputs)
+ # 3. 将结果和标签传入损失函数中计算交叉熵损失
+ loss = criterion(outputs, labels)
+ # 4. 根据损失计算反向传播
+ loss.backward()
+ # 5. 优化器执行模型参数更新
+ optimizer.step()
+ print('Epoch [{}/{}], Iteration [{}/{}], Loss: {:.4f}'.format(epoch, num_epochs, iter + 1, len(train_dataloader),
+ loss.item()))
+ # 6. 每20次迭代,用SwanLab记录一下loss的变化
+ if iter % 20 == 0:
+ swanlab.log({"train/loss": loss.item()})
+
+def test(model, device, val_dataloader, epoch):
+ model.eval()
+ correct = 0
+ total = 0
+ with torch.no_grad():
+ # 1. 循环调用val_dataloader,每次取出1个batch_size的图像和标签
+ for inputs, labels in val_dataloader:
+ inputs, labels = inputs.to(device), labels.to(device)
+ # 2. 传入到CNN模型中得到预测结果
+ outputs = model(inputs)
+ # 3. 获得预测的数字
+ _, predicted = torch.max(outputs, 1)
+ total += labels.size(0)
+ # 4. 计算与标签一致的预测结果的数量
+ correct += (predicted == labels).sum().item()
+
+ # 5. 得到最终的测试准确率
+ accuracy = correct / total
+ # 6. 用SwanLab记录一下准确率的变化
+ swanlab.log({"val/accuracy": accuracy}, step=epoch)
+
if __name__ == "__main__":
+ #检测是否支持mps
+ try:
+ use_mps = torch.backends.mps.is_available()
+ except AttributeError:
+ use_mps = False
+
+ #检测是否支持cuda
+ if torch.cuda.is_available():
+ device = "cuda"
+ elif use_mps:
+ device = "mps"
+ else:
+ device = "cpu"
+
# 初始化swanlab
run = swanlab.init(
project="MNIST-example",
- experiment_name="ConvNet",
- description="Train ConvNet on MNIST dataset.",
+ experiment_name="PlainCNN",
config={
- "model": "CNN",
+ "model": "ResNet18",
"optim": "Adam",
- "lr": 0.001,
- "batch_size": 512,
+ "lr": 1e-4,
+ "batch_size": 256,
"num_epochs": 10,
- "train_dataset_num": 55000,
- "val_dataset_num": 5000,
+ "device": device,
},
)
- # 设置训练机、验证集和测试集
+ # 设置MNIST训练集和验证集
dataset = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
- train_dataset, val_dataset = utils.data.random_split(
- dataset, [run.config.train_dataset_num, run.config.val_dataset_num]
- )
+ train_dataset, val_dataset = utils.data.random_split(dataset, [55000, 5000])
- train_loader = utils.data.DataLoader(train_dataset, batch_size=run.config.batch_size, shuffle=True)
- val_loader = utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False)
+ train_dataloader = utils.data.DataLoader(train_dataset, batch_size=run.config.batch_size, shuffle=True)
+ val_dataloader = utils.data.DataLoader(val_dataset, batch_size=8, shuffle=False)
+
+ # (可选)看一下数据集的前16张图像
+ log_images(train_dataloader, 16)
- # 初始化模型、损失函数和优化器
+ # 初始化模型
model = ConvNet()
+ model.to(torch.device(device))
+
+ # 打印模型
+ print(model)
+
+ # 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=run.config.lr)
- # (可选)看一下数据集的前16张图像
- log_images(train_loader, 16)
-
- # 开始训练
- for epoch in range(1, run.config.num_epochs):
- swanlab.log({"train/epoch": epoch})
- # 训练循环
- for iter, batch in enumerate(train_loader):
- x, y = batch
- optimizer.zero_grad()
- output = model(x)
- loss = criterion(output, y)
- loss.backward()
- optimizer.step()
-
- print(
- f"Epoch [{epoch}/{run.config.num_epochs}], Iteration [{iter + 1}/{len(train_loader)}], Loss: {loss.item()}"
- )
-
- if iter % 20 == 0:
- swanlab.log({"train/loss": loss.item()}, step=(epoch - 1) * len(train_loader) + iter)
-
- # 每4个epoch验证一次
- if epoch % 2 == 0:
- model.eval()
- correct = 0
- total = 0
- with torch.no_grad():
- for batch in val_loader:
- x, y = batch
- output = model(x)
- _, predicted = torch.max(output, 1)
- total += y.size(0)
- correct += (predicted == y).sum().item()
-
- accuracy = correct / total
- swanlab.log({"val/accuracy": accuracy})
-
+ # 开始训练和测试循环
+ for epoch in range(1, run.config.num_epochs+1):
+ swanlab.log({"train/epoch": epoch}, step=epoch)
+ train(model, device, train_dataloader, optimizer, criterion, epoch, run.config.num_epochs)
+ if epoch % 2 == 0:
+ test(model, device, val_dataloader, epoch)
+
+ # 保存模型
+ # 如果不存在checkpoint文件夹,则自动创建一个
+ if not os.path.exists("checkpoint"):
+ os.makedirs("checkpoint")
+ torch.save(model.state_dict(), 'checkpoint/latest_checkpoint.pth')
```
## 效果演示
diff --git a/zh/guide_cloud/community/paper-cite.md b/zh/guide_cloud/community/paper-cite.md
index 647717b..422daee 100644
--- a/zh/guide_cloud/community/paper-cite.md
+++ b/zh/guide_cloud/community/paper-cite.md
@@ -5,6 +5,7 @@
```bibtex
@software{Zeyilin_SwanLab_2023,
author = {Zeyi Lin, Shaohong Chen, Kang Li, Qiushan Jiang, Zirui Cai, Kaifang Ji and {The SwanLab team}},
+ doi = {10.5281/zenodo.11100550},
license = {Apache-2.0},
title = {{SwanLab}},
url = {https://github.com/swanhubx/swanlab},
diff --git a/zh/guide_cloud/experiment_track/log-experiment-metric.md b/zh/guide_cloud/experiment_track/log-experiment-metric.md
index 7133f9d..71221e0 100644
--- a/zh/guide_cloud/experiment_track/log-experiment-metric.md
+++ b/zh/guide_cloud/experiment_track/log-experiment-metric.md
@@ -19,20 +19,24 @@ for epoch in range(num_epochs):
在 `swanlab.log` 记录时,会根据指标名,将`{指标名: 指标}`字典汇总到一个统一位置存储。
+⚠️需要注意的是,`swanlab.log({key: value})`中的value必须是`int` / `float` / `BaseType`这三种类型(如果传入的是`str`类型,会先尝试转为`float`,如果转换失败就会报错),其中`BaseType`类型主要是多媒体数据,详情请看[记录多媒体数据](/zh/guide_cloud/experiment_track/log-media.md)。
+
在每次记录时,会为该次记录赋予一个 `step`。在默认情况下,`step` 为0开始,并在你每一次在同一个指标名下记录时,`step` 等于该指标名历史记录的最大 `step` + 1,例如:
```python
import swanlab
swanlab.init()
+...
+
swanlab.log({"loss": loss, "acc": acc})
# 此次记录中,loss的step为0, acc的step为0
swanlab.log({"loss": loss, "iter": iter})
-# 此次记录中,loss的step为1, acc的step为0, iter的step为0
+# 此次记录中,loss的step为1, iter的step为0, acc的step为0
swanlab.log({"loss": loss, "iter": iter})
-# 此次记录中,loss的step为2, acc的step为0, iter的step为1
+# 此次记录中,loss的step为2, iter的step为1, acc的step为0
```
## 指标分组
diff --git a/zh/guide_cloud/general/changelog.md b/zh/guide_cloud/general/changelog.md
index 25300e9..b8f33f9 100644
--- a/zh/guide_cloud/general/changelog.md
+++ b/zh/guide_cloud/general/changelog.md
@@ -5,9 +5,24 @@
升级到指定版本:`pip install swanlab==version`,如`pip install swanlab==0.3.1`
:::
-## v0.3.2 - 2024.5.xx
+## v0.3.2 - 2024.5.17
+
+**👥集成**
+- 与Tensorboard集成,支持将`Tensorboard`日志文件转换为`SwanLab`实验,[指引](/zh/guide_cloud/integration/integration-tensorboard.md)
+
+**🚀新增功能**
+- 支持下载折线图为PNG图像
+- SwanLab实验可以被嵌入到在线文档中了(飞书/Notion等支持嵌入网页的在线文档)
+- 表格视图支持导出CSV
+- 表格视图支持仅看指标
+
+**⚡️改进**
+- 优化了折线图与表格视图的数值显示
+
+**⚙️修复问题**
+- 修复了在Windows系统下,`swanlab.config`载入`hydra`配置文件时,config表格的显示Bug
+- 解决SwanLab在Jupyter Notebook中的登录问题
-...
## v0.3.1 - 2024.5.3
diff --git a/zh/guide_cloud/general/organization.md b/zh/guide_cloud/general/organization.md
index 2902263..6b85eca 100644
--- a/zh/guide_cloud/general/organization.md
+++ b/zh/guide_cloud/general/organization.md
@@ -2,8 +2,7 @@
:::warning 内测提醒
-内测期间,组织的创建需要填写内测码。
-公测后将不再有此限制。
+组织的创建需要填写问卷,申请内测邀请码。
:::
diff --git a/zh/guide_cloud/integration/integration-tensorboard.md b/zh/guide_cloud/integration/integration-tensorboard.md
index 16c12cf..d398d59 100644
--- a/zh/guide_cloud/integration/integration-tensorboard.md
+++ b/zh/guide_cloud/integration/integration-tensorboard.md
@@ -8,12 +8,52 @@
## 方式一:命令行转换
+```bash
+swanlab convert [TFEVENT_LOGDIR] -t tensorboard
```
-swanlab convert [TFEVENT_LOGDIR]
+
+这里的`[TFEVENT_LOGDIR]`是指你先前用Tensorboard记录实验时,生成的日志文件路径。
+
+SwanLab Converter将会自动检测文件路径及其子目录下的`tfevent`文件(默认子目录深度为3),并为每个`tfevent`文件生成一个SwanLab实验。
+
+## 方式二:代码内转换
+
+```python
+from swanlab.converter import TFBConverter
+
+tfb_converter = TFBConverter(convert_dir="[TFEVENT_LOGDIR]")
+tfb_converter.run()
```
-## 方式二:代码内
+效果与命令行转换一致。
+
+## 参数列表
+
+| 参数 | 对应CLI参数 | 描述 |
+| ---- | ---------- | --------------------- |
+| convert_dir | - | Tfevent文件路径 |
+| project | -p, --project | SwanLab项目名 |
+| workspace | -w, --workspace | SwanLab工作空间名 |
+| config | 无 | SwanLab超参数配置 |
+| cloud | --cloud | 是否使用云端版,默认为True |
+| logdir | -l, --logdir | SwanLab日志文件保存路径 |
+
+例子:
```python
-from swanlab.convert import TFBConverter
+from swanlab.converter import TFBConverter
+
+tfb_converter = TFBConverter(
+ convert_dir="./runs",
+ project="Tensorboard-Converter",
+ workspace="SwanLab",
+ cloud=False,
+ logdir="./logs",
+ )
+tfb_converter.run()
+```
+
+与之作用相同的CLI:
+```bash
+swanlab convert ./runs -t tensorboard -p Tensorboard-Converter -w SwanLab --cloud false -l ./logs
```
\ No newline at end of file