update: v0.3.2
Zeyi-Lin committed May 19, 2024
1 parent 3a380b7 commit 054a54c
Showing 8 changed files with 166 additions and 68 deletions.
2 changes: 1 addition & 1 deletion .vitepress/config.mts
@@ -40,7 +40,7 @@ export default defineConfig({
link: base_path_api + '/api-index',
activeMatch: '/zh/api/',
},
{ text: 'v0.3.0', items: [
{ text: 'v0.3.2', items: [
{ text: '更新日志', link: base_path_guide_cloud + '/general/changelog' },
{ text: '参与贡献', link: 'https://github.com/SwanHubX/SwanLab/blob/main/CONTRIBUTING.md' },
{ text: '建议反馈', link: 'https://geektechstudio.feishu.cn/share/base/form/shrcnyBlK8OMD0eweoFcc2SvWKc'}
10 changes: 5 additions & 5 deletions zh/api/py-Audio.md
@@ -5,15 +5,15 @@
```python
Audio(
data_or_path: Union[str, np.ndarray, List["Audio"]],
sample_rate: int = None,
sample_rate: int = 44100,
caption: str = None,
) -> None
```

| Parameter | Description |
|-------------|--------------------------------------------------------------------------------------------------------|
| data_or_path | (Union[str, np.ndarray, List["Audio"]]) Accepts an audio file path or a numpy array. The Audio class detects the type of the received data and converts it accordingly. |
| sample_rate | (int) The sample rate of the audio. Required when passing a raw numpy array of audio data. |
| sample_rate | (int) The sample rate of the audio; defaults to 44100. |
| caption | (str) The caption of the audio, used to label the audio when it is displayed in the experiment dashboard. |

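To make the parameters above concrete, the following minimal sketch logs audio both from a file path and from a raw numpy array with an explicit sample rate; the file name `speech.wav` is a hypothetical placeholder.

```python
import numpy as np
import swanlab

run = swanlab.init()

# From an audio file path (hypothetical file "speech.wav")
audio_from_file = swanlab.Audio("speech.wav", caption="speech")

# From a raw numpy array, overriding the default sample rate of 44100
tone = np.random.randn(2, 100000)
audio_from_array = swanlab.Audio(tone, sample_rate=16000, caption="tone")

run.log({"examples": [audio_from_file, audio_from_array]})
```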
## Introduction
@@ -35,7 +35,7 @@ run = swanlab.init()
# Create audio as a numpy array
white_noise = np.random.randn(2, 100000)
# Pass it to swanlab.Audio and set the sample rate
audio = swanlab.Audio(white_noise, sample_rate=44100, caption="white_noise")
audio = swanlab.Audio(white_noise, caption="white_noise")

run.log({"examples": audio})
```
@@ -51,8 +51,8 @@ run = swanlab.init()
# Create a list
examples = []
for i in range(3):
white_noise = np.random.randn(2, 100000)
audio = swanlab.Audio(white_noise, sample_rate=44100, caption="audio_{i}")
white_noise = np.random.randn(100000)
audio = swanlab.Audio(white_noise, caption=f"audio_{i}")
# Append swanlab.Audio objects to the list
examples.append(audio)

145 changes: 92 additions & 53 deletions zh/examples/mnist.md
@@ -4,6 +4,8 @@
Image classification, introductory machine learning, grayscale images
:::

[Online experiment demo](https://swanlab.cn/@ZeyiLin/MNIST-example/runs/4plp6w0qehoqpt0uq2tcy/chart)

## Overview

MNIST handwritten digit recognition is one of the most classic introductory tasks in deep learning, proposed by LeCun et al.
@@ -37,8 +39,10 @@ import os
import torch
from torch import nn, optim, utils
import torch.nn.functional as F
import torchvision
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.models import ResNet18_Weights
import swanlab

# Build the CNN network
@@ -82,78 +86,113 @@ def log_images(loader, num_images=16):
if images_logged >= num_images:
break
swanlab.log({"MNIST-Preview": logged_images})



def train(model, device, train_dataloader, optimizer, criterion, epoch, num_epochs):
model.train()
# 1. Loop over train_dataloader, taking one batch of images and labels each time
for iter, (inputs, labels) in enumerate(train_dataloader):
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
# 2. Pass the inputs to the model to get predictions
outputs = model(inputs)
# 3. Pass the outputs and labels to the loss function to compute the cross-entropy loss
loss = criterion(outputs, labels)
# 4. Backpropagate from the loss
loss.backward()
# 5. The optimizer updates the model parameters
optimizer.step()
print('Epoch [{}/{}], Iteration [{}/{}], Loss: {:.4f}'.format(epoch, num_epochs, iter + 1, len(train_dataloader),
loss.item()))
# 6. Every 20 iterations, log the loss with SwanLab
if iter % 20 == 0:
swanlab.log({"train/loss": loss.item()})

def test(model, device, val_dataloader, epoch):
model.eval()
correct = 0
total = 0
with torch.no_grad():
# 1. Loop over val_dataloader, taking one batch of images and labels each time
for inputs, labels in val_dataloader:
inputs, labels = inputs.to(device), labels.to(device)
# 2. Pass the inputs to the model to get predictions
outputs = model(inputs)
# 3. Get the predicted digits
_, predicted = torch.max(outputs, 1)
total += labels.size(0)
# 4. Count the predictions that match the labels
correct += (predicted == labels).sum().item()

# 5. Compute the final test accuracy
accuracy = correct / total
# 6. Log the accuracy with SwanLab
swanlab.log({"val/accuracy": accuracy}, step=epoch)


if __name__ == "__main__":

# Check whether MPS is available
try:
use_mps = torch.backends.mps.is_available()
except AttributeError:
use_mps = False

# Check whether CUDA is available
if torch.cuda.is_available():
device = "cuda"
elif use_mps:
device = "mps"
else:
device = "cpu"

# Initialize swanlab
run = swanlab.init(
project="MNIST-example",
experiment_name="ConvNet",
description="Train ConvNet on MNIST dataset.",
experiment_name="PlainCNN",
config={
"model": "CNN",
"model": "ResNet18",
"optim": "Adam",
"lr": 0.001,
"batch_size": 512,
"lr": 1e-4,
"batch_size": 256,
"num_epochs": 10,
"train_dataset_num": 55000,
"val_dataset_num": 5000,
"device": device,
},
)

# Set up the training, validation, and test sets
# Set up the MNIST training and validation sets
dataset = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor())
train_dataset, val_dataset = utils.data.random_split(
dataset, [run.config.train_dataset_num, run.config.val_dataset_num]
)
train_dataset, val_dataset = utils.data.random_split(dataset, [55000, 5000])

train_loader = utils.data.DataLoader(train_dataset, batch_size=run.config.batch_size, shuffle=True)
val_loader = utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False)
train_dataloader = utils.data.DataLoader(train_dataset, batch_size=run.config.batch_size, shuffle=True)
val_dataloader = utils.data.DataLoader(val_dataset, batch_size=8, shuffle=False)

# (Optional) Preview the first 16 images of the dataset
log_images(train_dataloader, 16)

# Initialize the model, loss function, and optimizer
# Initialize the model
model = ConvNet()
model.to(torch.device(device))

# Print the model
print(model)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=run.config.lr)

# (Optional) Preview the first 16 images of the dataset
log_images(train_loader, 16)

# Start training
for epoch in range(1, run.config.num_epochs):
swanlab.log({"train/epoch": epoch})
# Training loop
for iter, batch in enumerate(train_loader):
x, y = batch
optimizer.zero_grad()
output = model(x)
loss = criterion(output, y)
loss.backward()
optimizer.step()

print(
f"Epoch [{epoch}/{run.config.num_epochs}], Iteration [{iter + 1}/{len(train_loader)}], Loss: {loss.item()}"
)

if iter % 20 == 0:
swanlab.log({"train/loss": loss.item()}, step=(epoch - 1) * len(train_loader) + iter)

# Validate every 2 epochs
if epoch % 2 == 0:
model.eval()
correct = 0
total = 0
with torch.no_grad():
for batch in val_loader:
x, y = batch
output = model(x)
_, predicted = torch.max(output, 1)
total += y.size(0)
correct += (predicted == y).sum().item()

accuracy = correct / total
swanlab.log({"val/accuracy": accuracy})

# Start the training and testing loop
for epoch in range(1, run.config.num_epochs+1):
swanlab.log({"train/epoch": epoch}, step=epoch)
train(model, device, train_dataloader, optimizer, criterion, epoch, run.config.num_epochs)
if epoch % 2 == 0:
test(model, device, val_dataloader, epoch)

# Save the model
# Create the checkpoint directory automatically if it does not exist
if not os.path.exists("checkpoint"):
os.makedirs("checkpoint")
torch.save(model.state_dict(), 'checkpoint/latest_checkpoint.pth')
```

## Demo
1 change: 1 addition & 0 deletions zh/guide_cloud/community/paper-cite.md
@@ -5,6 +5,7 @@
```bibtex
@software{Zeyilin_SwanLab_2023,
author = {Zeyi Lin, Shaohong Chen, Kang Li, Qiushan Jiang, Zirui Cai, Kaifang Ji and {The SwanLab team}},
doi = {10.5281/zenodo.11100550},
license = {Apache-2.0},
title = {{SwanLab}},
url = {https://github.com/swanhubx/swanlab},
8 changes: 6 additions & 2 deletions zh/guide_cloud/experiment_track/log-experiment-metric.md
@@ -19,20 +19,24 @@ for epoch in range(num_epochs):

When `swanlab.log` records metrics, it aggregates the `{metric name: metric}` dictionaries into one unified storage location, keyed by metric name.

⚠️ Note that the value in `swanlab.log({key: value})` must be one of three types: `int` / `float` / `BaseType` (if a `str` is passed, it will first try to convert it to `float`; if the conversion fails, an error is raised). The `BaseType` type mainly covers multimedia data; see [Log media data](/zh/guide_cloud/experiment_track/log-media.md) for details.

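The sketch below illustrates the accepted value types, assuming the conversion behavior described above; the metric names are arbitrary examples.

```python
import swanlab

swanlab.init()

swanlab.log({"loss": 0.25})      # float: logged directly
swanlab.log({"epoch": 3})        # int: logged directly
swanlab.log({"lr": "0.001"})     # str: converted to float 0.001 before logging
# swanlab.log({"note": "hello"}) # a str that cannot be converted to float raises an error
```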
Each record is assigned a `step`. By default, `step` starts at 0, and each time you log under the same metric name, the `step` equals the largest historical `step` of that metric name + 1. For example:

```python
import swanlab
swanlab.init()

...

swanlab.log({"loss": loss, "acc": acc})
# In this record, the step of loss is 0 and the step of acc is 0

swanlab.log({"loss": loss, "iter": iter})
# In this record, the step of loss is 1, the step of acc is 0, and the step of iter is 0
# In this record, the step of loss is 1, the step of iter is 0, and the step of acc is 0

swanlab.log({"loss": loss, "iter": iter})
# In this record, the step of loss is 2, the step of acc is 0, and the step of iter is 1
# In this record, the step of loss is 2, the step of iter is 1, and the step of acc is 0
```
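The `step` can also be set explicitly instead of relying on the automatic increment, as the MNIST example in these docs does. A minimal sketch:

```python
import swanlab

swanlab.init()

for epoch in range(1, 4):
    # Tie the metric's step directly to the epoch number
    swanlab.log({"val/accuracy": 0.9 + 0.01 * epoch}, step=epoch)
```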

## Metric grouping
19 changes: 17 additions & 2 deletions zh/guide_cloud/general/changelog.md
@@ -5,9 +5,24 @@
Upgrade to a specific version: `pip install swanlab==version`, e.g. `pip install swanlab==0.3.1`
:::

## v0.3.2 - 2024.5.xx
## v0.3.2 - 2024.5.17

**👥 Integrations**
- Integration with Tensorboard: supports converting `Tensorboard` log files into `SwanLab` experiments, [guide](/zh/guide_cloud/integration/integration-tensorboard.md)

**🚀 New features**
- Support downloading line charts as PNG images
- SwanLab experiments can now be embedded in online documents (Feishu, Notion, and other online docs that support embedding web pages)
- Table view supports exporting CSV
- Table view supports a metrics-only view

**⚡️ Improvements**
- Improved the numeric display in line charts and the table view

**⚙️ Bug fixes**
- Fixed a display bug in the config table when `swanlab.config` loads a `hydra` config file on Windows
- Fixed the SwanLab login issue in Jupyter Notebook

...

## v0.3.1 - 2024.5.3

3 changes: 1 addition & 2 deletions zh/guide_cloud/general/organization.md
@@ -2,8 +2,7 @@

:::warning Beta notice

During the closed beta, creating an organization requires a beta invitation code.<br>
This restriction will no longer apply after the public beta.
To create an organization, fill out the questionnaire to apply for a beta invitation code.<br>

:::

46 changes: 43 additions & 3 deletions zh/guide_cloud/integration/integration-tensorboard.md
@@ -8,12 +8,52 @@

## Method 1: Command-line conversion

```bash
swanlab convert [TFEVENT_LOGDIR] -t tensorboard
```
swanlab convert [TFEVENT_LOGDIR]

Here, `[TFEVENT_LOGDIR]` refers to the path of the log files generated when you previously logged experiments with Tensorboard.

The SwanLab Converter automatically detects `tfevent` files under the given path and its subdirectories (the default subdirectory depth is 3) and creates one SwanLab experiment for each `tfevent` file.
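For illustration, a hypothetical log directory might look like the sketch below, where each detected `tfevent` file becomes its own SwanLab experiment:

```
./runs
├── exp1
│   └── events.out.tfevents.1700000000.host0   ->  SwanLab experiment 1
└── exp2
    └── events.out.tfevents.1700000001.host0   ->  SwanLab experiment 2
```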

## Method 2: In-code conversion

```python
from swanlab.converter import TFBConverter

tfb_converter = TFBConverter(convert_dir="[TFEVENT_LOGDIR]")
tfb_converter.run()
```

## Method 2: In code
The effect is the same as the command-line conversion.

## Parameter list

| Parameter | Corresponding CLI flag | Description |
| ---- | ---------- | --------------------- |
| convert_dir | - | Path to the tfevent files |
| project | -p, --project | SwanLab project name |
| workspace | -w, --workspace | SwanLab workspace name |
| config | | SwanLab hyperparameter configuration |
| cloud | --cloud | Whether to use the cloud version; defaults to True |
| logdir | -l, --logdir | Path where SwanLab log files are saved |

Example:

```python
from swanlab.convert import TFBConverter
from swanlab.converter import TFBConverter

tfb_converter = TFBConverter(
convert_dir="./runs",
project="Tensorboard-Converter",
workspace="SwanLab",
cloud=False,
logdir="./logs",
)
tfb_converter.run()
```

The equivalent CLI command:
```bash
swanlab convert ./runs -t tensorboard -p Tensorboard-Converter -w SwanLab --cloud false -l ./logs
```
