From 054a54cf1b53fd5a29152e5c7b2bb840967508fa Mon Sep 17 00:00:00 2001 From: ZeYi Lin <944270057@qq.com> Date: Sun, 19 May 2024 15:05:17 +0800 Subject: [PATCH] update: v0.3.2 --- .vitepress/config.mts | 2 +- zh/api/py-Audio.md | 10 +- zh/examples/mnist.md | 145 +++++++++++------- zh/guide_cloud/community/paper-cite.md | 1 + .../experiment_track/log-experiment-metric.md | 8 +- zh/guide_cloud/general/changelog.md | 19 ++- zh/guide_cloud/general/organization.md | 3 +- .../integration/integration-tensorboard.md | 46 +++++- 8 files changed, 166 insertions(+), 68 deletions(-) diff --git a/.vitepress/config.mts b/.vitepress/config.mts index 81c10aa..4f75a91 100644 --- a/.vitepress/config.mts +++ b/.vitepress/config.mts @@ -40,7 +40,7 @@ export default defineConfig({ link: base_path_api + '/api-index', activeMatch: '/zh/api/', }, - { text: 'v0.3.0', items: [ + { text: 'v0.3.2', items: [ { text: '更新日志', link: base_path_guide_cloud + '/general/changelog' }, { text: '参与贡献', link: 'https://github.com/SwanHubX/SwanLab/blob/main/CONTRIBUTING.md' }, { text: '建议反馈', link: 'https://geektechstudio.feishu.cn/share/base/form/shrcnyBlK8OMD0eweoFcc2SvWKc'} diff --git a/zh/api/py-Audio.md b/zh/api/py-Audio.md index 5147d40..21a9ce8 100644 --- a/zh/api/py-Audio.md +++ b/zh/api/py-Audio.md @@ -5,7 +5,7 @@ ```python Audio( data_or_path: Union[str, np.ndarray, List["Audio"]], - sample_rate: int = None, + sample_rate: int = 44100, caption: str = None, ) -> None ``` @@ -13,7 +13,7 @@ Audio( | 参数 | 描述 | |-------------|--------------------------------------------------------------------------------------------------------| | data_or_path | (Union[str, np.ndarray, List["Audio"]]) 接收音频文件路径、numpy数组。Audio类将判断接收的数据类型做相应的转换。 | -| sample_rate | (int) 音频的采样率。当传入音频数据的原始numpy数组时必须传入该参数。 | +| sample_rate | (int) 音频的采样率,默认为44100。 | | caption | (str) 音频的标签。用于在实验看板中展示音频时进行标记。 | ## 介绍 @@ -35,7 +35,7 @@ run = swanlab.init() # 创建一个numpy array类型的音频 white_noise = np.random.randn(2, 100000) # 传入swanlab.Audio,设置采样率 
-audio = swanlab.Audio(white_noise, sample_rate=44100, caption="white_noise") +audio = swanlab.Audio(white_noise, caption="white_noise") run.log({"examples": audio}) ``` @@ -51,8 +51,8 @@ run = swanlab.init() # 创建一个列表 examples = [] for i in range(3): - white_noise = np.random.randn(2, 100000) - audio = swanlab.Audio(white_noise, sample_rate=44100, caption="audio_{i}") + white_noise = np.random.randn(100000) + audio = swanlab.Audio(white_noise, caption=f"audio_{i}") # 列表中添加swanlab.Audio类型对象 examples.append(audio) diff --git a/zh/examples/mnist.md b/zh/examples/mnist.md index 790fb00..b636581 100644 --- a/zh/examples/mnist.md +++ b/zh/examples/mnist.md @@ -4,6 +4,8 @@ 图像分类、机器学习入门、灰度图像 ::: +[在线实验Demo](https://swanlab.cn/@ZeyiLin/MNIST-example/runs/4plp6w0qehoqpt0uq2tcy/chart) + ## 概述 MNIST手写体识别是深度学习最经典的入门任务之一,由 LeCun 等人提出。 @@ -37,8 +39,10 @@ import os import torch from torch import nn, optim, utils import torch.nn.functional as F +import torchvision from torchvision.datasets import MNIST from torchvision.transforms import ToTensor +from torchvision.models import ResNet18_Weights import swanlab # CNN网络构建 @@ -82,78 +86,113 @@ def log_images(loader, num_images=16): if images_logged >= num_images: break swanlab.log({"MNIST-Preview": logged_images}) - + + +def train(model, device, train_dataloader, optimizer, criterion, epoch, num_epochs): + model.train() + # 1. 循环调用train_dataloader,每次取出1个batch_size的图像和标签 + for iter, (inputs, labels) in enumerate(train_dataloader): + inputs, labels = inputs.to(device), labels.to(device) + optimizer.zero_grad() + # 2. 传入到resnet18模型中得到预测结果 + outputs = model(inputs) + # 3. 将结果和标签传入损失函数中计算交叉熵损失 + loss = criterion(outputs, labels) + # 4. 根据损失计算反向传播 + loss.backward() + # 5. 优化器执行模型参数更新 + optimizer.step() + print('Epoch [{}/{}], Iteration [{}/{}], Loss: {:.4f}'.format(epoch, num_epochs, iter + 1, len(train_dataloader), + loss.item())) + # 6. 
每20次迭代,用SwanLab记录一下loss的变化 + if iter % 20 == 0: + swanlab.log({"train/loss": loss.item()}) + +def test(model, device, val_dataloader, epoch): + model.eval() + correct = 0 + total = 0 + with torch.no_grad(): + # 1. 循环调用val_dataloader,每次取出1个batch_size的图像和标签 + for inputs, labels in val_dataloader: + inputs, labels = inputs.to(device), labels.to(device) + # 2. 传入到resnet18模型中得到预测结果 + outputs = model(inputs) + # 3. 获得预测的数字 + _, predicted = torch.max(outputs, 1) + total += labels.size(0) + # 4. 计算与标签一致的预测结果的数量 + correct += (predicted == labels).sum().item() + + # 5. 得到最终的测试准确率 + accuracy = correct / total + # 6. 用SwanLab记录一下准确率的变化 + swanlab.log({"val/accuracy": accuracy}, step=epoch) + if __name__ == "__main__": + #检测是否支持mps + try: + use_mps = torch.backends.mps.is_available() + except AttributeError: + use_mps = False + + #检测是否支持cuda + if torch.cuda.is_available(): + device = "cuda" + elif use_mps: + device = "mps" + else: + device = "cpu" + # 初始化swanlab run = swanlab.init( project="MNIST-example", - experiment_name="ConvNet", - description="Train ConvNet on MNIST dataset.", + experiment_name="PlainCNN", config={ - "model": "CNN", + "model": "ResNet18", "optim": "Adam", - "lr": 0.001, - "batch_size": 512, + "lr": 1e-4, + "batch_size": 256, "num_epochs": 10, - "train_dataset_num": 55000, - "val_dataset_num": 5000, + "device": device, }, ) - # 设置训练机、验证集和测试集 + # 设置MNIST训练集和验证集 dataset = MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()) - train_dataset, val_dataset = utils.data.random_split( - dataset, [run.config.train_dataset_num, run.config.val_dataset_num] - ) + train_dataset, val_dataset = utils.data.random_split(dataset, [55000, 5000]) - train_loader = utils.data.DataLoader(train_dataset, batch_size=run.config.batch_size, shuffle=True) - val_loader = utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False) + train_dataloader = utils.data.DataLoader(train_dataset, batch_size=run.config.batch_size, shuffle=True) + val_dataloader = 
utils.data.DataLoader(val_dataset, batch_size=8, shuffle=False) + + # (可选)看一下数据集的前16张图像 + log_images(train_dataloader, 16) - # 初始化模型、损失函数和优化器 + # 初始化模型 model = ConvNet() + model.to(torch.device(device)) + + # 打印模型 + print(model) + + # 定义损失函数和优化器 criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=run.config.lr) - # (可选)看一下数据集的前16张图像 - log_images(train_loader, 16) - - # 开始训练 - for epoch in range(1, run.config.num_epochs): - swanlab.log({"train/epoch": epoch}) - # 训练循环 - for iter, batch in enumerate(train_loader): - x, y = batch - optimizer.zero_grad() - output = model(x) - loss = criterion(output, y) - loss.backward() - optimizer.step() - - print( - f"Epoch [{epoch}/{run.config.num_epochs}], Iteration [{iter + 1}/{len(train_loader)}], Loss: {loss.item()}" - ) - - if iter % 20 == 0: - swanlab.log({"train/loss": loss.item()}, step=(epoch - 1) * len(train_loader) + iter) - - # 每4个epoch验证一次 - if epoch % 2 == 0: - model.eval() - correct = 0 - total = 0 - with torch.no_grad(): - for batch in val_loader: - x, y = batch - output = model(x) - _, predicted = torch.max(output, 1) - total += y.size(0) - correct += (predicted == y).sum().item() - - accuracy = correct / total - swanlab.log({"val/accuracy": accuracy}) - + # 开始训练和测试循环 + for epoch in range(1, run.config.num_epochs+1): + swanlab.log({"train/epoch": epoch}, step=epoch) + train(model, device, train_dataloader, optimizer, criterion, epoch, run.config.num_epochs) + if epoch % 2 == 0: + test(model, device, val_dataloader, epoch) + + # 保存模型 + # 如果不存在checkpoint文件夹,则自动创建一个 + if not os.path.exists("checkpoint"): + os.makedirs("checkpoint") + torch.save(model.state_dict(), 'checkpoint/latest_checkpoint.pth') ``` ## 效果演示 diff --git a/zh/guide_cloud/community/paper-cite.md b/zh/guide_cloud/community/paper-cite.md index 647717b..422daee 100644 --- a/zh/guide_cloud/community/paper-cite.md +++ b/zh/guide_cloud/community/paper-cite.md @@ -5,6 +5,7 @@ ```bibtex @software{Zeyilin_SwanLab_2023, author = 
{Zeyi Lin, Shaohong Chen, Kang Li, Qiushan Jiang, Zirui Cai, Kaifang Ji and {The SwanLab team}}, + doi = {10.5281/zenodo.11100550}, license = {Apache-2.0}, title = {{SwanLab}}, url = {https://github.com/swanhubx/swanlab}, diff --git a/zh/guide_cloud/experiment_track/log-experiment-metric.md b/zh/guide_cloud/experiment_track/log-experiment-metric.md index 7133f9d..71221e0 100644 --- a/zh/guide_cloud/experiment_track/log-experiment-metric.md +++ b/zh/guide_cloud/experiment_track/log-experiment-metric.md @@ -19,20 +19,24 @@ for epoch in range(num_epochs): 在 `swanlab.log` 记录时,会根据指标名,将`{指标名: 指标}`字典汇总到一个统一位置存储。 +⚠️需要注意的是,`swanlab.log({key: value})`中的value必须是`int` / `float` / `BaseType`这三种类型(如果传入的是`str`类型,会先尝试转为`float`,如果转换失败就会报错),其中`BaseType`类型主要是多媒体数据,详情请看[记录多媒体数据](/zh/guide_cloud/experiment_track/log-media.md)。 + 在每次记录时,会为该次记录赋予一个 `step`。在默认情况下,`step` 为0开始,并在你每一次在同一个指标名下记录时,`step` 等于该指标名历史记录的最大 `step` + 1,例如: ```python import swanlab swanlab.init() +... + swanlab.log({"loss": loss, "acc": acc}) # 此次记录中,loss的step为0, acc的step为0 swanlab.log({"loss": loss, "iter": iter}) -# 此次记录中,loss的step为1, acc的step为0, iter的step为0 +# 此次记录中,loss的step为1, iter的step为0, acc的step为0 swanlab.log({"loss": loss, "iter": iter}) -# 此次记录中,loss的step为2, acc的step为0, iter的step为1 +# 此次记录中,loss的step为2, iter的step为1, acc的step为0 ``` ## 指标分组 diff --git a/zh/guide_cloud/general/changelog.md b/zh/guide_cloud/general/changelog.md index 25300e9..b8f33f9 100644 --- a/zh/guide_cloud/general/changelog.md +++ b/zh/guide_cloud/general/changelog.md @@ -5,9 +5,24 @@ 升级到指定版本:`pip install swanlab==version`,如`pip install swanlab==0.3.1` ::: -## v0.3.2 - 2024.5.xx +## v0.3.2 - 2024.5.17 + +**👥集成** +- 与Tensorboard集成,支持将`Tensorboard`日志文件转换为`SwanLab`实验,[指引](/zh/guide_cloud/integration/integration-tensorboard.md) + +**🚀新增功能** +- 支持下载折线图为PNG图像 +- SwanLab实验可以被嵌入到在线文档中了(飞书/Notion等支持嵌入网页的在线文档) +- 表格视图支持导出CSV +- 表格视图支持仅看指标 + +**⚡️改进** +- 优化了折线图与表格视图的数值显示 + +**⚙️修复问题** +- 修复了在Windows系统下,`swanlab.config`载入`hydra`配置文件时,config表格的显示Bug +- 
解决SwanLab在Jupyter Notebook中的登录问题 -... ## v0.3.1 - 2024.5.3 diff --git a/zh/guide_cloud/general/organization.md b/zh/guide_cloud/general/organization.md index 2902263..6b85eca 100644 --- a/zh/guide_cloud/general/organization.md +++ b/zh/guide_cloud/general/organization.md @@ -2,8 +2,7 @@ :::warning 内测提醒 -内测期间,组织的创建需要填写内测码。
-公测后将不再有此限制。 +组织的创建需要填写问卷,申请内测邀请码。
::: diff --git a/zh/guide_cloud/integration/integration-tensorboard.md b/zh/guide_cloud/integration/integration-tensorboard.md index 16c12cf..d398d59 100644 --- a/zh/guide_cloud/integration/integration-tensorboard.md +++ b/zh/guide_cloud/integration/integration-tensorboard.md @@ -8,12 +8,52 @@ ## 方式一:命令行转换 +```bash +swanlab convert [TFEVENT_LOGDIR] -t tensorboard ``` -swanlab convert [TFEVENT_LOGDIR] + +这里的`[TFEVENT_LOGDIR]`是指你先前用Tensorboard记录实验时,生成的日志文件路径。 + +SwanLab Converter将会自动检测文件路径及其子目录下的`tfevent`文件(默认子目录深度为3),并为每个`tfevent`文件生成一个SwanLab实验。 + +## 方式二:代码内转换 + +```python +from swanlab.converter import TFBConverter + +tfb_converter = TFBConverter(convert_dir="[TFEVENT_LOGDIR]") +tfb_converter.run() ``` -## 方式二:代码内 +效果与命令行转换一致。 + +## 参数列表 + +| 参数 | 对应CLI参数 | 描述 | +| ---- | ---------- | --------------------- | +| convert_dir | - | Tfevent文件路径 | +| project | -p, --project | SwanLab项目名 | +| workspace | -w, --workspace | SwanLab工作空间名 | +| config | 无 | SwanLab超参数配置 | +| cloud | --cloud | 是否使用云端版,默认为True | +| logdir | -l, --logdir | SwanLab日志文件保存路径 | + +例子: ```python -from swanlab.convert import TFBConverter +from swanlab.converter import TFBConverter + +tfb_converter = TFBConverter( + convert_dir="./runs", + project="Tensorboard-Converter", + workspace="SwanLab", + cloud=False, + logdir="./logs", + ) +tfb_converter.run() +``` + +与之作用相同的CLI: +```bash +swanlab convert ./runs -t tensorboard -p Tensorboard-Converter -w SwanLab --cloud false -l ./logs ``` \ No newline at end of file