tensorboard

This post covers the tensorboard module, i.e. how to use TensorBoard with PyTorch. It is adapted from the official help documentation; the official GitHub repository is https://github.com/lanpa/tensorboardX

TensorBoard visualization boils down to three steps:

1. Installing tensorboard

Install two packages: tensorboardX, and tensorboard (which serves the web UI)

pip install tensorboardX
pip install tensorboard

2. Creating a writer instance

First create a writer instance; by default the log files are saved under the runs directory

from tensorboardX import SummaryWriter 

writer = SummaryWriter('runs/exp-1')  # save in 'runs/exp-1'
writer2 = SummaryWriter() # save in 'runs/Aug20-17-20-33' 
writer3 = SummaryWriter(comment='3x learning rate')  # appends the comment: saves in 'runs/Aug20-17-20-33-3x learning rate'

3. Adding data

3.1 Logging scalars

add_scalar(tag, scalar_value, global_step, walltime)

from tensorboardX import SummaryWriter 
import time
writer = SummaryWriter("runs/scalar") 
x = range(100) 
for i in x:     
    time.sleep(0.1)
    writer.add_scalar('y=2x', i * 2, i, walltime=time.time()) 
writer.close()

After running the script, go to the parent directory of runs, run tensorboard --logdir=runs/scalar, and open http://localhost:6006/ to see the visualization.

add_scalars(main_tag, scalar_dict, global_step, walltime)

from tensorboardX import SummaryWriter
import numpy as np

writer = SummaryWriter()
r = 5
for i in range(100):
    writer.add_scalars('run_14h', {'xsinx': i * np.sin(i / r),
                                   'xcosx': i * np.cos(i / r),
                                   'tanx': np.tan(i / r)}, i)
writer.close()
# This call adds three values to the same scalar plot with the tag 
# 'run_14h' in TensorBoard's scalar section.

3.2 Logging text

add_text(tag, text_string, global_step, walltime)

from tensorboardX import SummaryWriter
import time

writer = SummaryWriter("runs/text")
x = range(20)
for i in x:
    # TensorBoard's text dashboard displays at most about ten entries per tag
    writer.add_text('text', "This is epoch {}".format(i), i)
    time.sleep(0.1)
writer.close()

3.3 Logging images

add_image(tag, img_tensor, global_step, walltime, dataformats)

img_tensor: Default is (3, H, W). You can use torchvision.utils.make_grid() to convert a batch of tensors into 3xHxW format, or use add_images() and let the library do the job. Tensors with shape (1, H, W), (H, W), or (H, W, 3) are also suitable as long as the corresponding dataformats argument is passed, e.g. CHW, HWC, HW.

from tensorboardX import SummaryWriter
import numpy as np
img = np.zeros((3, 100, 100))
img[0] = np.arange(0, 10000).reshape(100, 100) / 10000
img[1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000

img_HWC = np.zeros((100, 100, 3))
img_HWC[:, :, 0] = np.arange(0, 10000).reshape(100, 100) / 10000
img_HWC[:, :, 1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000

writer = SummaryWriter("runs/image")
writer.add_image('my_image', img, 0)

# If you have non-default dimension setting, set the dataformats argument.
writer.add_image('my_image_HWC', img_HWC, 0, dataformats='HWC')
writer.close()
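
The img_tensor note above mentions torchvision.utils.make_grid(); here is a minimal sketch of tiling a batch into a single 3xHxW image that add_image accepts directly (the runs/image_grid directory and the tag are illustrative names, and torchvision is assumed to be installed):

import torch
from torchvision.utils import make_grid
from tensorboardX import SummaryWriter

batch = torch.rand(16, 3, 100, 100)         # a batch of 16 random RGB images (NCHW)
grid = make_grid(batch, nrow=4, padding=2)  # tiles the batch into one (3, H, W) image

writer = SummaryWriter("runs/image_grid")
writer.add_image('my_image_grid', grid, 0)  # fits add_image's default CHW format
writer.close()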

add_images(tag, img_tensor, global_step, walltime, dataformats='NCHW')

img_tensor: Default is (N, 3, H, W). If dataformats is specified, other shapes will be accepted, e.g. NCHW or NHWC.

from tensorboardX import SummaryWriter 
import numpy as np 

img_batch = np.zeros((16, 3, 100, 100)) 
for i in range(16):
    img_batch[i, 0] = np.arange(0, 10000).reshape(100, 100) / 10000 / 16 * i
    img_batch[i, 1] = (1 - np.arange(0, 10000).reshape(100, 100) / 10000) / 16 * i
writer = SummaryWriter("runs/images") 
writer.add_images('my_image_batch', img_batch, 0) 
writer.close()

3.4 Adding a computation graph

add_graph(model, input_to_model=None, verbose=False)

import torch
import torch.nn as nn
import torch.nn.functional as F
from tensorboardX import SummaryWriter


class Net1(nn.Module):
    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        self.bn = nn.BatchNorm2d(20)

    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2)
        x = F.relu(x) + F.relu(-x)
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = self.bn(x)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x


dummy_input = torch.rand(13, 1, 28, 28)

model = Net1()
# using a with statement means we don't need to call w.close() explicitly
# with SummaryWriter(comment='Net1') as w:
with SummaryWriter("runs/graph") as w:
    # first argument: the model to record
    # second argument: the model input, passed as a tuple
    w.add_graph(model, (dummy_input,), verbose=True)
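
If the model's forward method takes more than one tensor, the inputs are still passed as a tuple to add_graph. A minimal sketch under that assumption (TwoInputNet and runs/graph2 are illustrative names, not part of the original example):

import torch
import torch.nn as nn
from tensorboardX import SummaryWriter


class TwoInputNet(nn.Module):
    """Toy model whose forward takes two tensors."""
    def __init__(self):
        super(TwoInputNet, self).__init__()
        self.fc_a = nn.Linear(10, 4)
        self.fc_b = nn.Linear(20, 4)

    def forward(self, a, b):
        return self.fc_a(a) + self.fc_b(b)


dummy_a = torch.rand(8, 10)
dummy_b = torch.rand(8, 20)

with SummaryWriter("runs/graph2") as w:
    # each element of the tuple is matched to one positional argument of forward()
    w.add_graph(TwoInputNet(), (dummy_a, dummy_b))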

4. Running tensorboard

Once the program has written its event logs, you can visualize them with tensorboard --logdir=<your_log_dir>. For example, if tensorboard/runs contains two folders, linear and graph, cd into the tensorboard folder and run tensorboard --logdir=runs to serve both of them; if you only want to serve one of them, run tensorboard --logdir=runs/linear.

5. A common tensorboard example

import torch
from tensorboardX import SummaryWriter

class TensorboardLogger(object):
    def __init__(self, log_dir):
        self.writer = SummaryWriter(logdir=log_dir)
        self.step = 0

    def set_step(self, step=None):
        if step is not None:
            self.step = step
        else:
            self.step += 1

    def update(self, head='scalar', step=None, **kwargs):
        for k, v in kwargs.items():
            if v is None:
                continue
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.writer.add_scalar(head + "/" + k, v, self.step if step is None else step)

    def flush(self):
        # TensorBoard does not write every add_scalar call to disk immediately;
        # events are buffered in memory first. Calling flush() forces the buffered
        # data to be written to the event file.
        self.writer.flush()


if __name__ == "__main__":
    logger = TensorboardLogger(log_dir='./logs')

    for epoch in range(1, 6):
        loss = torch.tensor(1.0 / epoch)
        accuracy = torch.tensor(80.0 + epoch * 3.0)
        logger.set_step(epoch)
        logger.update(head='training', loss=loss, accuracy=accuracy)
        print(f"Epoch {epoch}: Loss={loss.item()}, Accuracy={accuracy.item()}")
    logger.flush()
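
Because set_step() increments the internal counter when called with no argument, the same logger can also be driven per-iteration. A short usage sketch reusing the TensorboardLogger class above (the loop and the 1.0 / (it + 1) loss are placeholders):

logger = TensorboardLogger(log_dir='./logs/iter')

for it in range(100):
    loss = 1.0 / (it + 1)   # placeholder standing in for a real training loss
    logger.set_step()       # no argument: the internal step counter auto-increments
    logger.update(head='train_iter', loss=loss)
logger.flush()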