数据集和数据加载器

数据集

  • 提供了一种方式获取数据及其label
    以官方文档中的数据集为例
    1
    2
    3
    4
    5
    6
    7
    dataset
    ├── train // 训练集
    │   ├── ants // label目录,有对应图片
    │   └── bees
    └── val // 验证集
        ├── ants
        └── bees
  • 为网络提供不同的数据形式,例如batch
  • 使用Dataset子类封装数据集
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    # Import modules and look up help for Dataset
    import os

    import cv2
    import torch
    from torch.utils.data import Dataset

    # `Dataset??` only works in IPython/Jupyter; help() is the plain-Python way
    help(Dataset)


    class Mydata(Dataset):
        """Images of a single class stored in ``root_dir/label_dir``.

        Indexing returns ``(img, label)``: ``img`` is the image read with
        OpenCV and converted from BGR to RGB, ``label`` is the name of the
        label directory.
        """

        def __init__(self, root_dir, label_dir):
            self.root_dir = root_dir
            self.label_dir = label_dir
            self.path = os.path.join(self.root_dir, self.label_dir)
            # Bare file names; they are joined with self.path on access
            self.img_path = os.listdir(self.path)

        def __getitem__(self, idx):
            img_name = self.img_path[idx]
            img_item_path = os.path.join(self.path, img_name)
            img = cv2.imread(img_item_path, cv2.IMREAD_COLOR)
            # OpenCV decodes to BGR channel order; convert to RGB
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            label = self.label_dir
            return img, label

        def __len__(self):
            return len(self.img_path)


    ants_dataset = Mydata("dataset/train/", "ants")
    bees_dataset = Mydata("dataset/train/", "bees")
    # Adding two datasets concatenates them into one training set
    train_data = ants_dataset + bees_dataset
    使用torchvision提供的数据集
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    import torchvision

    # Transform applied to every sample of the dataset
    dataset_transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])
    # download=True fetches the dataset into `root` automatically
    train_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=True, transform=dataset_transform, download=True)
    test_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=False, transform=dataset_transform, download=True)
    # Each sample is a tuple of (image, class number)
    print(test_set[0])
    # Class names that the class numbers refer to
    print(test_set.classes)
    数据加载器的使用
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    import torchvision
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter

    dataset_transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])
    test_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=False, transform=dataset_transform, download=False)
    # batch_size:  number of images in each batch
    # shuffle:     whether to shuffle; True makes the batches differ between epochs
    # num_workers: degree of parallelism used for loading
    # drop_last:   when the dataset size is not divisible by batch_size,
    #              whether to drop the final, smaller batch
    test_loader = DataLoader(dataset=test_set, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

    writer = SummaryWriter("logs")
    # Simulate two epochs of iteration; each epoch logs under its own tag
    for epoch in range(2):
        for step, (imgs, targets) in enumerate(test_loader):
            writer.add_images(f"Epoch {epoch}", imgs, step)
    writer.close()

    Tensorboard的使用

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    from torch.utils.tensorboard import SummaryWriter
    import numpy as np
    import cv2
    # Directory the event files are written to
    writer = SummaryWriter("logs")
    img = cv2.imread(r"moddataset/train/bees_image/16838648_415acd9e3f.jpg")
    # OpenCV decodes to BGR channel order; convert to RGB before logging
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    print(type(img))
    print(img.shape)
    # Log the image; dataformats="HWC" declares the array layout as
    # height, width, channels
    writer.add_image("test", img, 2, dataformats="HWC")
    # Plot the curve y = x
    for i in range(100):
        writer.add_scalar("y=x", i, i)
    writer.close()
    生成后打开tensorboard webserver
    1
    tensorboard --logdir=logs --port=6006
    打开后每次运行程序,回到浏览器刷新,结果就会有变化。

    torchvision中的transform

    使用transform将PIL.Image对象或numpy.ndarray对象转为tensor对象
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    from torchvision import transforms
    from PIL import Image

    # Open the sample image with PIL
    img_pil = Image.open("moddataset/train/ants_image/0013035.jpg")

    # ToTensor is a callable transform object: build it once, then call it
    totensor = transforms.ToTensor()
    img_tensor = totensor(img_pil)

    # Inspect the type and shape of the converted result
    print(type(img_tensor))
    print(img_tensor.shape)
    使用transform将图像进行标准化处理
    标准化计算公式:$output = \frac{input - mean}{std}$,其中 $mean$ 为均值,$std$ 为标准差。
    标准化可过滤图像中不必要的信息,如亮度。从而加快训练。
    1
    2
    3
    # Set the mean and the standard deviation (one value per channel)
    normalize = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    # Apply the normalization to the tensor produced by ToTensor
    img_normalize = normalize(img_tensor)
    使用transform进行大小变换
    1
    2
    3
    4
    5
    6
    # Resize transform with a fixed 500x500 target size
    resize = transforms.Resize((500, 500))
    # PIL -> PIL
    # A tensor can now be passed in directly; a PIL image is used here
    # to set up the Compose example
    img_resize = resize(img_pil)
    # PIL -> tensor
    img_resize = totensor(img_resize)
    使用transforms.Compose合并以上两步转换操作
    1
    2
    3
    4
    5
    # Chain both steps: resize the PIL image, then convert it to a tensor
    resize_totensor = transforms.Compose([
        transforms.Resize((500, 500)),
        transforms.ToTensor(),
    ])
    # The pipeline ends in ToTensor, which accepts a PIL image or ndarray,
    # so it must be fed the PIL image rather than the already-converted tensor
    img_resize_tensor = resize_totensor(img_pil)
    transforms源代码中有较为详细的文档,不再进行记录

    神经网络的基本骨架

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    from torch import nn
    import torch

    # nn.Module is the base class of models; every model inherits from it
    class Model(nn.Module):
        """Minimal module whose forward pass adds 1 to its input."""

        def __init__(self):
            super().__init__()

        # forward must be overridden; calling the module runs it
        def forward(self, input):
            output = input + 1
            return output


    m = Model()
    x = torch.tensor(1.0)
    output = m(x)
    print(output)

    卷积操作

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    import torch.nn.functional as F
    import torch

    # 5x5 input
    input = torch.tensor([
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 0],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ])
    # 3x3 convolution kernel
    kernel = torch.tensor([
        [1, 2, 1],
        [0, 1, 0],
        [2, 1, 0],
    ])
    # Reshape both tensors to 4-D: (minibatch, channels, H, W) for the input
    input = input.reshape(1, 1, 5, 5)
    kernel = kernel.reshape(1, 1, 3, 3)
    print(input.shape)
    print(kernel.shape)
    # Convolution with an explicit stride
    output = F.conv2d(input, kernel, stride=1)
    print(output)
    # Same convolution with a zero-filled border of width 1
    output1 = F.conv2d(input, kernel, stride=1, padding=1)
    print(output1)

    卷积层

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    import torch
    import torchvision
    from torch import nn
    from torch.nn import Conv2d
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter

    dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)

    dataloader = DataLoader(dataset, batch_size=64)


    class Model(nn.Module):
        """Model with a single convolution layer (3 -> 6 channels)."""

        def __init__(self):
            super().__init__()
            # Add a convolution layer to the model
            self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

        def forward(self, x):
            x = self.conv1(x)
            return x


    m = Model()
    writer = SummaryWriter("./logs")
    for step, (imgs, targets) in enumerate(dataloader):
        output = m(imgs)
        print(output.shape)
        writer.add_images("input", imgs, step)
        # The writer only accepts 1-channel grayscale or 3-channel RGB images.
        # The convolution outputs 6 channels, so fold them into 3-channel
        # images (the batch dimension grows accordingly) before logging.
        output = torch.reshape(output, (-1, 3, 30, 30))
        writer.add_images("output", output, step)

    writer.close()

    池化层

    添加池化层
    池化层可以保留数据特征,并给tensor降维,减小数据量,提高训练速度。
    1
    2
    3
    4
    5
    # The pooling stride defaults to the kernel size. ceil_mode rounds the
    # output size up; with True, a trailing window smaller than kernel_size
    # is kept instead of dropped.
    # Pooling expects floating-point input; pass dtype=torch.float32 when
    # creating the tensor.
    # A pooling layer does not change the number of image channels.
    self.maxpool1 = nn.MaxPool2d(kernel_size=3, ceil_mode=True)

    非线性激活函数

    ReLU函数
    1
    2
    3
    4
    5
    6
    7
    8
    9
    class Model(nn.Module):
        """Model that applies a single ReLU activation to its input."""

        def __init__(self):
            super().__init__()
            # inplace selects whether the input tensor is overwritten;
            # the default is not to overwrite it
            self.relu = nn.ReLU(inplace=False)

        def forward(self, x):
            output = self.relu(x)
            return output
    其他激活函数与ReLU函数的用法基本相同,以下待补充其他函数的解析式子和作用。

    线性层

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # Set the number of input features and output features
    # (this line belongs in Model.__init__, which is not shown here)
    # NOTE(review): 196608 = 64 * 3 * 32 * 32, presumably one whole flattened
    # batch of 64 images; a smaller final batch would not match.
    # TODO confirm the dataloader uses drop_last=True.
    self.linear1 = nn.Linear(196608, 10)

    m = Model()
    for data in dataloader:
        imgs, targets = data
        print(imgs.shape)
        # output = torch.reshape(imgs, (1, 1, 1, -1))
        # Flatten the whole batch of images into one 1-D tensor
        output = torch.flatten(imgs)
        print(output.shape)
        output = m(output)
        print(output.shape)

    Sequential

    类似transforms.Compose,对多个层进行合并。
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    import torch
    from torch.utils.tensorboard import SummaryWriter
    from torch import nn


    class Model(nn.Module):
        """CNN built from one nn.Sequential: three conv + max-pool stages,
        then flatten and two linear layers producing 10 outputs.

        With 3x32x32 input the padded 5x5 convolutions keep the spatial size
        and each pooling halves it (32 -> 16 -> 8 -> 4), so Flatten yields
        64 * 4 * 4 = 1024 features per sample.
        """

        def __init__(self):
            super().__init__()
            self.model1 = nn.Sequential(
                nn.Conv2d(3, 32, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 32, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 64, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Flatten(),
                nn.Linear(1024, 64),
                nn.Linear(64, 10),
            )

        def forward(self, x):
            out = self.model1(x)
            return out


    m = Model()
    # Printing a module lists its layers
    print(m)
    # Dummy batch used to check the output shape: 64 samples of 3x32x32
    input1 = torch.ones((64, 3, 32, 32))
    output = m(input1)
    print(output.shape)

    # Write the model graph, traced with the dummy batch, for TensorBoard
    writer = SummaryWriter("./logs/")
    writer.add_graph(m, input1)
    writer.close()

    常用损失函数

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    import torch
    from torch import nn
    # Prediction and target, shaped (1, 1, 1, 3)
    inputs = torch.tensor([1, 2, 3], dtype=torch.float32).reshape(1, 1, 1, 3)
    targets = torch.tensor([1, 2, 5], dtype=torch.float32).reshape(1, 1, 1, 3)

    # L1 loss, summed over all elements
    loss = nn.L1Loss(reduction='sum')
    result = loss(inputs, targets)
    print(result)

    # Mean squared error with the default reduction
    loss_mse = nn.MSELoss()
    result_mse = loss_mse(inputs, targets)

    # Cross entropy: one sample with three class scores, target class index 1
    x = torch.tensor([0.1, 0.2, 0.3]).reshape(1, 3)
    y = torch.tensor([1])

    loss_cross = nn.CrossEntropyLoss()
    result_cross = loss_cross(x, y)
    print(result_cross)

    反向传播与优化器

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    import torch
    from torch.utils.tensorboard import SummaryWriter
    from torch.utils.data import DataLoader
    from torch import nn
    import torchvision
    dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
    dataloader = DataLoader(dataset, batch_size=64)


    class Model(nn.Module):
        """Three conv + max-pool stages, flatten, two linear layers (10 outputs)."""

        def __init__(self):
            super().__init__()
            self.model1 = nn.Sequential(
                nn.Conv2d(3, 32, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 32, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 64, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Flatten(),
                nn.Linear(1024, 64),
                nn.Linear(64, 10),
            )

        def forward(self, x):
            out = self.model1(x)
            return out


    m = Model()
    # Create the loss function
    loss = nn.CrossEntropyLoss()
    # Add the optimizer
    optim = torch.optim.SGD(m.parameters(), lr=0.01)
    for epoch in range(20):
        running_loss = 0.0
        for imgs, targets in dataloader:
            output = m(imgs)
            result_loss = loss(output, targets)
            # Clear the gradients left over from the previous batch
            optim.zero_grad()
            # Backpropagate to compute the gradients
            result_loss.backward()
            # Let the optimizer update the parameters
            optim.step()
            # Accumulate a plain float; adding the tensor itself would keep
            # autograd history attached to the running total
            running_loss += result_loss.item()
        print(f"Epoch: {epoch+1}, Loss: {running_loss}")

    现有的网络模型使用及修改

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    import torchvision
    from torch import nn

    # NOTE(review): the three edits below look like independent demonstrations
    # of the API. Applied together on one model the layer sizes presumably no
    # longer line up (classifier[6] ends in 10 features, the appended layer
    # expects 1000); confirm before running a forward pass.
    vgg16_true = torchvision.models.vgg16()
    # Append a module at the end of the model
    vgg16_true.add_module("add linear", nn.Linear(1000, 10))
    # Append a module inside a named sub-module (here: classifier)
    vgg16_true.classifier.add_module("add linear in classifier", nn.Linear(1000, 10))
    # Replace an existing module
    vgg16_true.classifier[6] = nn.Linear(4096, 10)
    print(vgg16_true)

    保存和加载模型

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    vgg16_true = torchvision.models.vgg16()

    # Method 1: save the whole model, structure and parameters together
    torch.save(vgg16_true, "vgg16_method1.pth")
    # What is read back is the model object
    # Note: when saving and loading your own model, the original model class
    # must be available in the Python program that loads it
    # (copy the source code or import it)
    # NOTE(review): torch.load unpickles the file, so only load trusted files.
    # Newer PyTorch releases reportedly default to weights_only=True, which
    # would reject a whole pickled model unless weights_only=False is passed;
    # confirm against the installed version.
    model = torch.load("vgg16_method1.pth")

    # Method 2: save only the parameters, not the structure
    torch.save(vgg16_true.state_dict(), "vgg16_method2.pth")
    # What is read back is a dictionary
    model = torch.load("vgg16_method2.pth")
    # Build the model, then load the parameters into it
    vgg16 = torchvision.models.vgg16()
    vgg16.load_state_dict(torch.load("vgg16_method2.pth"))

    完整模型训练流程

    model.py
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    import torch
    from torch import nn


    class Model(nn.Module):
        """10-class CNN: three conv + max-pool stages, flatten, two linear layers.

        The Linear(1024, 64) layer fixes the flattened size at 64 * 4 * 4,
        which corresponds to 3x32x32 inputs (as used in the self-test below).
        """

        def __init__(self):
            super().__init__()
            self.model1 = nn.Sequential(
                nn.Conv2d(3, 32, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 32, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Conv2d(32, 64, 5, padding=2),
                nn.MaxPool2d(2),
                nn.Flatten(),
                nn.Linear(1024, 64),
                nn.Linear(64, 10),
            )

        def forward(self, x):
            out = self.model1(x)
            return out


    # Basic sanity test of the model; runs only when executed as a script
    if __name__ == '__main__':
        m = Model()
        inputs = torch.ones((64, 3, 32, 32))
        output = m(inputs)
        print(output.shape)
    train_test.py
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    import torch.optim
    import torchvision
    from torch import nn
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter

    # 模型导入
    from model import Model

    # Prepare the datasets and data loaders with a batch size of 64
    train_dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=True, transform=torchvision.transforms.ToTensor(), download=False)
    test_dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
    train_dataloader = DataLoader(train_dataset, batch_size=64)
    test_dataloader = DataLoader(test_dataset, batch_size=64)

    # Instantiate the model
    m = Model()
    # Cross-entropy loss for 10-class classification
    loss_fn = nn.CrossEntropyLoss()
    # Learning rate, in the conventional scientific notation
    learning_rate = 1e-2
    # Stochastic gradient descent optimizer
    optimizer = torch.optim.SGD(m.parameters(), lr=learning_rate)
    # Running totals of training steps and of test rounds
    total_train_step = 0
    total_test_step = 0
    # Train for 10 epochs
    epoch = 10

    writer = SummaryWriter("./logs")

    for i in range(epoch):
        # Required when the model contains Dropout or BatchNorm layers;
        # optional for this model, but best kept as a habit
        m.train()
        for imgs, targets in train_dataloader:
            output = m(imgs)
            loss = loss_fn(output, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_step += 1
            if total_train_step % 100 == 0:
                # The counter is the global training step, not the epoch
                print(f"Step {total_train_step}, Loss: {loss.item()}")
                writer.add_scalar("train_loss", loss.item(), total_train_step)

        total_test_loss = 0
        total_accuracy = 0
        # Required when the model contains Dropout or BatchNorm layers;
        # optional for this model, but best kept as a habit
        m.eval()
        # Disable gradient tracking; evaluation does not need gradients
        with torch.no_grad():
            for imgs, targets in test_dataloader:
                output = m(imgs)
                loss = loss_fn(output, targets)
                # Accumulate plain Python numbers rather than tensors
                total_test_loss += loss.item()
                # argmax over axis 1 picks the predicted class;
                # count the correct predictions in this batch
                accuracy = (output.argmax(1) == targets).sum()
                total_accuracy += accuracy.item()

        print(f"Total test Loss: {total_test_loss}")
        # Accuracy = correct predictions / number of test samples
        print(f"Total test acc: {total_accuracy/len(test_dataset)}")
        # Tag spelling kept as-is (sic) so existing logs stay on one chart
        writer.add_scalar("test_acuracy", total_accuracy/len(test_dataset), total_test_step)
        writer.add_scalar("test_loss", total_test_loss, total_test_step)
        total_test_step += 1

        # Save the whole model after every epoch
        torch.save(m, f"torch_{i}.pth")

    writer.close()

    使用GPU训练

    方式1:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    # Call .cuda() on the model, the data and the loss function
    m = Model()
    m = m.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fn = loss_fn.cuda()

    # Each batch taken from the data loader has to be moved as well
    imgs = imgs.cuda()
    targets = targets.cuda()
    方式2:
    1
    2
    3
    4
    5
    6
    7
    # Call .to(device) on the model, the data and the loss function
    # Fall back to the CPU when CUDA is not available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # The network is named `m` in the other snippets
    m = m.to(device)

    loss_fn = loss_fn.to(device)
    imgs = imgs.to(device)
    targets = targets.to(device)

    验证模型

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    from PIL import Image
    from torchvision import transforms
    from model import Model
    import torch
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Image preprocessing
    img = Image.open("img.png")
    # Force three RGB channels, matching the reshape below
    img = img.convert("RGB")

    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ])

    img = transform(img)
    # Put the image on the same device as the model
    img = img.to(device)
    # Reshape to add the leading batch dimension
    img = torch.reshape(img, (1, 3, 32, 32))
    # map_location remaps the stored tensors onto `device`, so a model saved
    # on a GPU also loads on a CPU-only machine
    # NOTE(review): torch_9.pth holds a whole pickled model; only load trusted
    # files, and newer PyTorch releases may additionally need
    # weights_only=False here. Confirm against the installed version.
    m = torch.load("torch_9.pth", map_location=device)
    m = m.to(device)
    m.eval()
    # Inference only: skip gradient tracking
    with torch.no_grad():
        output = m(img)
    print(output.argmax(1))