# PyTorch Basics

## Datasets and DataLoaders

Dataset
- Provides a way to access the data and its labels.
Take the dataset from the official docs as an example:

```
dataset
├── train        // training set
│   ├── ants     // one directory per label, containing the corresponding images
│   └── bees
└── val          // test set
    ├── ants
    └── bees
```

DataLoader
- Provides the data to the network in different forms, e.g. grouped into batches.
Use the `Dataset` class to wrap a custom dataset:
```python
# import the Dataset class and view its help (IPython's ?? syntax)
from torch.utils.data import Dataset
Dataset??

import os
import torch
import cv2

class Mydata(Dataset):
    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        img_name = self.img_path[idx]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = cv2.imread(img_item_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.label_dir
        return img, label

    def __len__(self):
        return len(self.img_path)

ants_dataset = Mydata("dataset/train/", "ants")
bees_dataset = Mydata("dataset/train/", "bees")
# adding two Datasets yields a ConcatDataset
train_data = ants_dataset + bees_dataset
```
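As a quick check, a minimal usage sketch (not from the original notes; it assumes the `dataset/train/` layout shown above actually exists on disk):

```python
# Sketch: index the custom dataset and inspect what it returns
img, label = ants_dataset[0]   # __getitem__ returns an HWC ndarray and the label string
print(img.shape, label)
print(len(train_data))         # ConcatDataset length = len(ants_dataset) + len(bees_dataset)
```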
Using the datasets provided by torchvision:
```python
import torchvision

# transform applied to every dataset sample
dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
# the dataset is downloaded automatically
train_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=False, transform=dataset_transform, download=True)
# each item is a tuple of (image, class index)
print(test_set[0])
# print the class names that the indices map to
print(test_set.classes)
```
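A small sketch (not from the original notes) to make the returned tuple concrete, assuming the download above succeeded:

```python
# Sketch: inspect a single CIFAR10 sample
img, target = test_set[0]
print(img.shape)                 # torch.Size([3, 32, 32]) after ToTensor
print(test_set.classes[target])  # the human-readable class name
```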
Using the DataLoader:
```python
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
test_set = torchvision.datasets.CIFAR10(root="./CIFAR10", train=False, transform=dataset_transform, download=False)

# batch_size: how many images go into each batch
# shuffle: whether to shuffle the data; True makes the batches differ between epochs
# num_workers: number of worker processes used for loading
# drop_last: when the dataset size is not divisible by batch_size, whether to drop the last, smaller batch
test_loader = DataLoader(dataset=test_set, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

writer = SummaryWriter("logs")
# simulate two epochs of iteration
for epoch in range(2):
    step = 0
    for data in test_loader:
        imgs, targets = data
        writer.add_images(f"Epoch {epoch}", imgs, step)
        step += 1
writer.close()
```
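To see what a single batch looks like, a quick sketch (not from the original notes; it assumes `test_loader` from the block above):

```python
# Sketch: grab one batch from the DataLoader
imgs, targets = next(iter(test_loader))
print(imgs.shape)   # torch.Size([4, 3, 32, 32]) -> (batch, channels, H, W)
print(targets)      # a tensor holding 4 class indices
```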
## Using Tensorboard
```python
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import cv2

# set the log output directory
writer = SummaryWriter("logs")

img = cv2.imread(r"moddataset/train/bees_image/16838648_415acd9e3f.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print(type(img))
print(img.shape)
# draw the image (the ndarray is HWC, so dataformats must be given)
writer.add_image("test", img, 2, dataformats="HWC")
# draw the curve y = x
for i in range(100):
    writer.add_scalar("y=x", i, i)
writer.close()
```

Once the logs have been generated, start the tensorboard web server:

```bash
tensorboard --logdir=logs --port=6006
```

After it is open, every time the program is re-run, go back to the browser and refresh; the results will update.
## transforms in torchvision
Use a transform to convert a `PIL.Image` or `numpy.ndarray` object into a tensor:

```python
from torchvision import transforms
from PIL import Image

img_pil = Image.open("moddataset/train/ants_image/0013035.jpg")
totensor = transforms.ToTensor()
img_tensor = totensor(img_pil)
print(type(img_tensor))
print(img_tensor.shape)
```
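As a side note (not from the original notes), `ToTensor` also accepts an ndarray and rescales pixel values; a quick sketch assuming `img_pil` and `totensor` from the block above:

```python
import numpy as np

# Sketch: ToTensor on an HWC uint8 ndarray produces a CHW float tensor with values in [0, 1]
arr = np.asarray(img_pil)
print(arr.shape, arr.dtype)
print(totensor(arr).shape, totensor(arr).max())
```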
Use a transform to normalize an image. The normalization formula is

$$\text{output}[c] = \frac{\text{input}[c] - \mu_c}{\sigma_c}$$

where $\mu$ is the mean and $\sigma$ is the standard deviation. Normalization filters out information the network does not need, such as overall brightness, which speeds up training.

```python
# set the per-channel mean and standard deviation
normalize = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_normalize = normalize(img_tensor)
```
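A quick sketch of the effect (not from the original notes; it assumes `img_tensor` and `img_normalize` from above):

```python
# Sketch: ToTensor output lies in [0, 1]; Normalize with mean=0.5, std=0.5 maps it into [-1, 1]
print(img_tensor.min().item(), img_tensor.max().item())
print(img_normalize.min().item(), img_normalize.max().item())
```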
Use a transform to resize an image:
```python
resize = transforms.Resize((500, 500))
# PIL -> PIL
# newer Resize versions can also take a tensor directly; the PIL path is kept here for the Compose example
img_resize = resize(img_pil)
# PIL -> tensor
img_resize = totensor(img_resize)
```
Use `transforms.Compose` to merge the two steps above:
```python
resize_totensor = transforms.Compose([
    transforms.Resize((500, 500)),
    transforms.ToTensor()
])
# Compose applies the transforms in order; ToTensor expects a PIL image (or ndarray), so pass the PIL image in
img_resize_tensor = resize_totensor(img_pil)
```

The transforms source code contains fairly detailed documentation, so it is not reproduced here.

## Basic skeleton of a neural network
```python
from torch import nn
import torch

# nn.Module is the base class for all models; every model inherits from it
class Model(nn.Module):
    def __init__(self):
        super().__init__()

    # the forward method must be overridden
    def forward(self, input):
        output = input + 1
        return output

m = Model()
x = torch.tensor(1.0)
output = m(x)
print(output)
```
## The convolution operation
```python
import torch.nn.functional as F
import torch

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])
# reshape the inputs to (minibatch, channels, H, W)
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
print(input.shape)
print(kernel.shape)
# convolve with the given stride
output = F.conv2d(input, kernel, stride=1)
print(output)
# pad the border with zeros, one pixel wide
output1 = F.conv2d(input, kernel, stride=1, padding=1)
print(output1)
```
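For reference (a general formula, not part of the original notes), the spatial output size of a convolution with dilation 1 is

$$H_{out} = \left\lfloor \frac{H_{in} + 2 \cdot \text{padding} - \text{kernel\_size}}{\text{stride}} \right\rfloor + 1$$

which matches the example above: $(5 - 3)/1 + 1 = 3$ without padding, and $(5 + 2 - 3)/1 + 1 = 5$ with `padding=1`.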
## Convolution layers
```python
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
dataloader = DataLoader(dataset, batch_size=64)

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # add a convolution layer to the model
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x

m = Model()
writer = SummaryWriter("./logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = m(imgs)
    print(output.shape)
    writer.add_images("input", imgs, step)
    # the output has to be reshaped before it can go into the writer:
    # add_images only accepts 1-channel (grayscale) or 3-channel (RGB) images,
    # and the convolution outputs 6 channels
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()
```
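A quick sketch (not from the original notes) of why the reshape above uses 30x30: a 32x32 CIFAR10 image convolved with a 3x3 kernel, stride 1 and no padding shrinks to 30x30. Assuming `m` and `torch` from the block above:

```python
# Sketch: verify the spatial output size of the Conv2d layer above
print(m(torch.ones(1, 3, 32, 32)).shape)   # torch.Size([1, 6, 30, 30])
```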
## Pooling layers

Adding a pooling layer. A pooling layer preserves the salient features while reducing the size of the tensor, which cuts down the amount of data and speeds up training.

```python
# the default stride of a pooling layer equals its kernel size;
# ceil_mode=True rounds the output size up, which keeps the partial windows
# that are smaller than kernel_size
# pooling requires a floating-point dtype, e.g. create the tensor with dtype=torch.float32
# a pooling layer does not change the number of channels
self.maxpool1 = nn.MaxPool2d(kernel_size=3, ceil_mode=True)
```
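A standalone sketch (not from the original notes) that reuses the 5x5 matrix from the convolution section to show what `ceil_mode=True` does:

```python
import torch
from torch import nn

x = torch.tensor([[1, 2, 0, 3, 1],
                  [0, 1, 2, 3, 1],
                  [1, 2, 1, 0, 0],
                  [5, 2, 3, 1, 1],
                  [2, 1, 0, 1, 1]], dtype=torch.float32).reshape(1, 1, 5, 5)

pool = nn.MaxPool2d(kernel_size=3, ceil_mode=True)
# with ceil_mode=True the 5x5 input gives a 2x2 output: [[2., 3.], [5., 1.]];
# with ceil_mode=False the partial windows are dropped and only [[2.]] remains
print(pool(x))
```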
## Non-linear activation functions

The ReLU function. Other activation functions are used in basically the same way as ReLU; their formulas and purposes are still to be added here.
```python
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # inplace: whether to overwrite the input tensor; defaults to False (no overwrite)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        output = self.relu(x)
        return output
```
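A minimal usage sketch (not from the original notes; it assumes the `Model` class above plus `import torch`):

```python
# Sketch: ReLU clamps negative entries to zero
m = Model()
x = torch.tensor([[1.0, -0.5],
                  [-1.0, 3.0]])
print(m(x))   # tensor([[1., 0.], [0., 3.]])
```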
## Linear layers
```python
# specify the number of input and output features
self.linear1 = nn.Linear(196608, 10)

m = Model()
for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    # output = torch.reshape(imgs, (1, 1, 1, -1))
    # flatten the whole batch into one vector (196608 = 64 * 3 * 32 * 32)
    output = torch.flatten(imgs)
    print(output.shape)
    output = m(output)
    print(output.shape)
```
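A side-by-side sketch (not from the original notes) of `torch.flatten` versus the `nn.Flatten` layer used in the next section:

```python
import torch
from torch import nn

batch = torch.ones(64, 3, 32, 32)
print(torch.flatten(batch).shape)   # torch.Size([196608]): everything in one vector
print(nn.Flatten()(batch).shape)    # torch.Size([64, 3072]): the batch dimension is kept
```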
## Sequential

Similar to `transforms.Compose`, it merges multiple layers into a single module.
```python
import torch
from torch.utils.tensorboard import SummaryWriter
from torch import nn

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        out = self.model1(x)
        return out

m = Model()
print(m)
input1 = torch.ones((64, 3, 32, 32))
output = m(input1)
print(output.shape)

writer = SummaryWriter("./logs/")
writer.add_graph(m, input1)
writer.close()
```
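A short sketch (not from the original notes) of where the 1024 in `nn.Linear(1024, 64)` comes from: each `MaxPool2d(2)` halves the 32x32 input (32 -> 16 -> 8 -> 4), and the last convolution outputs 64 channels, so `Flatten` produces 64 * 4 * 4 = 1024 features.

```python
import torch
from torch import nn

# Sketch: run only the conv/pool/flatten part to check the flattened feature size
convs = nn.Sequential(
    nn.Conv2d(3, 32, 5, padding=2), nn.MaxPool2d(2),
    nn.Conv2d(32, 32, 5, padding=2), nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 5, padding=2), nn.MaxPool2d(2),
    nn.Flatten(),
)
print(convs(torch.ones(1, 3, 32, 32)).shape)   # torch.Size([1, 1024])
```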
## Common loss functions
```python
import torch
from torch import nn

inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))

# L1 loss, summed instead of averaged
loss = nn.L1Loss(reduction='sum')
result = loss(inputs, targets)
print(result)

# mean squared error
loss_mse = nn.MSELoss()
result_mse = loss_mse(inputs, targets)

# cross-entropy loss: x holds the raw scores for 3 classes, y the target class index
x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)
```
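A quick sketch (not from the original notes) making explicit that `CrossEntropyLoss` applies log-softmax internally, so it expects raw scores plus a class index. It assumes `x` and `torch` from the block above:

```python
# Sketch: the cross-entropy value above equals the negative log-softmax score of class 1
manual = -torch.log_softmax(x, dim=1)[0, 1]
print(manual)   # same value as result_cross
```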
## Backpropagation and optimizers
```python
import torch
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torch import nn
import torchvision

dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
dataloader = DataLoader(dataset, batch_size=64)

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        out = self.model1(x)
        return out

m = Model()
# create the loss function
loss = nn.CrossEntropyLoss()
# add the optimizer
optim = torch.optim.SGD(m.parameters(), lr=0.01)

for epoch in range(20):
    running_loss = 0.0
    for imgs, targets in dataloader:
        output = m(imgs)
        result_loss = loss(output, targets)
        # clear the gradients computed for the previous batch
        optim.zero_grad()
        # backpropagate to compute the gradients
        result_loss.backward()
        # let the optimizer update the parameters
        optim.step()
        running_loss = running_loss + result_loss.item()
    print(f"Epoch: {epoch+1}, Loss: {running_loss}")
```
## Using and modifying existing network models
```python
import torchvision
from torch import nn

vgg16_true = torchvision.models.vgg16()
# append a module at the end of the model
vgg16_true.add_module("add linear", nn.Linear(1000, 10))
# append a module inside a named child (here the classifier block)
vgg16_true.classifier.add_module("add linear in classifier", nn.Linear(1000, 10))
# replace an existing module
vgg16_true.classifier[6] = nn.Linear(4096, 10)
print(vgg16_true)
```
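The calls above create a randomly initialised VGG16. As a hedged side note (API of recent torchvision releases, not part of the original notes), pretrained ImageNet weights can be requested explicitly:

```python
# Sketch (assumes torchvision >= 0.13): load ImageNet-pretrained weights instead of random initialisation
vgg16_pretrained = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.DEFAULT)
```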
## Saving and loading models
```python
import torch
import torchvision

vgg16_true = torchvision.models.vgg16()

# method 1: save the whole model, structure and parameters
torch.save(vgg16_true, "vgg16_method1.pth")
# loading returns the model object itself
# note: when saving and loading our own models this way, the original model class
# must be available in the script that loads it (copy the source or import it)
model = torch.load("vgg16_method1.pth")

# method 2: save only the parameters, not the structure
torch.save(vgg16_true.state_dict(), "vgg16_method2.pth")
# loading returns a dict (the state_dict)
model = torch.load("vgg16_method2.pth")
# rebuild the model, then load the parameters into it
vgg16 = torchvision.models.vgg16()
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
```
## A complete training workflow

`model.py`:
```python
import torch
from torch import nn

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        out = self.model1(x)
        return out

# basic sanity test for the model
if __name__ == '__main__':
    m = Model()
    inputs = torch.ones((64, 3, 32, 32))
    output = m(inputs)
    print(output.shape)
```
`train_test.py`:
```python
import torch.optim
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# import the model definition
from model import Model

# prepare the datasets and dataloaders, with batch_size set to 64
train_dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=True, transform=torchvision.transforms.ToTensor(), download=False)
test_dataset = torchvision.datasets.CIFAR10("./CIFAR10", train=False, transform=torchvision.transforms.ToTensor(), download=False)
train_dataloader = DataLoader(train_dataset, batch_size=64)
test_dataloader = DataLoader(test_dataset, batch_size=64)

# instantiate the model
m = Model()
# cross-entropy loss for the 10-class problem
loss_fn = nn.CrossEntropyLoss()
# learning rate, written in the usual scientific notation
learning_rate = 1e-2
# stochastic gradient descent optimizer
optimizer = torch.optim.SGD(m.parameters(), lr=learning_rate)

# total number of training and test steps so far
total_train_step = 0
total_test_step = 0
# train for 10 epochs
epoch = 10

writer = SummaryWriter("./logs")
for i in range(epoch):
    # required when the model contains Dropout or BatchNorm layers;
    # optional for this model, but good practice to keep it
    m.train()
    for imgs, targets in train_dataloader:
        output = m(imgs)
        loss = loss_fn(output, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        if total_train_step % 100 == 0:
            print(f"Step {total_train_step}, Loss: {loss.item()}")
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    total_test_loss = 0
    total_accuracy = 0
    # required when the model contains Dropout or BatchNorm layers;
    # optional for this model, but good practice to keep it
    m.eval()
    # no gradients are needed during evaluation
    with torch.no_grad():
        for imgs, targets in test_dataloader:
            output = m(imgs)
            loss = loss_fn(output, targets)
            total_test_loss += loss.item()
            # argmax over dim 1 gives the predicted class; count the correct predictions
            accuracy = (output.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print(f"Total test loss: {total_test_loss}")
    # accuracy over the whole test set
    print(f"Total test acc: {total_accuracy/len(test_dataset)}")
    writer.add_scalar("test_accuracy", total_accuracy/len(test_dataset), total_test_step)
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    total_test_step += 1
    torch.save(m, f"torch_{i}.pth")

writer.close()
```
## Training on the GPU

Method 1:
```python
# call .cuda() on the model, the data, and the loss function
m = Model()
m = m.cuda()

loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.cuda()

imgs = imgs.cuda()
targets = targets.cuda()
```
Method 2:
```python
# call .to(device) on the model, the data, and the loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
loss_fn = loss_fn.to(device)
imgs = imgs.to(device)
targets = targets.to(device)
```
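One detail worth noting (a sketch, not from the original notes): `.to(device)` and `.cuda()` move an `nn.Module` in place, but for tensors they return a new copy, so the result has to be reassigned.

```python
import torch
from torch import nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

layer = nn.Linear(4, 2)
layer.to(device)      # modules are moved in place; reassignment is optional
x = torch.ones(1, 4)
x = x.to(device)      # tensors return a new copy; reassignment is required
print(layer(x).device)
```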
## Validating the model
```python
from PIL import Image
from torchvision import transforms
from model import Model
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# preprocess the image
img = Image.open("img.png")
img = img.convert("RGB")
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor()
])
img = transform(img)
# the model was trained on the GPU, so move the image there as well
img = img.to(device)
# reshape to add a batch dimension
img = torch.reshape(img, (1, 3, 32, 32))

# when using a GPU-trained model on a CPU-only machine, map_location must be specified:
# m = torch.load("torch_9.pth", map_location=torch.device('cpu'))
m = torch.load("torch_9.pth")
# the model was trained on the GPU, so move it there as well
m = m.to(device)
m.eval()
output = m(img)
print(output.argmax(1))
```
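To turn the predicted index into a readable label, a small sketch (not from the original notes; it assumes the class order used by torchvision's CIFAR10, which is what the model was trained on):

```python
# Sketch: map the predicted index back to a CIFAR10 class name
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
print(classes[output.argmax(1).item()])
```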