在GPU上训练模型

检查GPU的可用性

确保GPU可用并且正确安装了CUDA和Pytorch的CUDA版本

1
2
import torch

# Sanity check: prints True only when a CUDA-capable GPU, its driver,
# and a CUDA build of PyTorch are all correctly installed.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)

查询可用GPU数量

1
2
3
import torch

# Number of CUDA devices PyTorch can see (0 on a CPU-only machine).
print(torch.cuda.device_count())

设置设备

通过torch.device()方式指定是使用CPU还是GPU;如果有多个GPU,还可以指明使用哪一个,比如cuda:0、cuda:1

1
2
3
4
5
6
import torch

# Explicitly target the first GPU. Constructing the device object does not
# touch the hardware, so this line is safe even without a GPU installed.
device = torch.device("cuda:0")

# When GPU availability is uncertain, decide at runtime and fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

将模型转移到GPU

定义好模型后,可以使用.to(device)方法将模型转移到GPU

1
2
3
4
5
6
7
8
9
10
11
12
13
import torch
import torch.nn as nn

# Small MLP: 2 features -> 5 hidden units -> ReLU -> 1 output.
model = nn.Sequential(
    nn.Linear(2, 5),
    nn.ReLU(),
    nn.Linear(5, 1),
)

# Prefer the first GPU when available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Module.to() moves parameters and buffers IN PLACE (it also returns the
# module, but reassignment is not required).
model.to(device)
print(model[0].weight)

输出结果:

1
2
3
4
5
6
Parameter containing:
tensor([[ 0.4424, -0.4286],
[-0.5383, -0.2539],
[-0.5725, -0.4457],
[ 0.5889, -0.5590],
[-0.3453, -0.6645]], device='cuda:0', requires_grad=True)

除了使用device之外,还可以直接在to()方法中使用字符串:

1
2
3
4
5
6
7
8
9
10
11
12
import torch
import torch.nn as nn

# Small MLP: 2 features -> 5 hidden units -> ReLU -> 1 output.
model = nn.Sequential(
    nn.Linear(2, 5),
    nn.ReLU(),
    nn.Linear(5, 1),
)

# .to() also accepts a plain device string instead of a torch.device object.
# Guard with is_available(): an unconditional "cuda:0" raises a RuntimeError
# on machines without a working GPU. Module.to() is an in-place operation.
model.to("cuda:0" if torch.cuda.is_available() else "cpu")
print(model[0].weight)

将数据转移到GPU

张量数据也可以使用to(device)方法,但与模块不同,这不是in-place操作:它不会改变原张量本身,而是返回一个位于目标设备上、数据相同的新张量,因此需要重新赋值接收结果

注意,如果两个张量进行运算,这两个张量需要在同一个设备上(均在CPU上或者同一个GPU上)

1
2
3
4
5
6
# Dummy batch: 64 single-channel 28x28 images with integer class labels 0-9.
inputs = torch.randn(64, 1, 28, 28)
labels = torch.randint(0, 10, (64,))

# Tensor.to() is NOT in place: it leaves the original tensor untouched and
# returns a new tensor on the target device, so the results must be rebound.
inputs = inputs.to(device)
labels = labels.to(device)

我们可以通过DataLoader多进程的方式将数据快速地进行转移并训练:

注意:将数据在设备之间来回迁移会消耗大量时间,应尽量减少这类传输,让数据一次性转移到目标设备后再进行计算

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

class SoftmaxRegression(nn.Module):
    """Four-layer MLP classifier (softmax itself is folded into the loss).

    Layer widths: num_input -> 1000 -> 600 -> 200 -> num_output.

    Args:
        num_input: number of input features after flattening.
        num_output: number of classes (size of the logit vector).
    """

    def __init__(self, num_input, num_output):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(num_input, 1000)
        self.linear2 = nn.Linear(1000, 600)
        self.linear3 = nn.Linear(600, 200)
        self.linear4 = nn.Linear(200, num_output)

    def forward(self, x):
        """Return raw class logits for a batch ``x``."""
        # Collapse all non-batch dimensions into a single feature axis.
        out = self.flatten(x)
        # Three ReLU-activated hidden layers.
        for layer in (self.linear1, self.linear2, self.linear3):
            out = F.relu(layer(out))
        # Final layer emits unnormalized logits; CrossEntropyLoss applies
        # log-softmax internally, so no activation here.
        return self.linear4(out)

# Running-sum accumulator over several parallel counters.
class Accumulate:
    """Maintain ``num`` parallel running sums (e.g. correct count, total count)."""

    def __init__(self, num):
        # One float accumulator per tracked quantity.
        self.data = [0.0] * num

    def add(self, *args):
        """Add ``args`` element-wise onto the counters.

        Counters beyond len(args) keep their current value. (The previous
        zip-based implementation silently *dropped* them, shrinking the list.)
        """
        for i, value in enumerate(args):
            self.data[i] += float(value)

    def reset(self):
        """Zero out every counter, keeping the counter count."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Model accuracy over a data iterator.
def accuracy(data_iter, model, device):
    """Return the fraction of samples in ``data_iter`` classified correctly.

    Each batch is moved to ``device`` before the forward pass; gradient
    tracking is disabled since this is evaluation only.
    NOTE(review): consider calling model.eval() here if dropout/batch-norm
    layers are ever added — confirm with callers before changing behavior.
    """
    accumulate = Accumulate(2)  # [num correct, num seen]
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            # Predicted class = index of the largest logit.
            y_hat = model(X).argmax(dim=1)
            accumulate.add((y_hat == y).sum().item(), y.numel())
    return accumulate[0] / accumulate[1]

# Model weight initialization, intended for use with model.apply(init_weight).
def init_weight(m):
    """Kaiming-normal init for Linear weights, zeros for biases.

    Non-Linear modules are left untouched, so it is safe to apply over a
    whole model tree.
    """
    if isinstance(m, nn.Linear):
        # Kaiming init suits the ReLU activations used between layers.
        nn.init.kaiming_normal_(m.weight)
        nn.init.zeros_(m.bias)


# ----- data -----
batch_size = 256
transform = transforms.ToTensor()
# download=False assumes the FashionMNIST files already exist under ./data;
# set download=True for a first run.
train_data = datasets.FashionMNIST(root='./data', train=True, download=False, transform=transform)
test_data = datasets.FashionMNIST(root='./data', train=False, download=False, transform=transform)
# NOTE(review): module-level DataLoaders with num_workers>0 need an
# `if __name__ == "__main__":` guard on spawn-based platforms (Windows/macOS).
train_iter = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, num_workers=4)
test_iter = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True, num_workers=4)

# ----- model, loss, optimizer -----
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = SoftmaxRegression(28 * 28, 10)
model.apply(init_weight)
# Move parameters to the device BEFORE handing them to the optimizer, so the
# optimizer references the tensors that will actually be trained.
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# ----- training loop -----
num_epochs = 10
for epoch in range(num_epochs):
    accumulate = Accumulate(2)  # [num correct, num seen] for this epoch
    for X, y in train_iter:
        # Inputs and targets must live on the same device as the model.
        X, y = X.to(device), y.to(device)
        y_hat = model(X)
        accumulate.add((y_hat.argmax(dim=1) == y).sum().item(), y.numel())
        loss = loss_function(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'epoch {epoch + 1}, '
          f'Train accuracy {(accumulate[0] / accumulate[1]) * 100:.2f}%, '
          f'Test accuracy {accuracy(test_iter, model, device) * 100:.2f}%')

在GPU上训练模型
https://blog.shinebook.net/2025/03/03/人工智能/pytorch/在GPU上训练模型/
作者
X
发布于
2025年3月3日
许可协议