用Python PyTorch做猫狗识别，看完就能上手

不用懂数学！小白也能跟着敲的深度学习实战，练完就能自己做AI识别！

打开今日头条查看图片详情

为什么选PyTorch？看完你就懂

想入门深度学习，选对工具很重要！PyTorch凭这几点成了新手首选：

· 简单易学：代码像写Python一样自然，比TensorFlow更适合入门

· 灵活高效：动态计算图，调试起来超方便

· 大佬都在用：OpenAI、特斯拉、斯坦福大学都靠它做研究

· 咱们今天不用从零造车：用’迁移学习’借现成模型。

环境准备：1行命令搞定

先打开命令行终端（Windows用CMD/PowerShell，Mac/Linux用Terminal，注意不是Python交互界面），复制粘贴这行命令，安装需要的工具：

pip install torch torchvision matplotlib tqdm scikit-learn seaborn  # tqdm: training progress bar; scikit-learn + seaborn: confusion-matrix plot in the evaluation step

等安装完成，咱们就可以开始啦~

数据集怎么弄？附获取方式

用Kaggle经典的猫狗数据集（含25000张图片），获取方式：

1. 直接百度搜“Kaggle猫狗数据集”，找国内网盘链接（关键词：kaggle cats vs dogs 百度云）

2. 或者去Kaggle官网下载（需注册，地址：
https://www.kaggle.com/c/dogs-vs-cats/data）

下载后按这个结构放好（注意：Kaggle官方的测试集没有标签，所以test里的图片要从带标签的训练图片里分出一部分，比如每类留出20%）：

data/
├── train/
│   ├── cat/   # 放所有猫的图片
│   └── dog/   # 放所有狗的图片
└── test/
    ├── cat/   # 测试用的猫图片
    └── dog/   # 测试用的狗图片

然后用代码加载数据：

import torch

from torchvision import datasets, transforms

import matplotlib.pyplot as plt

# Preprocessing.
# Channel-wise normalization shared by both splits; these are the ImageNet
# statistics that torchvision documents for its pretrained models.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: includes a random flip as data augmentation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),      # force every image to 224x224 pixels
    transforms.RandomHorizontalFlip(),  # random flip adds data diversity
    transforms.ToTensor(),              # convert the image to a tensor
    normalize,
])

# Evaluation pipeline: identical except that it has no random augmentation,
# so accuracy measured on the test split is reproducible between runs.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# Datasets: ImageFolder takes one sub-folder per class.
train_data = datasets.ImageFolder('data/train', transform=transform)
test_data = datasets.ImageFolder('data/test', transform=test_transform)

# Batched loaders; only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)

# Sanity check of what was found on disk.
print(f'训练集有{len(train_data)}张图，测试集有{len(test_data)}张图')
print(f'类别：{train_data.classes}')  # expected output: ['cat', 'dog']

先看看数据长啥样？可视化代码

运行这段代码，就能看到数据集中的猫狗图片，确认数据没问题：

def imshow(img):
    """Draw one normalized image tensor with matplotlib.

    Args:
        img: image tensor in channel-first layout (the transpose below moves
            axis 0 to the end), normalized with the mean/std used in the
            preprocessing step of this tutorial.
    """
    # Local import: numpy is not part of the tutorial's import list, and the
    # original snippet used `np` without importing it.
    import numpy as np

    img = img.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    img = std * img + mean     # undo the normalization
    img = np.clip(img, 0, 1)   # keep values in the displayable [0, 1] range
    plt.imshow(img)
    plt.axis('off')

# Fetch a single batch from the training loader.
images, labels = next(iter(train_loader))

# Show at most 8 images of that batch in a 2x4 grid.
fig = plt.figure(figsize=(12, 8))
for position, (image, label) in enumerate(zip(images[:8], labels[:8]), start=1):
    ax = fig.add_subplot(2, 4, position)
    imshow(image)
    ax.set_title(train_data.classes[int(label)])  # show the class name
plt.show()

数据集中的猫狗图片，是不是很可爱？

打开今日头条查看图片详情

重点来了！用迁移学习快速建模型

不用自己设计复杂网络！咱们直接用别人训练好的ResNet18模型：

from torchvision import models

import torch.nn as nn

# Load ResNet18 with ImageNet-pretrained weights.
# `weights=` replaces the `pretrained=True` flag, which torchvision has
# deprecated (since 0.13); IMAGENET1K_V1 is the checkpoint that flag selected.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze every pretrained parameter so that only the new head is trained.
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer with a small head that outputs two classes.
num_features = model.fc.in_features  # input width of the original final layer
model.fc = nn.Sequential(
    nn.Linear(num_features, 512),  # intermediate layer
    nn.ReLU(),                     # non-linearity
    nn.Dropout(0.5),               # regularization against overfitting
    nn.Linear(512, 2),             # two outputs: cat and dog
)

# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

print('模型建好了！结构长这样：')
print(model)

训练模型：代码抄进去，等着出结果

复制这段代码，运行后会自动训练+显示进度，新手不用懂原理也能跑：

import torch.optim as optim

from tqdm import tqdm # 显示进度条的工具

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()  # standard loss for classification
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)  # only the new head is optimized

# Training settings and per-epoch history (read later by the plotting code).
num_epochs = 10
train_losses = []     # mean training loss of each epoch
val_accuracies = []   # accuracy (%) on test_loader after each epoch

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0

    # tqdm wraps the loader to display a progress bar.
    loop = tqdm(train_loader, desc=f'第{epoch+1}轮训练')
    for images, labels in loop:
        # Move the batch to the same device as the model.
        images, labels = images.to(device), labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        loop.set_postfix(当前损失=batch_loss)  # current loss shown on the bar

    # Mean loss over the batches of this epoch.
    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)

    # ---- evaluation pass ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)  # index of the highest score
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    val_accuracies.append(accuracy)
    print(f'第{epoch+1}轮结果：损失={epoch_loss:.4f}，测试准确率={accuracy:.2f}%')

# Save the trained weights once all epochs are done.
torch.save(model.state_dict(), '猫狗识别模型.pth')
# The original line nested quotes of the same kind and could not be parsed;
# the outer string now uses double quotes.
print("模型保存好了！叫'猫狗识别模型.pth'")

看看训练效果：画个图直观感受

训练完了，用代码画个图，看看模型是不是越学越好：

# NOTE(review): the labels below are Chinese; if they render as empty boxes,
# matplotlib's active font presumably lacks CJK glyphs and a CJK-capable font
# needs to be configured (e.g. via plt.rcParams['font.sans-serif']).
plt.figure(figsize=(12, 5))

# Left panel: training loss per epoch (expected to decrease).
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='训练损失')
plt.title('训练损失变化')
plt.xlabel('训练轮数')
plt.ylabel('损失值')
plt.legend()

# Right panel: test accuracy per epoch.
plt.subplot(1, 2, 2)
plt.plot(val_accuracies, label='测试准确率', color='orange')
plt.title('测试准确率变化')
plt.xlabel('训练轮数')
plt.ylabel('准确率(%)')
plt.legend()

plt.tight_layout()
plt.show()

打开今日头条查看图片详情

模型评估：看看哪里对得多，哪里错得多

用混淆矩阵看看模型表现，比如是不是把猫认错成狗的情况多：

def evaluate_model(model, data_loader):
    """Print the accuracy of ``model`` on ``data_loader`` and plot a confusion matrix.

    Relies on the module-level ``device`` defined when the model was built.

    Args:
        model: network to evaluate; it is switched to eval mode.
        data_loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage; ``0.0`` when the loader yields no samples
        (the original code divided by zero in that case).
    """
    # Function-scope imports, as in the original snippet: only the
    # confusion-matrix plot needs these two packages.
    from sklearn.metrics import confusion_matrix
    import seaborn as sns

    model.eval()
    all_preds = []    # every predicted class index
    all_labels = []   # every ground-truth class index
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    total = len(all_labels)
    correct = sum(pred == truth for pred, truth in zip(all_preds, all_labels))
    accuracy = 100 * correct / total if total else 0.0
    print(f'最终测试准确率：{accuracy:.2f}%')

    # labels=[0, 1] pins the matrix to 2x2 so it always lines up with the
    # hard-coded tick labels, even if one class is absent from the data.
    # The tick labels assume class index 0 is cat and 1 is dog, i.e. the
    # ['cat', 'dog'] order the data-loading step expects to print.
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['预测为猫', '预测为狗'],
                yticklabels=['实际是猫', '实际是狗'])
    plt.title('混淆矩阵：看看模型哪里容易错')
    plt.show()

    return accuracy


# Run the evaluation on the test split.
test_accuracy = evaluate_model(model, test_loader)

打开今日头条查看图片详情

实战预测：拿张照片让模型认一认

找张自己的猫/狗照片，用这段代码让模型预测，超有成就感！

from PIL import Image # 处理图片的库

# Deterministic preprocessing for inference: same size and normalization as
# training, but no random augmentation, so one photo always gives one answer.
predict_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def predict_image(image_path):
    """Classify one image file, display it with the prediction, and return the class name.

    Relies on the module-level ``model``, ``device`` and ``train_data``.

    Args:
        image_path: path of the image file to classify.

    Returns:
        The predicted class name taken from ``train_data.classes``.
    """
    # Open the file once and force 3 channels: RGBA or grayscale files would
    # otherwise not match the 3-channel normalization above.
    with Image.open(image_path) as img_file:
        picture = img_file.convert('RGB')

    batch = predict_transform(picture).unsqueeze(0).to(device)  # add batch dimension

    model.eval()
    with torch.no_grad():
        output = model(batch)

    class_idx = output.argmax(dim=1).item()
    # Softmax turns the raw scores into a confidence for the chosen class.
    probability = torch.nn.functional.softmax(output, dim=1)[0][class_idx].item()

    # Show the picture with the verdict as its title.
    plt.imshow(picture)
    plt.axis('off')
    plt.title(f'模型说这是：{train_data.classes[class_idx]}（可信度{probability*100:.1f}%）')
    plt.show()

    return train_data.classes[class_idx]


# Try it out (replace the paths with your own pictures).
predict_image('我家的猫.jpg')
predict_image('邻居家的狗.jpg')

新手必看：常见问题+优化技巧

没GPU？这样加速训练

· 用Google Colab（免费GPU）：打开colab.google，新建笔记本，代码粘进去，运行时选’修改→笔记本设置→硬件加速器→GPU’

· 减小图片尺寸：把Resize((224,224))改成Resize((112,112))，训练更快（准确率略降）

想让模型更准？试试这3招

# 1. 更强的数据增强（让模型见更多样的图片）

打开今日头条查看图片详情

# Stronger augmentation, for the TRAINING data only.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),       # random crop, resized to 224
    transforms.RandomRotation(15),           # random rotation
    transforms.ColorJitter(brightness=0.2),  # random brightness change
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Deterministic counterpart for evaluation and prediction. Pass this one to
# the test dataset; random crops/rotations there would make the measured
# accuracy vary from run to run.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# 2. 学习率自动调整（让模型学得更稳）

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode=’max’, factor=0.1, patience=2)

# 3. 早停机制（防止学过头）

best_accuracy = 0

patience_counter = 0

# 在每轮验证后加这段：

if accuracy > best_accuracy:

best_accuracy = accuracy

torch.save(model.state_dict(), ‘最好的模型.pth’)

patience_counter = 0

else:

patience_counter += 1

if patience_counter >= 3: # 3轮没进步就停

print(‘差不多了，停！’)

break

打开今日头条查看图片详情

模型怎么存起来以后用？

训练好的模型存在猫狗识别模型.pth里，以后用的时候加载：

# Load the saved weights.
# `model` must already be built with the same architecture as in the
# model-building step, because the file stores only the state dict.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('猫狗识别模型.pth', map_location=device))
model = model.to(device)  # move the model to the GPU when one is available
model.eval()  # switch to evaluation mode (turns Dropout off) before predicting

学会了能做啥？这些方向可以试试

· 改改代码，做’猫/狗/其他动物’三分类

· 训练一个’人脸/风景’识别器

· 给宠物APP加个自动分类功能

打开今日头条查看图片详情

完整代码打包放这了：[GitHub链接]

跟着敲完代码的同学，有任何报错或疑问，直接留言，我来帮你解决~ 学会这招，以后别人问你’会AI吗’，就可以自信地说’会啊，我做过猫狗识别呢！’

#Python基础##新春营销学习计划##每天学python##分享我的头条荣誉#

微精选