個人網站備案要錢嗎站長工具查詢seo
一、任務背景
本次python實戰,我們使用來自Kaggle的數據集《Chinese MNIST》進行CNN分類建模,不同于經典的MNIST數據集,我們這次使用的數據集是漢字手寫體數字。除了常規的漢字“零”到“九”之外還多了“十”、“百”、“千”、“萬”、“億”,共15種漢字數字。
二、python建模
1、數據讀取
首先,讀取jpg數據文件,可以看到總共有15000張圖像數據。
import os

import pandas as pd

# Directory that holds the Chinese MNIST image files.
# (The original text had `import os` fused with this assignment, which is
# a syntax error; they are two separate statements.)
path = '/kaggle/input/chinese-mnist/data/data/'
files = os.listdir(path)
print('數(shù)據(jù)總量:', len(files))
我們也可以打印一張圖片出來看看。
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Path of the image to preview: the entry at index 3 of the directory listing.
image_path = f'{path}{files[3]}'

# Load the image.
image = mpimg.imread(image_path)

# Draw it on a small figure.
plt.figure(figsize=(3, 3))
plt.imshow(image)
plt.axis('off')  # hide the axes
plt.show()
2、數據集構建
加載必要的庫以便后續使用,再定義一些超參數。
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import precision_score, recall_score, f1_score# 超參數(shù)
# Hyperparameters.
batch_size = 64
learning_rate = 0.01
num_epochs = 5

# Preprocessing pipeline: convert each image to a tensor, then normalise it
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)
這里,我們看一看數據集介紹就會知道圖片名稱及其含義,需要從chinese_mnist.csv文件中根據圖片名稱中的幾個數字來確定圖片對應的標簽。
# Collect the path of every .jpg file in the data directory.  os.listdir
# returns entries in arbitrary order, so the paths are sorted to keep the
# fixed-seed train/test split reproducible across runs and machines.
all_images = sorted(
    os.path.join(path, img) for img in os.listdir(path) if img.endswith('.jpg')
)

# Read the index-to-label CSV and index it by ('suite_id', 'sample_id',
# 'code') so a label can be looked up directly from those three values.
index_df = pd.read_csv('/kaggle/input/chinese-mnist/chinese_mnist.csv')
index_df.set_index(['suite_id', 'sample_id', 'code'], inplace=True)

# Next: a helper that uses those index values to find an image's 'value' label.
def get_label_from_index(filename, index_df):
    """Look up the 'value' label for an image file name.

    The part of ``filename`` before the first '.' is split on '_'; the first
    field is dropped and the remaining three are parsed as integers and used
    as the (suite_id, sample_id, code) key.

    Args:
        filename: Base name of the image file, without a directory part.
        index_df: DataFrame indexed by (suite_id, sample_id, code) that has a
            'value' column.

    Returns:
        The 'value' entry stored under that key.

    Raises:
        ValueError: If the name does not yield exactly three integer fields.
        KeyError: If the key is not present in ``index_df``.
    """
    stem = filename.split('.')[0]
    suite_id, sample_id, code = map(int, stem.split('_')[1:])
    return index_df.loc[(suite_id, sample_id, code), 'value']


# Map each raw 'value' to the class index used for model training.
# train_test_split is called below but was never imported (random_split was
# imported instead), so the split line raised NameError.
from sklearn.model_selection import train_test_split

label_dic = {0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 10:10, 100:11, 1000:12, 10000:13, 100000000:14}

# Look up every image's 'value' label and convert it to its class index.
all_labels = [
    label_dic[get_label_from_index(os.path.basename(img), index_df)]
    for img in all_images
]

# Split the image paths and labels into training and test sets; test_size=0.2
# holds out 20% of the samples for testing.
train_images, test_images, train_labels, test_labels = train_test_split(all_images, all_labels, test_size=0.2, random_state=2024)
下面定義數據集類并完成數據的加載。
# Custom dataset class.
class CustomDataset(Dataset):
    """Dataset that loads grayscale images from file paths on demand.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels; ``labels[i]`` belongs to ``image_paths[i]``.
        transform: Optional callable applied to each loaded image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        # Open inside a context manager so the file handle is released as
        # soon as the pixels have been converted.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('L')  # convert to grayscale
        label = self.labels[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label


# Create the training and test datasets.
train_dataset = CustomDataset(train_images, train_labels, transform=transform)
test_dataset = CustomDataset(test_images, test_labels, transform=transform)

# Create the data loaders.  They use the batch_size hyperparameter rather
# than a repeated literal 64, so changing it in one place takes effect here.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Print the size of each split.
print(f'訓(xùn)練集樣本數(shù): {len(train_dataset)}')
print(f'測試集樣本數(shù): {len(test_dataset)}')
3、模型構建
我們構建一個包含兩層卷積層和池化層的CNN并且在池化層中使用最大池化的方式。
# CNN model definition.
class CNN(nn.Module):
    """Two-convolution CNN that produces 15 class scores per image.

    Each 3x3 convolution (padding 1) is followed by ReLU and a 2x2 max pool
    with stride 2.  ``fc1`` takes 64 * 16 * 16 features, i.e. 64 channels on
    a 16x16 map, which is what a 1x64x64 input becomes after the two pools.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 15)

    def forward(self, x):
        """Return a (batch, 15) tensor of class scores for input ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample.  Keeping the batch dimension fixed means a
        # wrongly sized input fails in fc1 instead of being silently
        # reshaped into a different number of rows.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
4、模型實例化及訓練
下面我們對模型進行實例化并定義criterion和optimizer。
# Instantiate the model, the loss function and the optimizer.
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    momentum=0.9,
)
定義訓練的代碼并調用代碼訓練模型。
from tqdm import tqdm


# Train the model.
def train(model, train_loader, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` passes and print each epoch's mean loss.

    Args:
        model: Module to train; it is switched to training mode first.
        train_loader: Iterable of ``(data, target)`` batches that supports
            ``len()``.
        criterion: Callable returning the loss for ``(output, target)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        epochs: Number of passes over ``train_loader``.
    """
    model.train()
    num_batches = len(train_loader)
    for epoch in range(epochs):
        running_loss = 0.0  # sum of per-batch losses for this epoch
        for data, target in tqdm(train_loader):
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # max(..., 1) avoids ZeroDivisionError when the loader has no batches.
        print(f'Epoch [{epoch + 1}], Loss: {running_loss / max(num_batches, 1):.4f}')


train(model, train_loader, criterion, optimizer, num_epochs)
5、測試模型
定義模型測試代碼,調用代碼看指標可知我們所構建的CNN模型表現還不錯。
# Evaluate the model.
def test(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` and print the metrics.

    Prints the mean per-sample loss, accuracy in percent, and macro-averaged
    precision, recall and F1 score.

    Args:
        model: Module to evaluate; it is switched to evaluation mode first.
        test_loader: Iterable of ``(data, target)`` batches with a
            ``dataset`` attribute that supports ``len()``.
        criterion: Callable returning the loss for ``(output, target)``.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            # Assumes criterion returns the batch mean, as the default
            # nn.CrossEntropyLoss() does.  Weighting by batch size makes the
            # division by the sample count below a true per-sample mean; the
            # original summed batch means and divided by the sample count,
            # under-reporting the loss by roughly the batch size.
            total_loss += criterion(output, target).item() * target.size(0)
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    num_samples = len(test_loader.dataset)
    test_loss = total_loss / num_samples
    accuracy = 100. * correct / num_samples
    precision = precision_score(all_targets, all_preds, average='macro')
    recall = recall_score(all_targets, all_preds, average='macro')
    f1 = f1_score(all_targets, all_preds, average='macro')
    print(f'Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')


test(model, test_loader, criterion)
三、完整代碼
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score

# Directory that holds the Chinese MNIST image files.
# (The original text had the import above fused with this assignment,
# which is a syntax error; they are two separate statements.)
path = '/kaggle/input/chinese-mnist/data/data/'
files = os.listdir(path)
print('數(shù)據(jù)總量:', len(files))

# Hyperparameters.
batch_size = 64
learning_rate = 0.01
num_epochs = 5

# Preprocessing pipeline: convert each image to a tensor, then normalise it
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Collect the path of every .jpg file in the data directory.  os.listdir
# returns entries in arbitrary order, so the paths are sorted to keep the
# fixed-seed train/test split reproducible across runs and machines.
all_images = sorted(
    os.path.join(path, img) for img in os.listdir(path) if img.endswith('.jpg')
)

# Read the index-to-label CSV and index it by ('suite_id', 'sample_id',
# 'code') so a label can be looked up directly from those three values.
index_df = pd.read_csv('/kaggle/input/chinese-mnist/chinese_mnist.csv')
index_df.set_index(['suite_id', 'sample_id', 'code'], inplace=True)

# Next: a helper that uses those index values to find an image's 'value' label.
def get_label_from_index(filename, index_df):
    """Look up the 'value' label for an image file name.

    The part of ``filename`` before the first '.' is split on '_'; the first
    field is dropped and the remaining three are parsed as integers and used
    as the (suite_id, sample_id, code) key.

    Args:
        filename: Base name of the image file, without a directory part.
        index_df: DataFrame indexed by (suite_id, sample_id, code) that has a
            'value' column.

    Returns:
        The 'value' entry stored under that key.

    Raises:
        ValueError: If the name does not yield exactly three integer fields.
        KeyError: If the key is not present in ``index_df``.
    """
    stem = filename.split('.')[0]
    suite_id, sample_id, code = map(int, stem.split('_')[1:])
    return index_df.loc[(suite_id, sample_id, code), 'value']


# Map each raw 'value' to the class index used for model training.
label_dic = {0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 10:10, 100:11, 1000:12, 10000:13, 100000000:14}

# Look up every image's 'value' label and convert it to its class index.
all_labels = [
    label_dic[get_label_from_index(os.path.basename(img), index_df)]
    for img in all_images
]

# Split the image paths and labels into training and test sets; test_size=0.2
# holds out 20% of the samples for testing.  train_test_split comes from
# sklearn.model_selection, which the import block must provide.
train_images, test_images, train_labels, test_labels = train_test_split(all_images, all_labels, test_size=0.2, random_state=2024)

# Custom dataset class.
class CustomDataset(Dataset):
    """Dataset that loads grayscale images from file paths on demand.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels; ``labels[i]`` belongs to ``image_paths[i]``.
        transform: Optional callable applied to each loaded image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        # Open inside a context manager so the file handle is released as
        # soon as the pixels have been converted.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('L')  # convert to grayscale
        label = self.labels[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label


# Create the training and test datasets.
train_dataset = CustomDataset(train_images, train_labels, transform=transform)
test_dataset = CustomDataset(test_images, test_labels, transform=transform)

# Create the data loaders.  They use the batch_size hyperparameter rather
# than a repeated literal 64, so changing it in one place takes effect here.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Print the size of each split.
print(f'訓(xùn)練集樣本數(shù): {len(train_dataset)}')
print(f'測試集樣本數(shù): {len(test_dataset)}')

# CNN model definition.
class CNN(nn.Module):
    """Two-convolution CNN that produces 15 class scores per image.

    Each 3x3 convolution (padding 1) is followed by ReLU and a 2x2 max pool
    with stride 2.  ``fc1`` takes 64 * 16 * 16 features, i.e. 64 channels on
    a 16x16 map, which is what a 1x64x64 input becomes after the two pools.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 15)

    def forward(self, x):
        """Return a (batch, 15) tensor of class scores for input ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample.  Keeping the batch dimension fixed means a
        # wrongly sized input fails in fc1 instead of being silently
        # reshaped into a different number of rows.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# Instantiate the model, the loss function and the optimizer.
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    momentum=0.9,
)

# Train the model.
def train(model, train_loader, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` passes and print each epoch's mean loss.

    Args:
        model: Module to train; it is switched to training mode first.
        train_loader: Iterable of ``(data, target)`` batches that supports
            ``len()``.
        criterion: Callable returning the loss for ``(output, target)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        epochs: Number of passes over ``train_loader``.
    """
    model.train()
    num_batches = len(train_loader)
    for epoch in range(epochs):
        running_loss = 0.0  # sum of per-batch losses for this epoch
        for data, target in tqdm(train_loader):
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # max(..., 1) avoids ZeroDivisionError when the loader has no batches.
        print(f'Epoch [{epoch + 1}], Loss: {running_loss / max(num_batches, 1):.4f}')


train(model, train_loader, criterion, optimizer, num_epochs)

# Evaluate the model.
def test(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` and print the metrics.

    Prints the mean per-sample loss, accuracy in percent, and macro-averaged
    precision, recall and F1 score.

    Args:
        model: Module to evaluate; it is switched to evaluation mode first.
        test_loader: Iterable of ``(data, target)`` batches with a
            ``dataset`` attribute that supports ``len()``.
        criterion: Callable returning the loss for ``(output, target)``.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            # Assumes criterion returns the batch mean, as the default
            # nn.CrossEntropyLoss() does.  Weighting by batch size makes the
            # division by the sample count below a true per-sample mean; the
            # original summed batch means and divided by the sample count,
            # under-reporting the loss by roughly the batch size.
            total_loss += criterion(output, target).item() * target.size(0)
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    num_samples = len(test_loader.dataset)
    test_loss = total_loss / num_samples
    accuracy = 100. * correct / num_samples
    precision = precision_score(all_targets, all_preds, average='macro')
    recall = recall_score(all_targets, all_preds, average='macro')
    f1 = f1_score(all_targets, all_preds, average='macro')
    print(f'Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')


test(model, test_loader, criterion)
四、總結
本文基于漢字手寫體數字圖像進行了CNN分類實戰,CNN作為圖像處理的經典模型,展現出了它強大的圖像特征提取能力,結合更加復雜的模型框架,CNN還可用于高精度人臉識別、物體識別等任務中。