Introduction
This article summarizes how to turn images stored in local storage into a Dataset and then wrap that Dataset in a DataLoader.
CIFAR10 is used as the data.
Overall flow
- Build a pandas DataFrame of image paths and labels from the images in local storage (a quick check of the assumed folder layout follows this list).
- Compute and store the per-channel mean() and std() of the training images, to be used later in Normalize().
- Build a custom Dataset using PyTorch's Dataset class.
- Build a custom DataLoader using PyTorch's DataLoader class.
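The scripts below assume the standard CIFAR10 folder layout, train/<class name>/<image file>. As a quick sanity check, the images per class can be counted beforehand; this is only a sketch, and the dataset_path used here is the same assumed local path that appears throughout this article.
import os

dataset_path = 'C:/DATASETS/basic_datasets/cifar10/train'  # assumed local path

# Count the image files under each class directory
for class_name in sorted(os.listdir(dataset_path)):
    class_dir = os.path.join(dataset_path, class_name)
    print(f'{class_name}: {len(os.listdir(class_dir))} images')  # CIFAR10 has 5000 train images per class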
1. Turning image paths and labels into a DataFrame
import os
import pandas as pd
import cv2
import torch
from torchvision import transforms
dataset_path = 'C:/DATASETS/basic_datasets/cifar10/train'
label_mapping = {
    'airplane': 0,
    'automobile': 1,
    'bird': 2,
    'cat': 3,
    'deer': 4,
    'dog': 5,
    'frog': 6,
    'horse': 7,
    'ship': 8,
    'truck': 9
}
# Initialize empty lists for paths and labels
file_paths = []
labels = []
# Iterate through each class directory
for class_name in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, class_name)
    # Iterate through each image file in the class directory
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        # Append the file path and corresponding label to the lists
        file_paths.append(file_path)
        labels.append(label_mapping[class_name])
# Create a DataFrame from the lists
df = pd.DataFrame({'path': file_paths, 'label': labels})
# Display the DataFrame
print(df.head())
# Save the DataFrame to a CSV file
df.to_csv('dataset_train.csv', index=False)
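As a quick check (a sketch, assuming the CSV above was written successfully), the saved file can be read back to confirm the row count and the class balance:
import pandas as pd

# Re-load the CSV written above and inspect it
df_check = pd.read_csv('dataset_train.csv')
print(len(df_check))                     # expected: 50000 rows for the CIFAR10 train split
print(df_check['label'].value_counts())  # expected: 5000 images per label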
2. Per-channel mean and std of the training images
# Calculate the mean and std values of train images
# Iterate through each class directory
# Initialize empty lists for storing the image tensors
image_tensors = []
for class_name in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, class_name)
    # Iterate through each image file in the class directory
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        img = cv2.imread(file_path)
        # OpenCV loads images as BGR; convert to RGB so these statistics match
        # the RGB images loaded with PIL in the Dataset later on
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Convert the image to a PyTorch tensor (scales to [0, 1], channels first)
        transform = transforms.ToTensor()
        img_tensor = transform(img)
        image_tensors.append(img_tensor)
print(len(image_tensors))
# Stack the image tensors along a new batch dimension
concatenated_images = torch.stack(image_tensors, dim=0)
# Convert the tensor to a floating-point data type
concatenated_images = concatenated_images.float()
print(concatenated_images.shape)
print(concatenated_images.dtype)
# Calculate the mean and std values
mean = torch.mean(concatenated_images, dim=(0, 2, 3))
std = torch.std(concatenated_images, dim=(0, 2, 3))
'''
After stacking, the tensor has shape (50000, 3, 32, 32): (batch, channel, height, width).
Passing dim=(0, 2, 3) to torch.mean() and torch.std() reduces over the batch, height and
width dimensions, leaving one mean and one std value per channel -- exactly the three
values that transforms.Normalize() expects.
'''
# Print the calculated mean and std values
print("Mean values:", mean)
print("Standard deviation values:", std)
# output file
output_file = 'data.pt'
# Create a dictionary to store the tensors
data_dict = {'mean': mean, 'std': std}
# save the tensor to a file
torch.save(data_dict, output_file)
print(f"Tensors saved to '{output_file}'.")
3. Custom Dataset and DataLoader
import os
import pandas as pd
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import random
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm  # when running from the command line
# from tqdm.notebook import tqdm  # when running in Jupyter
# Select the device (CPU/GPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class MyDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        # Read the CSV file created in step 1
        df = pd.read_csv(csv_path)
        self.image_paths = df['path']
        self.labels = df['label']
        self.transform = transform

    def __len__(self):
        # Return the number of samples
        return len(self.image_paths)

    def __getitem__(self, index):
        path = self.image_paths[index]
        # Load the image and apply the transform if one was given
        img = Image.open(path)
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[index]
        image_path = self.image_paths[index]
        return img, label, image_path
if __name__ == '__main__':
    # File holding the channel statistics from step 2
    output_file = 'data.pt'
    # Load the mean/std dictionary
    loaded_data_dict = torch.load(output_file)
    mean_values = loaded_data_dict['mean']
    std_values = loaded_data_dict['std']
    # Making Datasets
    csv_train_fpath = './dataset_train.csv'
    transform_normal = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_values, std=std_values),
    ])
    dataset_train = MyDataset(csv_path=csv_train_fpath, transform=transform_normal)
    # Making Dataloader
    batch_size = 32
    dataloader = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
    # Check inside of the data loader
    for img, label, img_path in dataloader:
        print(f'label = {label}, img shape = {img.shape}, \n img_path = {img_path}')
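As a quick usage check (a sketch, not part of the original script), the following lines can be appended inside the __main__ block above; they reuse dataloader, mean_values, and std_values defined there, plus the matplotlib and numpy imports at the top, to denormalize and display the first image of a batch.
    # Grab a single batch, undo the normalization and show the first image
    img, label, img_path = next(iter(dataloader))
    first = img[0] * std_values.view(3, 1, 1) + mean_values.view(3, 1, 1)  # invert Normalize()
    plt.imshow(np.clip(first.permute(1, 2, 0).numpy(), 0.0, 1.0))          # CHW -> HWC
    plt.title(f'label = {int(label[0])}')
    plt.show()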
4. Full code
Build a DataFrame of image paths and labels from the images in local storage, then compute and store the per-channel mean() and std() of the training images.
import os
import pandas as pd
import cv2
import torch
from torchvision import transforms
dataset_path = 'C:/DATASETS/basic_datasets/cifar10/train'
label_mapping = {
    'airplane': 0,
    'automobile': 1,
    'bird': 2,
    'cat': 3,
    'deer': 4,
    'dog': 5,
    'frog': 6,
    'horse': 7,
    'ship': 8,
    'truck': 9
}
# Initialize empty lists for paths and labels
file_paths = []
labels = []
# Iterate through each class directory
for class_name in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, class_name)
    # Iterate through each image file in the class directory
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        # Append the file path and corresponding label to the lists
        file_paths.append(file_path)
        labels.append(label_mapping[class_name])
# Create a DataFrame from the lists
df = pd.DataFrame({'path': file_paths, 'label': labels})
# Display the DataFrame
print(df.head())
# Save the DataFrame to a CSV file
df.to_csv('dataset_train.csv', index=False)
# Calculate the mean and std values of train images
# Iterate through each class directory
# Initialize empty lists for storing the image tensors
image_tensors = []
for class_name in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, class_name)
    # Iterate through each image file in the class directory
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        img = cv2.imread(file_path)
        # OpenCV loads images as BGR; convert to RGB so these statistics match
        # the RGB images loaded with PIL in the Dataset later on
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Convert the image to a PyTorch tensor (scales to [0, 1], channels first)
        transform = transforms.ToTensor()
        img_tensor = transform(img)
        image_tensors.append(img_tensor)
print(len(image_tensors))
# Stack the image tensors along a new batch dimension
concatenated_images = torch.stack(image_tensors, dim=0)
# Convert the tensor to a floating-point data type
concatenated_images = concatenated_images.float()
print(concatenated_images.shape)
print(concatenated_images.dtype)
# Calculate the mean and std values
mean = torch.mean(concatenated_images, dim=(0, 2, 3))
std = torch.std(concatenated_images, dim=(0, 2, 3))
'''
After stacking, the tensor has shape (50000, 3, 32, 32): (batch, channel, height, width).
Passing dim=(0, 2, 3) to torch.mean() and torch.std() reduces over the batch, height and
width dimensions, leaving one mean and one std value per channel -- exactly the three
values that transforms.Normalize() expects.
'''
# Print the calculated mean and std values
print("Mean values:", mean)
print("Standard deviation values:", std)
# output file
output_file = 'data.pt'
# Create a dictionary to store the tensors
data_dict = {'mean': mean, 'std': std}
# save the tensor to a file
torch.save(data_dict, output_file)
print(f"Tensors saved to '{output_file}'.")
Build a custom Dataset with PyTorch's Dataset class, then wrap it in a custom DataLoader with PyTorch's DataLoader class.
import os
import pandas as pd
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import random
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm  # when running from the command line
# from tqdm.notebook import tqdm  # when running in Jupyter
# Select the device (CPU/GPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class MyDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        # Read the CSV file created in step 1
        df = pd.read_csv(csv_path)
        self.image_paths = df['path']
        self.labels = df['label']
        self.transform = transform

    def __len__(self):
        # Return the number of samples
        return len(self.image_paths)

    def __getitem__(self, index):
        path = self.image_paths[index]
        # Load the image and apply the transform if one was given
        img = Image.open(path)
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[index]
        image_path = self.image_paths[index]
        return img, label, image_path
if __name__ == '__main__':
    # File holding the channel statistics from step 2
    output_file = 'data.pt'
    # Load the mean/std dictionary
    loaded_data_dict = torch.load(output_file)
    mean_values = loaded_data_dict['mean']
    std_values = loaded_data_dict['std']
    # Making Datasets
    csv_train_fpath = './dataset_train.csv'
    transform_normal = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_values, std=std_values),
    ])
    dataset_train = MyDataset(csv_path=csv_train_fpath, transform=transform_normal)
    # Making Dataloader
    batch_size = 32
    dataloader = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
    # Check inside of the data loader
    for img, label, img_path in dataloader:
        print(f'label = {label}, img shape = {img.shape}, \n img_path = {img_path}')