More than 1 year has passed since last update.

PyTorchのCustom Dataloader

Posted at 2023-06-10

初めに

Local Storageにある画像をDataset化した後、Data Loaderにする方法をまとめる。
データはCIFAR10を利用する。

全体の流れ

Local Storageにある画像から、Image PathとLabelを列に持つPandasのDataFrameを作る。
Train画像の各チャンネルのmean(), std()を計算して保存する（後ほどNormalize()に使うため）。
PyTorchのDatasetクラスを利用し、Custom Datasetを作る。
PyTorchのDataLoaderクラスを利用し、Custom DataLoaderを作る。

1.Path名、LabelをDataframe化

import os
import pandas as pd
import cv2
import torch
from torchvision import transforms

# Root directory of the training images. It is expected to contain one
# sub-directory per class, named after the keys of label_mapping.
dataset_path = 'C:/DATASETS/basic_datasets/cifar10/train'

# Class-directory name -> integer label written to the CSV.
label_mapping = {
    'airplane': 0,
    'automobile': 1,
    'bird': 2,
    'cat': 3,
    'deer': 4,
    'dog': 5,
    'frog': 6,
    'horse': 7,
    'ship': 8,
    'truck': 9
}

# Parallel lists: file_paths[i] is the image whose label is labels[i].
file_paths = []
labels = []

# Visit the class directories in sorted order so the generated CSV is
# reproducible; os.listdir() returns entries in arbitrary order.
for class_name in sorted(os.listdir(dataset_path)):
    class_dir = os.path.join(dataset_path, class_name)

    # Skip stray files lying next to the class folders (e.g. Thumbs.db);
    # listing them as a directory would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue

    # Look the label up once per class. A directory that is not in
    # label_mapping still raises KeyError, so a typo is not silently ignored.
    label = label_mapping[class_name]

    # Record every entry of the class directory together with its label.
    for file_name in sorted(os.listdir(class_dir)):
        file_paths.append(os.path.join(class_dir, file_name))
        labels.append(label)

# One row per image: its path and its integer label.
df = pd.DataFrame({'path': file_paths, 'label': labels})

# Show the first rows as a quick sanity check.
print(df.head())

# Persist the table (without the index column) for the Dataset to read.
df.to_csv('dataset_train.csv', index=False)

2. Train画像の各チャンネルのmeanとstd

# Calculate the per-channel mean and std values of the train images.

# Build the converter once; the original re-created it for every image.
transform = transforms.ToTensor()

# One tensor per image, collected across all class directories.
image_tensors = []
for class_name in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, class_name)

    # Ignore stray files next to the class folders; os.listdir() on a
    # regular file would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue

    # Iterate through each image file in the class directory
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        img = cv2.imread(file_path)

        # cv2.imread() returns None instead of raising when a file cannot be
        # decoded; fail with the offending path rather than an obscure error
        # inside ToTensor().
        if img is None:
            raise ValueError(f"Could not read image file: {file_path}")

        # OpenCV loads channels in BGR order. Convert to RGB so the statistics
        # line up with images loaded through PIL (RGB) when they are later
        # passed to transforms.Normalize().
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Convert image to PyTorch tensor
        image_tensors.append(transform(img))

if not image_tensors:
    # torch.stack() on an empty list fails with an unhelpful message.
    raise RuntimeError(f"No images found under '{dataset_path}'.")

print(len(image_tensors))
# Stack the per-image tensors along a new leading batch dimension.
concatenated_images = torch.stack(image_tensors, dim=0)
# Ensure a floating-point dtype (ToTensor() normally yields one already;
# kept as a safeguard).
concatenated_images = concatenated_images.float()
print(concatenated_images.shape)
print(concatenated_images.dtype)

# Calculate the mean and std values.
#
# After ToTensor() and torch.stack() the tensor layout is
# (N, C, H, W): batch, channel, height, width -- e.g. (50000, 3, 32, 32)
# for 50000 RGB images of 32x32 pixels. Reducing over dim=(0, 2, 3)
# collapses the batch, height and width axes in a single step, so only the
# channel axis remains and the result holds exactly one value per channel.
mean = torch.mean(concatenated_images, dim=(0, 2, 3))
std = torch.std(concatenated_images, dim=(0, 2, 3))

# Print the calculated mean and std values
print("Mean values:", mean)
print("Standard deviation values:", std)

# output file
output_file = 'data.pt'

# Create a dictionary to store the tensors
data_dict = {'mean': mean, 'std': std}

# save the tensor to a file
torch.save(data_dict, output_file)

print(f"Tensors saved to '{output_file}'.")

3. Custom Datasets and Dataloader

import os

import pandas as pd
import torch

from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image


import random
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm  #コマンドラインで実行するとき
# from tqdm.notebook import tqdm  # jupyter で実行するとき

# Select the compute device: the first CUDA GPU when available, else the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

class my_dataset(Dataset):
    """Dataset yielding ``(image, label, image_path)`` samples listed in a CSV.

    The CSV file must provide a ``path`` column (image file path) and a
    ``label`` column.

    Args:
        csv_path: Path of the CSV file to read.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, csv_path, transform=None):
        super().__init__()
        # Read the sample table once; only the two columns below are used.
        df = pd.read_csv(csv_path)

        self.image_paths = df['path']
        self.labels = df['label']
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the CSV)."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index: Position of the sample.

        Returns:
            Tuple ``(img, label, image_path)`` where ``img`` is the RGB image
            after ``transform`` (or the PIL image when no transform is set).
        """
        # .iloc is positional, so the lookup does not depend on the Series
        # index labels and negative positions work as well.
        image_path = self.image_paths.iloc[index]
        label = self.labels.iloc[index]

        # Image.open() is lazy and keeps the file handle open; convert()
        # forces the pixel data to load and the with-block closes the file.
        # Converting to RGB guarantees three channels for a 3-channel
        # Normalize(), even for grayscale or RGBA files.
        with Image.open(image_path) as img_file:
            img = img_file.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label, image_path


if __name__ == '__main__':

    # File from which the 'mean' and 'std' entries are loaded.
    output_file = 'data.pt'

    # Restore the saved dictionary and pull out both entries.
    loaded_data_dict = torch.load(output_file)
    mean_values, std_values = loaded_data_dict['mean'], loaded_data_dict['std']

    # Preprocessing pipeline: resize, convert to tensor, then normalize
    # with the loaded statistics.
    csv_train_fpath = './dataset_train.csv'
    resize_step = transforms.Resize((32, 32))
    tensor_step = transforms.ToTensor()
    normalize_step = transforms.Normalize(mean=mean_values, std=std_values)
    transform_normal = transforms.Compose([resize_step, tensor_step, normalize_step])

    # Dataset built from the CSV listing.
    dataset_train = my_dataset(csv_path=csv_train_fpath, transform=transform_normal)

    # Shuffled loader producing batches of 32 samples.
    batch_size = 32
    dataloader = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)

    # Print every batch to inspect what the loader yields.
    for batch in dataloader:
        img, label, img_path = batch
        print(f'label = {label}, img shape = {img.shape}, \n img_path = {img_path}')

4. 全体コード

Local Storageにある画像から、Image PathとLabelを列に持つPandasのDataFrameを作る。 Train画像の各チャンネルのmean(), std()を計算して保存する。

import os
import pandas as pd
import cv2
import torch
from torchvision import transforms

# Root directory of the training images. It is expected to contain one
# sub-directory per class, named after the keys of label_mapping.
dataset_path = 'C:/DATASETS/basic_datasets/cifar10/train'

# Class-directory name -> integer label written to the CSV.
label_mapping = {
    'airplane': 0,
    'automobile': 1,
    'bird': 2,
    'cat': 3,
    'deer': 4,
    'dog': 5,
    'frog': 6,
    'horse': 7,
    'ship': 8,
    'truck': 9
}

# Parallel lists: file_paths[i] is the image whose label is labels[i].
file_paths = []
labels = []

# Visit the class directories in sorted order so the generated CSV is
# reproducible; os.listdir() returns entries in arbitrary order.
for class_name in sorted(os.listdir(dataset_path)):
    class_dir = os.path.join(dataset_path, class_name)

    # Skip stray files lying next to the class folders (e.g. Thumbs.db);
    # listing them as a directory would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue

    # Look the label up once per class. A directory that is not in
    # label_mapping still raises KeyError, so a typo is not silently ignored.
    label = label_mapping[class_name]

    # Record every entry of the class directory together with its label.
    for file_name in sorted(os.listdir(class_dir)):
        file_paths.append(os.path.join(class_dir, file_name))
        labels.append(label)

# One row per image: its path and its integer label.
df = pd.DataFrame({'path': file_paths, 'label': labels})

# Show the first rows as a quick sanity check.
print(df.head())

# Persist the table (without the index column) for the Dataset to read.
df.to_csv('dataset_train.csv', index=False)

# Calculate the per-channel mean and std values of the train images.

# Build the converter once; the original re-created it for every image.
transform = transforms.ToTensor()

# One tensor per image, collected across all class directories.
image_tensors = []
for class_name in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, class_name)

    # Ignore stray files next to the class folders; os.listdir() on a
    # regular file would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue

    # Iterate through each image file in the class directory
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        img = cv2.imread(file_path)

        # cv2.imread() returns None instead of raising when a file cannot be
        # decoded; fail with the offending path rather than an obscure error
        # inside ToTensor().
        if img is None:
            raise ValueError(f"Could not read image file: {file_path}")

        # OpenCV loads channels in BGR order. Convert to RGB so the statistics
        # line up with images loaded through PIL (RGB) when they are later
        # passed to transforms.Normalize().
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Convert image to PyTorch tensor
        image_tensors.append(transform(img))

if not image_tensors:
    # torch.stack() on an empty list fails with an unhelpful message.
    raise RuntimeError(f"No images found under '{dataset_path}'.")

print(len(image_tensors))
# Stack the per-image tensors along a new leading batch dimension.
concatenated_images = torch.stack(image_tensors, dim=0)
# Ensure a floating-point dtype (ToTensor() normally yields one already;
# kept as a safeguard).
concatenated_images = concatenated_images.float()
print(concatenated_images.shape)
print(concatenated_images.dtype)

# Calculate the mean and std values.
#
# After ToTensor() and torch.stack() the tensor layout is
# (N, C, H, W): batch, channel, height, width -- e.g. (50000, 3, 32, 32)
# for 50000 RGB images of 32x32 pixels. Reducing over dim=(0, 2, 3)
# collapses the batch, height and width axes in a single step, so only the
# channel axis remains and the result holds exactly one value per channel.
mean = torch.mean(concatenated_images, dim=(0, 2, 3))
std = torch.std(concatenated_images, dim=(0, 2, 3))

# Print the calculated mean and std values
print("Mean values:", mean)
print("Standard deviation values:", std)

# output file
output_file = 'data.pt'

# Create a dictionary to store the tensors
data_dict = {'mean': mean, 'std': std}

# save the tensor to a file
torch.save(data_dict, output_file)

print(f"Tensors saved to '{output_file}'.")

1. PyTorchのDatasetクラスを利用し、Custom Datasetを作る。PyTorchのDataLoaderクラスを利用し、Custom DataLoaderを作る。

import os

import pandas as pd
import torch

from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image


import random
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm  #コマンドラインで実行するとき
# from tqdm.notebook import tqdm  # jupyter で実行するとき

# Select the compute device: the first CUDA GPU when available, else the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

class my_dataset(Dataset):
    """Dataset yielding ``(image, label, image_path)`` samples listed in a CSV.

    The CSV file must provide a ``path`` column (image file path) and a
    ``label`` column.

    Args:
        csv_path: Path of the CSV file to read.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, csv_path, transform=None):
        super().__init__()
        # Read the sample table once; only the two columns below are used.
        df = pd.read_csv(csv_path)

        self.image_paths = df['path']
        self.labels = df['label']
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the CSV)."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index: Position of the sample.

        Returns:
            Tuple ``(img, label, image_path)`` where ``img`` is the RGB image
            after ``transform`` (or the PIL image when no transform is set).
        """
        # .iloc is positional, so the lookup does not depend on the Series
        # index labels and negative positions work as well.
        image_path = self.image_paths.iloc[index]
        label = self.labels.iloc[index]

        # Image.open() is lazy and keeps the file handle open; convert()
        # forces the pixel data to load and the with-block closes the file.
        # Converting to RGB guarantees three channels for a 3-channel
        # Normalize(), even for grayscale or RGBA files.
        with Image.open(image_path) as img_file:
            img = img_file.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label, image_path


if __name__ == '__main__':

    # File from which the 'mean' and 'std' entries are loaded.
    output_file = 'data.pt'

    # Restore the saved dictionary and pull out both entries.
    loaded_data_dict = torch.load(output_file)
    mean_values, std_values = loaded_data_dict['mean'], loaded_data_dict['std']

    # Preprocessing pipeline: resize, convert to tensor, then normalize
    # with the loaded statistics.
    csv_train_fpath = './dataset_train.csv'
    resize_step = transforms.Resize((32, 32))
    tensor_step = transforms.ToTensor()
    normalize_step = transforms.Normalize(mean=mean_values, std=std_values)
    transform_normal = transforms.Compose([resize_step, tensor_step, normalize_step])

    # Dataset built from the CSV listing.
    dataset_train = my_dataset(csv_path=csv_train_fpath, transform=transform_normal)

    # Shuffled loader producing batches of 32 samples.
    batch_size = 32
    dataloader = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)

    # Print every batch to inspect what the loader yields.
    for batch in dataloader:
        img, label, img_path = batch
        print(f'label = {label}, img shape = {img.shape}, \n img_path = {img_path}')

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up