src.data
import os
import subprocess  # kept for backward compatibility; no longer used below

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.datasets import MNIST


def get_mnist_dataloaders(
    data_path: str = "save/data",
    batch_size: int = 32,
) -> tuple[DataLoader, DataLoader, Dataset, Dataset]:
    """Get the MNIST dataloaders. This is used as an example dataset. You will have to modify this to work with your own data.

    Args:
        data_path (str): Directory where the MNIST data is downloaded/stored. Created if missing.
        batch_size (int): Batch size to retrieve on each call to `__next__()`.

    Returns:
        tuple[DataLoader, DataLoader, Dataset, Dataset]: Train loader, val loader, train dataset, val dataset.

    .. warning:: Be Careful!
        `DataLoader` and `Dataset` are very different types of iterators. A `DataLoader` will return a batch tensor whereas a `Dataset` will return a single tensor. The user has to be aware of what kind of iterator he is using or risk causing problems.
    """

    # Create the data directory portably. The previous `subprocess.run(["mkdir", "-p", ...])`
    # was Unix-only, forked a process, and silently ignored failures.
    os.makedirs(data_path, exist_ok=True)

    # Apply transformations to data to ensure that data is normalized between -1 and 1
    # (ToTensor gives [0, 1]; Normalize with mean=0.5, std=0.5 maps that to [-1, 1]).
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

    # Download the MNIST data and apply transforms (train/test splits share the transform).
    dataset_train = MNIST(root=data_path, train=True, download=True, transform=transform)
    dataset_val = MNIST(root=data_path, train=False, download=True, transform=transform)

    # Shuffle only the training loader; keep validation order deterministic.
    dataloader_train = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
    dataloader_val = DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)

    return dataloader_train, dataloader_val, dataset_train, dataset_val
def get_mnist_dataloaders(data_path: str = 'save/data', batch_size: int = 32) -> tuple[DataLoader, DataLoader, Dataset, Dataset]:
def get_mnist_dataloaders(
    data_path: str = "save/data",
    batch_size: int = 32,
) -> tuple[DataLoader, DataLoader, Dataset, Dataset]:
    """Get the MNIST dataloaders. This is used as an example dataset. You will have to modify this to work with your own data.

    Args:
        data_path (str): Directory where the MNIST data is downloaded/stored. Created if missing.
        batch_size (int): Batch size to retrieve on each call to `__next__()`.

    Returns:
        tuple[DataLoader, DataLoader, Dataset, Dataset]: Train loader, val loader, train dataset, val dataset.

    .. warning:: Be Careful!
        `DataLoader` and `Dataset` are very different types of iterators. A `DataLoader` will return a batch tensor whereas a `Dataset` will return a single tensor. The user has to be aware of what kind of iterator he is using or risk causing problems.
    """

    # Create the data directory portably. The original `subprocess.run(["mkdir", "-p", ...])`
    # was Unix-only, forked a process, and silently ignored failures; os.makedirs raises
    # on real errors and is a no-op when the directory already exists.
    import os

    os.makedirs(data_path, exist_ok=True)

    # Apply transformations to data to ensure that data is normalized between -1 and 1
    # (ToTensor gives [0, 1]; Normalize with mean=0.5, std=0.5 maps that to [-1, 1]).
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

    # Download the MNIST data and apply transforms (train/test splits share the transform).
    dataset_train = MNIST(root=data_path, train=True, download=True, transform=transform)
    dataset_val = MNIST(root=data_path, train=False, download=True, transform=transform)

    # Shuffle only the training loader; keep validation order deterministic.
    dataloader_train = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
    dataloader_val = DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)

    return dataloader_train, dataloader_val, dataset_train, dataset_val
Get the MNIST dataloaders. This is used as an example dataset. You will have to modify this to work with your own data.
Arguments:
- data_path (str): Directory in which the MNIST data is stored and downloaded. Defaults to "save/data".
- batch_size (int): Batch size to retrieve on each call to
__next__(). Defaults to 32.
Returns:
tuple[DataLoader, DataLoader, Dataset, Dataset]: The iterators to return.
Be careful!
DataLoader and Dataset are very different types of iterators. A DataLoader returns a batch tensor, whereas a Dataset returns a single tensor. Users must be aware of which kind of iterator they are working with, or they risk subtle bugs.