|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
|
|
import os
|
|
import os.path
|
|
|
|
sys.path.insert(1, os.path.join(sys.path[0], ".."))
|
|
from data_utils import utils as data_utils
|
|
from PIL import Image
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
import random
|
|
import sklearn.metrics
|
|
import torch.utils.data as data
|
|
# faiss is optional: it accelerates the k-NN search used by
# ILSVRC_HDF5_feats._obtain_nns. Fall back to the sklearn path when missing.
try:
    import faiss

    USE_FAISS = 1
except ImportError:
    # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
    # and unrelated errors raised while importing faiss.
    print('Faiss library not found!')
    USE_FAISS = 0
|
|
import h5py as h5
|
|
import torch
|
|
|
|
IMG_EXTENSIONS = [".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm"]


def is_image_file(filename):
    """Check whether a file looks like an image.

    Args:
        filename (string): path to a file

    Returns:
        bool: True if the filename ends with a known image extension
    """
    # str.endswith accepts a tuple of suffixes; comparison is case-insensitive.
    return filename.lower().endswith(tuple(IMG_EXTENSIONS))
|
|
|
|
|
|
def find_classes(dir):
    """List class subdirectories of *dir* and map each name to an index.

    Args:
        dir (string): root directory whose immediate subdirectory names are
            the class names.

    Returns:
        tuple: (classes, class_to_idx) where ``classes`` is the sorted list
        of subdirectory names and ``class_to_idx`` maps each name to its
        position in that list.
    """
    classes = sorted(
        entry for entry in os.listdir(dir) if os.path.isdir(os.path.join(dir, entry))
    )
    class_to_idx = {name: i for i, name in enumerate(classes)}
    return classes, class_to_idx
|
|
|
|
|
|
def make_dataset(dir, class_to_idx):
    """Collect ``(image_path, class_index)`` pairs from a class-per-folder tree.

    Args:
        dir (string): root directory; each immediate subdirectory is a class.
        class_to_idx (dict): mapping from class (folder) name to integer index.

    Returns:
        list: ``(path, class_index)`` tuples for every image file found.
    """
    samples = []
    dir = os.path.expanduser(dir)
    # tqdm shows per-class progress while walking the tree.
    for target in tqdm(sorted(os.listdir(dir))):
        class_dir = os.path.join(dir, target)
        if not os.path.isdir(class_dir):
            continue

        # Walk recursively so images nested below the class folder are found.
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not is_image_file(fname):
                    continue
                samples.append((os.path.join(root, fname), class_to_idx[target]))

    return samples
|
|
|
|
|
|
def pil_loader(path):
    """Open *path* with PIL and return the image converted to RGB."""
    # Open the file ourselves so the handle is released by the context manager.
    with open(path, "rb") as handle:
        image = Image.open(handle)
        return image.convert("RGB")
|
|
|
|
|
|
def accimage_loader(path):
    """Load an image with accimage, falling back to PIL on failure."""
    import accimage

    try:
        img = accimage.Image(path)
    except IOError:
        # Potentially a decoding problem: retry with the PIL loader.
        img = pil_loader(path)
    return img
|
|
|
|
|
|
def default_loader(path):
    """Dispatch to the loader matching torchvision's configured image backend."""
    from torchvision import get_image_backend

    if get_image_backend() == "accimage":
        return accimage_loader(path)
    return pil_loader(path)
|
|
|
|
|
|
class ImageFolder(data.Dataset):
|
|
"""A generic data loader where the images are arranged in this way: ::
|
|
|
|
root/dogball/xxx.png
|
|
root/dogball/xxy.png
|
|
root/dogball/xxz.png
|
|
|
|
root/cat/123.png
|
|
root/cat/nsdf3.png
|
|
root/cat/asd932_.png
|
|
|
|
Parameters
|
|
----------
|
|
root: string. Root directory path.
|
|
transform: callable, optional. A function/transform that takes in an PIL image
|
|
and returns a transformed version. E.g, ``transforms.RandomCrop``
|
|
target_transform: callable, optional. A function/transform that takes in the
|
|
target and transforms it.
|
|
loader: callable, optional. A function to load an image given its path.
|
|
|
|
Attributes
|
|
----------
|
|
classes: list. List of the class names.
|
|
class_to_idx: dict. Dict with items (class_name, class_index).
|
|
imgs: list. List of (image path, class_index) tuples
|
|
"""
|
|
|
|
    def __init__(
        self,
        root,
        transform=None,
        target_transform=None,
        loader=default_loader,
        load_in_mem=False,
        index_filename="imagenet_imgs.npz",
        longtail=False,
        subsampled=False,
        split="train",
        **kwargs
    ):
        """Build the (path, class_index) index and optionally preload images.

        See the class docstring for the main parameters. ``longtail`` switches
        to a fixed ImageNet-LT file listing; ``subsampled`` randomly trims the
        full index; ``split`` selects the long-tail split file.
        """
        # Discover class subfolders and their integer indexes.
        classes, class_to_idx = find_classes(root)

        # NOTE(review): dead branch -- the pre-saved index file is never
        # loaded because the condition is hard-coded to False. Presumably it
        # once tested os.path.exists(index_filename); confirm before enabling.
        if False:
            print("Loading pre-saved Index file %s..." % index_filename)
            imgs = np.load(os.path.join(index_filename))["imgs"]

        else:
            print("Generating Index file %s..." % index_filename)
            if not longtail:
                imgs = make_dataset(root, class_to_idx)
                if subsampled:
                    # Fixed-size random subsample; the meaning of 115846 is not
                    # shown here (ImageNet-LT train size? -- TODO confirm).
                    imgs = random.sample(imgs, 115846)
            else:
                imgs = []
                print("Using long-tail version of the dataset with split ", split, "!")
                # Hard-coded relative path; each line is "<path> <label>".
                with open(
                    "BigGAN_PyTorch/imagenet_lt/ImageNet_LT_" + split + ".txt"
                ) as f:
                    for line in f:
                        # Drop the first path component of the listing entry
                        # and re-root the image path under `root`.
                        imgs.append(
                            (
                                os.path.join(
                                    root, "/".join(line.split()[0].split("/")[1:])
                                ),
                                int(line.split()[1]),
                            )
                        )
            # Cache the freshly built index for future runs.
            np.savez_compressed(os.path.join(index_filename), **{"imgs": imgs})
        if len(imgs) == 0:
            raise (
                RuntimeError(
                    "Found 0 images in subfolders of: " + root + "\n"
                    "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)
                )
            )

        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
        # If True, decode (and transform) every image up front.
        self.load_in_mem = load_in_mem

        if self.load_in_mem:
            print("Loading all images into memory...")
            self.data, self.labels = [], []
            for index in tqdm(range(len(self.imgs))):
                path, target = imgs[index][0], imgs[index][1]
                # The transform is applied once here, so __getitem__ skips it
                # on the in-memory path.
                self.data.append(self.transform(self.loader(path)))
                self.labels.append(target)
|
|
|
|
def __getitem__(self, index):
|
|
"""
|
|
Parameters
|
|
----------
|
|
index: int. Index
|
|
|
|
Returns
|
|
-------
|
|
tuple: (image, target) where target is class_index of the target class.
|
|
"""
|
|
if self.load_in_mem:
|
|
img = self.data[index]
|
|
target = self.labels[index]
|
|
else:
|
|
path, target = self.imgs[index]
|
|
img = self.loader(str(path))
|
|
if self.transform is not None:
|
|
img = self.transform(img)
|
|
|
|
if self.target_transform is not None:
|
|
target = self.target_transform(target)
|
|
return img, int(target), index
|
|
|
|
    def __len__(self):
        """Number of (path, class_index) entries in the index."""
        return len(self.imgs)
|
|
|
|
def __repr__(self):
|
|
fmt_str = "Dataset " + self.__class__.__name__ + "\n"
|
|
fmt_str += " Number of datapoints: {}\n".format(self.__len__())
|
|
fmt_str += " Root Location: {}\n".format(self.root)
|
|
tmp = " Transforms (if any): "
|
|
fmt_str += "{0}{1}\n".format(
|
|
tmp, self.transform.__repr__().replace("\n", "\n" + " " * len(tmp))
|
|
)
|
|
tmp = " Target Transforms (if any): "
|
|
fmt_str += "{0}{1}".format(
|
|
tmp, self.target_transform.__repr__().replace("\n", "\n" + " " * len(tmp))
|
|
)
|
|
return fmt_str
|
|
|
|
|
|
class ILSVRC_HDF5_feats(data.Dataset):
|
|
""" ILSVRC_HDF5_feats: A dataset to support I/O from an HDF5.
|
|
|
|
Parameters
|
|
----------
|
|
root :str
|
|
Path to the hdf5 file containing images and labels.
|
|
root_feats: str, optional
|
|
Path to the hdf5 file containing the instance features.
|
|
root_nns: str, optional
|
|
Path to the hdf5 file containing the list of nearest neighbors for each instance.
|
|
transform : callable, optional
|
|
A function/transform that takes in an PIL image and returns a transformed version.
|
|
E.g, ``transforms.RandomCrop``
|
|
target_transform: callable, optional
|
|
A function/transform that takes in the target and transforms it.
|
|
load_labels: bool, optional
|
|
Return labels for each example.
|
|
load_features: bool, optional
|
|
Return instance features and its neighbors (needed for IC-GAN).
|
|
load_in_mem_images: bool, optional
|
|
Load all images in memory.
|
|
load_in_mem_labels: bool, optional
|
|
Load all labels in memory.
|
|
load_in_mem_feats: bool, optional
|
|
Load all instance features in memory.
|
|
k_nn: int, optional
|
|
Size of the neighborhood obtained with the k-NN algorithm.
|
|
which_nn_balance: str, optional
|
|
Whether to sample an instance or a neighbor class first. By default,
|
|
``instance_balance`` is used.
|
|
Using ``nnclass_balance`` allows class balancing to be applied.
|
|
kmeans_file: str, optional
|
|
Path to a file where only the dataset indexes selected with k-means are stored.
|
|
It reduces the amount of available data to train or test the model.
|
|
n_subsampled_data: int, optional
|
|
If other than -1, that number of data points are randomly selected from the dataset.
|
|
It reduces the amount of available data to train or test the model.
|
|
filter_hd: int, optional
|
|
Only used for COCO-Stuff dataset. If -1, all COCO-Stuff evaluation set is used.
|
|
If 0, only images with seen class combinations are used.
|
|
If 1, only images with unseen class combinations are used.
|
|
label_dim: int, optional
|
|
Dimensionality of label embeddings. Useful for the StyleGAN2 backbone code.
|
|
feature_dim: int, optional
|
|
Dimensionality of instance features embeddings. Useful for the StyleGAN2 backbone
|
|
code.
|
|
feature_augmentation: bool, optional
|
|
Use the instance features of the flipped ground-truth image instances as
|
|
conditioning, with a 50% probability.
|
|
gpu_knn: bool, optional
|
|
Accelerate k-NN faiss computation with GPUs.
|
|
apply_norm: bool, optional
|
|
Normalize images, mapping uint8 values in [0, 255] to [-1, 1] (see ``__getitem__``).
|
|
label_onehot: bool, optional
|
|
Return labels as a one hot encoding. Useful for StyleGAN2 backbone code.
|
|
|
|
Attributes
|
|
---------
|
|
root: str
|
|
Path to the hdf5 file containing images and labels.
|
|
root_feats: str
|
|
Path to the hdf5 file containing the instance features.
|
|
root_nns: str
|
|
Path to the hdf5 file containing the list of nearest neighbors for each
|
|
instance.
|
|
transform : callable
|
|
A function/transform that takes in an PIL image and returns a transformed version.
|
|
E.g, ``transforms.RandomCrop``
|
|
target_transform: callable
|
|
A function/transform that takes in the target and transforms it.
|
|
load_labels: bool
|
|
Return labels for each example.
|
|
load_features: bool
|
|
Return instance features and its neighbors (needed for IC-GAN).
|
|
load_in_mem_images: bool
|
|
Load all images in memory.
|
|
load_in_mem_labels: bool
|
|
Load all labels in memory.
|
|
load_in_mem_feats: bool
|
|
Load all instance features in memory.
|
|
feature_augmentation: bool
|
|
Use the instance features of the flipped ground-truth image instances as conditioning,
|
|
with a 50% probability.
|
|
which_nn_balance: str
|
|
Whether to sample an instance or a neighbor class first. By default,
|
|
``instance_balance`` is used. Using ``nnclass_balance`` allows class balancing to be
|
|
applied.
|
|
apply_norm: bool
|
|
Normalize images, mapping uint8 values in [0, 255] to [-1, 1] (see ``__getitem__``).
|
|
label_onehot: bool
|
|
Return labels as a one hot encoding. Useful for StyleGAN2 backbone code.
|
|
num_imgs: int.
|
|
Number of data points in the dataset.
|
|
data: NumPy array
|
|
Image data, with the shape (num_imgs, w, h, 3), where w: width and h: height.
|
|
labels: NumPy array
|
|
Label data, with the shape (num_imgs, 1).
|
|
feats: NumPy array
|
|
Instance features data, with the shape (num_imgs, 2048).
|
|
sample_nns: list
|
|
List with length ``num_imgs``, that contains a list of the ``k_nn`` neighbor indexes
|
|
for each instance.
|
|
sample_nn_radius: NumPy array
|
|
Array of size (num_imgs) that stores the distance between each instance and its
|
|
farthest(k-th) neighbor.
|
|
possible_sampling_idxs: list
|
|
List of all effective possible data samples. By default, it is a range(0, num_imgs).
|
|
kmeans_samples: list
|
|
List of indexes for samples selected with k-means algorithm.
|
|
kth_values: NumPy array
|
|
Distances between instances and its k-th neighbor.
|
|
"""
|
|
|
|
    def __init__(
        self,
        root,
        root_feats=None,
        root_nns=None,
        transform=None,
        target_transform=None,
        load_labels=True,
        load_features=True,
        load_in_mem_images=False,
        load_in_mem_labels=False,
        load_in_mem_feats=False,
        k_nn=4,
        which_nn_balance="instance_balance",
        kmeans_file=None,
        n_subsampled_data=-1,
        filter_hd=-1,
        label_dim=0,
        feature_dim=2048,
        feature_augmentation=False,
        gpu_knn=True,
        apply_norm=True,
        label_onehot=False,
        **kwargs
    ):
        """See the class docstring for the meaning of each parameter."""
        # Paths to the HDF5 files: images+labels, features, neighborhoods.
        self.root = root
        self.root_feats = root_feats
        self.root_nns = root_nns

        self.load_labels = load_labels
        self.load_features = load_features
        # Private storage for the read-only label_dim/feature_dim properties.
        self._label_dim = label_dim
        self._feature_dim = feature_dim
        self.label_onehot = label_onehot
        self.feature_augmentation = feature_augmentation

        self.transform = transform
        self.target_transform = target_transform

        self.apply_norm = apply_norm

        self.load_in_mem_images = load_in_mem_images
        self.load_in_mem_labels = load_in_mem_labels
        self.load_in_mem_feats = load_in_mem_feats

        self.which_nn_balance = which_nn_balance

        # Dataset size is taken from the labels table.
        # NOTE(review): this h5.File handle is never explicitly closed.
        self.num_imgs = len(h5.File(root, "r")["labels"])

        self.labels, self.feats = None, None
        self.kth_values = None

        print(
            "Load in mem? Images: %r, Labels: %r, Features: %r."
            % (self.load_in_mem_images, self.load_in_mem_labels, self.load_in_mem_feats)
        )
        # Optionally preload the full arrays into RAM.
        if self.load_in_mem_images:
            print("Loading images from %s into memory..." % root)
            with h5.File(root, "r") as f:
                self.data = f["imgs"][:]
        if load_labels and self.load_in_mem_labels:
            print("Loading labels from %s into memory..." % root)
            with h5.File(root, "r") as f:
                self.labels = f["labels"][:]
        if load_features and self.load_in_mem_feats:
            print("Loading features from %s into memory..." % root_feats)
            with h5.File(root_feats, "r") as f:
                self.feats = f["feats"][:]

            # L2-normalize the features and move them into a shared-memory
            # torch tensor so DataLoader workers can reuse one copy.
            print("Normalizing features by their norm")
            self.feats /= np.linalg.norm(self.feats, axis=1, keepdims=True)
            self.feats = torch.from_numpy(self.feats)
            self.feats.share_memory_()

        if load_features:
            # Neighborhoods: computed on the fly from the in-memory features,
            # or loaded from a precomputed HDF5 file.
            if root_nns is None and self.load_in_mem_feats:
                self._obtain_nns(k_nn, gpu=gpu_knn, faiss_lib=USE_FAISS)
            elif root_nns is not None:
                print("Loading %s into memory..." % root_nns)
                with h5.File(root_nns, "r") as f:
                    self.sample_nns = f["sample_nns"][:]
                    self.sample_nn_radius = f["sample_nns_radius"][:]
            else:
                raise ValueError(
                    "If no file with pre-computed neighborhoods is provided, "
                    "the features need to be loaded in memory to extract them."
                    " Set the load_in_mem_feats=True."
                )

        # Optionally restrict the pool of conditioning instances.
        self.possible_sampling_idxs = range(self.num_imgs)
        self.kmeans_samples = None
        if kmeans_file is not None:
            # Keep only the k-means centroid examples.
            print("Loading file with just a few centroids (kmeans)... ", kmeans_file)
            self.kmeans_samples = np.load(kmeans_file, allow_pickle=True).item()[
                "center_examples"
            ][:, 0]
            self.possible_sampling_idxs = self.kmeans_samples
        elif n_subsampled_data > -1:
            # Random subsample without replacement.
            self.possible_sampling_idxs = np.random.choice(
                np.array(self.possible_sampling_idxs),
                int(n_subsampled_data),
                replace=False,
            )
        elif filter_hd > -1:
            # COCO-Stuff only: filter by seen/unseen class combinations.
            allowed_idxs = data_utils.filter_by_hd(filter_hd)
            self.possible_sampling_idxs = allowed_idxs

        self.possible_sampling_idxs = np.array(self.possible_sampling_idxs)
        # The effective dataset size is the size of the (restricted) pool.
        self.num_imgs = len(self.possible_sampling_idxs)

        print(
            "All possible conditioning instances are ", len(self.possible_sampling_idxs)
        )
|
|
|
|
    def __getitem__(self, index):
        """
        Parameters
        ----------
        index: int

        Returns
        -------
        If the dataset loads both features and labels, return 4 elements: neighbor image,
        neighbor class label, instance features and instance radius.
        If the dataset loads only features (no labels), return 3 elements: neighbor image,
        instance features, instance radius.
        If the dataset loads only labels (no features), return 2 elements: neighbor image and
        neighbor class label.
        If the dataset does not load features nor labels, return only an image.
        """
        # Map the dataset-relative index to an effective sample index
        # (identity unless k-means/subsampling/filtering restricted the pool).
        index = self.possible_sampling_idxs[index]
        img = self._get_image(index)
        target = self.get_label(index)
        if self.load_features:
            # IC-GAN mode: condition on this instance's features, but return
            # one of its neighbors as the image/label pair.
            img_nn, label_nn, feats, radii = self._get_instance_features_and_nn(index)
            img = img_nn
            target = label_nn
        else:
            feats, radii = None, None

        img = torch.from_numpy(img)
        if self.apply_norm:
            # Maps uint8 [0, 255] to [-1, 1].  NOTE(review): the class
            # docstring says [-0.5, 0.5], but this expression yields [-1, 1].
            img = ((img.float() / 255) - 0.5) * 2
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        # One-hot targets stay as vectors; otherwise cast to a plain int.
        if not self.label_onehot:
            target = int(target)

        # Return arity depends on what the dataset was configured to load.
        if self.load_features and self.load_labels:
            return img, target, feats, radii
        elif self.load_features:
            return img, feats, radii
        elif self.load_labels:
            return img, target
        else:
            return img
|
|
|
|
    def sample_conditioning_instance_balance(self, batch_size, weights=None):
        """
        It samples a batch size of conditionings.

        First, by sampling an instance, and then one of the neighbor's class.

        Parameters
        ----------
        batch_size: int
            Number of conditioning to sample.
        weights: NumPy array, optional
            Array of size len(self.possible_sampling_idxs), indicating the weight for each
            instance, used for sampling.

        Returns
        -------
        labels_gen: torch.LongTensor
            Batch of neighbor labels.
        instance_gen: torch.FloatTensor
            Tensor of shape (batch_size, feature_dim). Batch of instance features.
        """
        # Draw the conditioning instances, uniformly or weighted.
        if weights is None:
            sel_idxs = np.random.randint(0, len(self.possible_sampling_idxs), size=batch_size)
            sel_idxs = self.possible_sampling_idxs[sel_idxs]
        else:
            sel_idxs = np.random.choice(
                self.possible_sampling_idxs, batch_size, replace=True, p=weights
            )

        # Features of the selected conditioning instances.
        instance_gen = self.get_instance_features(sel_idxs)

        # For each instance, pick one of its neighbors at random and take
        # that neighbor's label.
        labels_gen = []
        for idx_ in sel_idxs:
            chosen_idx = np.random.choice(self.sample_nns[idx_])
            if self.load_labels:
                labels_gen.append(self.get_label(chosen_idx)[np.newaxis, ...])
        if self.load_labels:
            labels_gen = np.concatenate(labels_gen, 0)
            labels_gen = torch.LongTensor(labels_gen)
        else:
            labels_gen = None

        instance_gen = torch.FloatTensor(instance_gen)

        return labels_gen, instance_gen
|
|
|
|
    def sample_conditioning_nnclass_balance(
        self, batch_size, weights=None, num_classes=1000
    ):
        """
        It samples a batch size of conditionings.

        First, by sampling a class, then an image from this class, and finally an instance
        feature that would have this image as a neighbor in feature space.

        Parameters
        ----------
        batch_size: int
            Number of conditioning to sample.
        weights: NumPy array, optional
            Array of size num_classes, indicating the weight for each class, used for sampling.
        num_classes: int, optional
            Number of classes in the dataset

        Returns
        -------
        labels_gen: torch.LongTensor
            Batch of sampled class labels.
        instance_gen: torch.FloatTensor
            Tensor of shape (batch_size, feature_dim). Batch of instance features.
        """
        if weights is not None:
            # Normalize the class weights into a probability distribution.
            weights = np.array(weights) / sum(weights)

        # Sample the neighbor classes first, uniformly or weighted.
        chosen_class = np.random.choice(
            range(num_classes), batch_size, replace=True, p=weights
        )
        nn_idxs = []
        for lab_ in chosen_class:
            # Pick an image of the chosen class (requires self.labels in
            # memory)...
            chosen_xnn = np.random.choice((self.labels == lab_).nonzero()[0])
            # ...then a conditioning instance having it in its neighborhood.
            nn_idxs.append(np.random.choice(self.sample_nns[chosen_xnn]))

        instance_gen = self.get_instance_features(nn_idxs)

        instance_gen = torch.FloatTensor(instance_gen)
        labels_gen = torch.LongTensor(chosen_class)

        return labels_gen, instance_gen
|
|
|
|
def get_label(self, index):
|
|
"""Obtain a label as an int or as a one-hot vector."""
|
|
if not self.load_labels:
|
|
if self.label_onehot:
|
|
return np.zeros(self.label_dim, dtype=np.float32).copy()
|
|
else:
|
|
return 0
|
|
|
|
if self.load_labels:
|
|
if self.load_in_mem_labels:
|
|
target = self.labels[index]
|
|
else:
|
|
with h5.File(self.root, "r") as f:
|
|
target = f["labels"][index]
|
|
else:
|
|
target = None
|
|
if self.label_onehot:
|
|
onehot_vec = np.zeros(self.label_dim, dtype=np.float32)
|
|
onehot_vec[target] = 1
|
|
target = onehot_vec.copy()
|
|
|
|
return target
|
|
|
|
def get_instance_features(self, index):
|
|
"""Obtain an instance feature vector."""
|
|
if not self.load_features:
|
|
return np.zeros(self.feature_dim, dtype=np.float32).copy()
|
|
|
|
if self.load_in_mem_feats:
|
|
feat = self.feats[index].clone().float()
|
|
else:
|
|
with h5.File(self.root_feats, "r") as f:
|
|
if isinstance(index, (int, np.int64)):
|
|
hflip = np.random.randint(2) == 1
|
|
if self.feature_augmentation and hflip:
|
|
feat = f["feats_hflip"][index].astype("float")
|
|
else:
|
|
feat = f["feats"][index].astype("float")
|
|
feat /= np.linalg.norm(feat, keepdims=True)
|
|
else:
|
|
feat = []
|
|
for sl_idx in index:
|
|
hflip = np.random.randint(2) == 1
|
|
if self.feature_augmentation and hflip:
|
|
feat.append(
|
|
f["feats_hflip"][sl_idx].astype("float")[
|
|
np.newaxis, ...
|
|
]
|
|
)
|
|
else:
|
|
feat.append(
|
|
f["feats"][sl_idx].astype("float")[np.newaxis, ...]
|
|
)
|
|
feat = np.concatenate(feat)
|
|
feat /= np.linalg.norm(feat, axis=1, keepdims=True)
|
|
return feat
|
|
|
|
    @property
    def resolution(self):
        """Spatial size of the stored images.

        Opens the HDF5 file and inspects the first image's shape, returning
        its second dimension (assumes a (C, H, W) layout -- TODO confirm
        against the HDF5 writer).
        """
        with h5.File(self.root, "r") as f:
            sze = list(f["imgs"][0].shape)
        return sze[1]
|
|
|
|
    @property
    def label_dim(self):
        """Dimensionality of the label embedding (read-only)."""
        return self._label_dim
|
|
|
|
    @property
    def feature_dim(self):
        """Dimensionality of the instance feature embedding (read-only)."""
        return self._feature_dim
|
|
|
|
    def _obtain_nns(self, k_nn=20, faiss_lib=True, feat_sz=2048, gpu=True):
        """
        It obtains the neighborhoods for all instances using the k-NN algorithm.

        Parameters
        ----------
        k_nn: int, optional
            Number of neighbors (k).
        faiss_lib: bool, optional
            If True, use the faiss library implementation of k-NN. If not, use the slower
            implementation of sklearn.
        feat_sz: int, optional
            Feature dimensionality.
        gpu: bool, optional
            If True, leverage GPU resources to speed up computation with the faiss library.
        """
        # Search k+1 neighbors because each point is returned as its own
        # nearest neighbor; it is removed from the list below.
        k_nn += 1

        self.sample_nns = [[] for _ in range(self.num_imgs)]
        self.sample_nn_radius = np.zeros(self.num_imgs, dtype=float)

        if faiss_lib:
            # Exact L2 search; optionally cloned across all visible GPUs.
            cpu_index = faiss.IndexFlatL2(feat_sz)
            if gpu:
                gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
                index = gpu_index
            else:
                index = cpu_index
            index.add(self.feats.float().numpy().astype("float32"))
            kth_values, kth_values_arg = index.search(
                self.feats.numpy().astype("float32"), k_nn
            )
            # faiss's IndexFlatL2 returns squared L2 distances; take sqrt to
            # get Euclidean radii.
            self.kth_values = np.sqrt(kth_values)
            knn_radii = np.sqrt(kth_values[:, -1])

        else:
            # Slower fallback: dense pairwise Euclidean distances via sklearn.
            dists = sklearn.metrics.pairwise_distances(
                self.feats, self.feats, metric="euclidean", n_jobs=-1
            )
            print("Computed distances.")
            knn_radii, kth_values_arg = self._get_kth_value_accurate(dists, k_nn)
        for i_sample in range(self.num_imgs):
            knns = kth_values_arg[i_sample]
            # Drop the sample itself from its own neighborhood.
            knns = np.delete(knns, np.where(knns == i_sample)[0], 0)
            self.sample_nns[i_sample] = knns.tolist()
            self.sample_nn_radius[i_sample] = knn_radii[i_sample]
        print("Computed NNs.")
|
|
|
|
@staticmethod
|
|
def _get_kth_value_accurate(distances, k, axis=-1):
|
|
""" Find k nearest neighbor
|
|
Parameters
|
|
---------
|
|
distances: NumPy array
|
|
Matrix of size (M, M) of unordered distances.
|
|
k: int
|
|
Neighborhood size
|
|
axis: int
|
|
|
|
Returns
|
|
-------
|
|
kth values: NumPy array
|
|
Distances of the k-th nearest neighbor along the designated axis.
|
|
indices: NumPy array
|
|
Array positions in the input matrix indicating all neighbors up until the k-th.
|
|
|
|
"""
|
|
indices = np.argpartition(distances, k - 1, axis=axis)[..., :k]
|
|
k_smallests = np.take_along_axis(distances, indices, axis=axis)
|
|
kth_values = k_smallests.max(axis=axis)
|
|
return kth_values, indices
|
|
|
|
def _get_image(self, index):
|
|
"""Obtain an image array."""
|
|
if self.load_in_mem_images:
|
|
img = self.data[index]
|
|
else:
|
|
with h5.File(self.root, "r") as f:
|
|
img = f["imgs"][index]
|
|
return img
|
|
|
|
def _get_instance_features_and_nn(self, index):
|
|
""" Builds a quadruplet of neighbor image, its label, conditioning instance features, radii.
|
|
|
|
Returns
|
|
----------
|
|
img_nn: NumPy array
|
|
Neighbor image.
|
|
label_nn: NumPy array
|
|
Neighbor label.
|
|
feats: NumPy array
|
|
Conditioning instance features.
|
|
radii: float
|
|
Distance between conditioning instance and farthest (k-th) neighbor.
|
|
"""
|
|
|
|
|
|
if self.which_nn_balance == "instance_balance":
|
|
idx_h = index
|
|
|
|
if self.kmeans_samples is not None:
|
|
index = np.random.choice(self.kmeans_samples)
|
|
idx_nn = np.random.choice(self.sample_nns[index])
|
|
|
|
|
|
|
|
|
|
|
|
elif self.which_nn_balance == "nnclass_balance":
|
|
idx_h = np.random.choice(self.sample_nns[index])
|
|
idx_nn = index
|
|
|
|
|
|
radii = self.sample_nn_radius[idx_h]
|
|
|
|
img_nn = self._get_image(idx_nn)
|
|
label_nn = self.get_label(idx_nn)
|
|
feats = self.get_instance_features(idx_h)
|
|
|
|
return img_nn, label_nn, feats, radii
|
|
|
|
    def __len__(self):
        """Effective dataset size (set in __init__ from the sampling pool)."""
        return self.num_imgs
|
|
|