|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" Convert dataset to HDF5 |
|
This script preprocesses a dataset and saves it (images and labels) to |
|
an HDF5 file for improved I/O. """ |
|
import os |
|
import sys |
|
from argparse import ArgumentParser |
|
from tqdm import tqdm, trange |
|
import h5py as h5 |
|
|
|
import numpy as np |
|
import torch |
|
|
|
import utils |
|
import torchvision.transforms.functional as tv_f |
|
|
|
|
|
def prepare_parser():
    """Build the command-line parser for the HDF5 conversion script.

    Returns:
        An ``argparse.ArgumentParser`` exposing the dataset selection
        options (resolution, split, paths, dataset name, COCO-Stuff JSON
        files), the feature-extraction options (pretrained model, extractor
        type, backbone, horizontal-flip augmentation), the flags restricting
        the output to images or features only, and the data-loading / HDF5
        storage options (batch size, workers, chunk size, compression).
    """
    usage = "Parser for ImageNet HDF5 scripts."
    parser = ArgumentParser(description=usage)
    parser.add_argument(
        "--resolution",
        type=int,
        default=128,
        help="Which Dataset resolution to train on, out of 64, 128, 256 (default: %(default)s)",
    )
    parser.add_argument(
        "--split",
        type=str,
        default="train",
        # "test" is accepted too: run() maps it onto the validation data.
        help="Which Dataset split to convert: train, val, test (default: %(default)s)",
    )
    parser.add_argument(
        "--data_root",
        type=str,
        default="data",
        help="Default location where data is stored (default: %(default)s)",
    )
    parser.add_argument(
        "--out_path",
        type=str,
        default="data",
        help="Default location where data in hdf5 format will be stored (default: %(default)s)",
    )
    parser.add_argument(
        "--pretrained_model_path",
        type=str,
        default="",
        help="Location where the pretrained model (to extract features) can be found (default: %(default)s)",
    )
    parser.add_argument(
        "--save_features_only",
        action="store_true",
        default=False,
        help="Only save features in hdf5 file.",
    )
    parser.add_argument(
        "--save_images_only",
        action="store_true",
        default=False,
        help="Only save images and their labels in hdf5 file.",
    )
    parser.add_argument(
        "--feature_augmentation",
        action="store_true",
        default=False,
        help="Additionally store instance features with horizontally flipped input images.",
    )
    parser.add_argument(
        "--feature_extractor",
        type=str,
        default="classification",
        choices=["classification", "selfsupervised"],
        help="Choice of feature extractor",
    )
    parser.add_argument(
        "--backbone_feature_extractor",
        type=str,
        default="resnet50",
        choices=["resnet50"],
        help="Choice of feature extractor backbone",
    )
    parser.add_argument(
        "--which_dataset", type=str, default="imagenet", help="Dataset choice."
    )
    parser.add_argument(
        "--instance_json",
        type=str,
        default="",
        help="Path to JSON containing instance segmentations for COCO_Stuff",
    )
    parser.add_argument(
        "--stuff_json",
        type=str,
        default="",
        help="Path to JSON containing stuff segmentations for COCO_Stuff",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="Default overall batchsize (default: %(default)s)",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=16,
        help="Number of dataloader workers (default: %(default)s)",
    )
    parser.add_argument(
        "--chunk_size",
        type=int,
        default=500,
        # Used by run() as the first (sample) dimension of every HDF5 chunk.
        help="Number of samples per HDF5 chunk (default: %(default)s)",
    )
    parser.add_argument(
        "--compression",
        action="store_true",
        default=False,
        help="Use LZF compression? (default: %(default)s)",
    )
    return parser
|
|
|
|
|
def _hdf5_name_stem(config, dataset_name_prefix, test_part):
    """Build the file-name stem shared by the image and feature HDF5 files."""
    return "%s%i%s%s%s" % (
        dataset_name_prefix,
        config["resolution"],
        "" if config["which_dataset"] != "imagenet_lt" else "longtail",
        "_val" if config["split"] == "val" else "",
        "_test" if test_part else "",
    )


def _append_rows(dset, rows):
    """Grow ``dset`` along axis 0 and write ``rows`` into the new tail."""
    dset.resize(dset.shape[0] + rows.shape[0], axis=0)
    dset[-rows.shape[0] :] = rows


def run(config):
    """Convert a dataset into HDF5 files of images/labels and/or features.

    The loader is iterated once, in order (``shuffle=False``).  The first
    batch creates the resizable datasets; every later batch reopens the file
    in append mode and extends them.

    Args:
        config: mapping holding the options defined by ``prepare_parser``
            (``resolution``, ``split``, ``data_root``, ``out_path``,
            ``pretrained_model_path``, ``save_features_only``,
            ``save_images_only``, ``feature_augmentation``,
            ``feature_extractor``, ``backbone_feature_extractor``,
            ``which_dataset``, ``instance_json``, ``stuff_json``,
            ``batch_size``, ``num_workers``, ``chunk_size``,
            ``compression``).  The caller's mapping is not modified.

    Side effects:
        * Unless ``save_features_only``: writes ``<out_path>/<stem>_xy.hdf5``
          with datasets ``imgs`` (uint8) and ``labels`` (int64).
        * Unless ``save_images_only``: writes
          ``<out_path>/<stem>_feats_<extractor>_<backbone>.hdf5`` with
          dataset ``feats`` and, with ``feature_augmentation``,
          ``feats_hflip``.
        * For the COCO test part: saves the concatenated image ids under
          ``coco_stuff_val_indexes/``.
    """
    # Work on a shallow copy so the "split"/"compression" rewrites below do
    # not leak into the caller's dictionary.
    config = dict(config)

    save_images = not config["save_features_only"]
    save_features = not config["save_images_only"]

    # The feature extractor and the CUDA normalisation constants are only
    # needed when features are written; an images-only conversion skips them.
    net, norm_mean, norm_std = None, None, None
    if save_features:
        # NOTE(review): inputs are moved to the GPU before the forward pass,
        # so the returned network is presumably already on CUDA - confirm.
        net = utils.load_pretrained_feature_extractor(
            config["pretrained_model_path"],
            config["feature_extractor"],
            config["backbone_feature_extractor"],
        )
        net.eval()
        # Per-channel mean/std applied to the [0, 1]-rescaled images,
        # shaped (1, 3, 1, 1) to broadcast over a batch.
        norm_mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).cuda()
        norm_std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).cuda()

    # Translate the boolean flag into the value h5py expects.
    config["compression"] = "lzf" if config["compression"] else None

    kwargs = {
        "num_workers": config["num_workers"],
        "pin_memory": False,
        "drop_last": False,
    }

    # The "test" split is requested from the dataset as split "val" with
    # test_part=True.
    test_part = False
    if config["split"] == "test":
        config["split"] = "val"
        test_part = True

    if config["which_dataset"] in ["imagenet", "imagenet_lt"]:
        data_path = os.path.join(config["data_root"], config["split"])
    else:
        data_path = config["data_root"]

    dataset = utils.get_dataset_images(
        config["resolution"],
        data_path=data_path,
        longtail=config["which_dataset"] == "imagenet_lt",
        split=config["split"],
        test_part=test_part,
        which_dataset=config["which_dataset"],
        instance_json=config["instance_json"],
        stuff_json=config["stuff_json"],
    )
    train_loader = utils.get_dataloader(
        dataset, config["batch_size"], shuffle=False, **kwargs
    )

    if config["which_dataset"] in ["imagenet", "imagenet_lt"]:
        dataset_name_prefix = "ILSVRC"
    elif config["which_dataset"] == "coco":
        dataset_name_prefix = "COCO"
    else:
        dataset_name_prefix = config["which_dataset"]

    name_stem = _hdf5_name_stem(config, dataset_name_prefix, test_part)

    if save_images:
        h5file_name = config["out_path"] + "/%s_xy.hdf5" % name_stem
        print("Filenames are ", h5file_name)

    if save_features:
        h5file_name_feats = config["out_path"] + "/%s_feats_%s_%s.hdf5" % (
            name_stem,
            config["feature_extractor"],
            config["backbone_feature_extractor"],
        )
        print("Filenames are ", h5file_name_feats)

    print(
        "Starting to load dataset into an HDF5 file with chunk size %i and compression %s..."
        % (config["chunk_size"], config["compression"])
    )

    # Image ids are only collected for the COCO test part.
    collect_image_ids = config["which_dataset"] == "coco" and test_part
    if collect_image_ids:
        all_image_ids = []

    for i, (x, y, image_id) in enumerate(tqdm(train_loader)):
        if collect_image_ids:
            all_image_ids.append(image_id)

        if save_features:
            with torch.no_grad():
                x_tf = x.cuda()
                # Assumes the loader yields images scaled to [-1, 1]: map to
                # [0, 1], then apply the per-channel normalisation.
                x_tf = x_tf * 0.5 + 0.5
                x_tf = (x_tf - norm_mean) / norm_std
                # interpolate() is the function the deprecated upsample()
                # forwards to; the arguments are passed unchanged.
                x_tf = torch.nn.functional.interpolate(x_tf, 224, mode="bicubic")

                # The network returns a pair; only its first item is stored.
                x_feat, _ = net(x_tf)
                x_feat = x_feat.cpu().numpy()
                if config["feature_augmentation"]:
                    x_tf_hflip = tv_f.hflip(x_tf)
                    x_feat_hflip, _ = net(x_tf_hflip)
                    x_feat_hflip = x_feat_hflip.cpu().numpy()
                else:
                    x_feat_hflip = None
        else:
            x_feat, x_feat_hflip = None, None

        # Map images from [-1, 1] to [0, 255] and truncate to uint8.
        x = (255 * ((x + 1) / 2.0)).byte().numpy()
        y = y.numpy()

        if i == 0:
            # First batch: create the files and their resizable datasets.
            num_samples = len(train_loader.dataset)
            if save_images:
                with h5.File(h5file_name, "w") as f:
                    print("Producing dataset of len %d" % num_samples)
                    imgs_dset = f.create_dataset(
                        "imgs",
                        x.shape,
                        dtype="uint8",
                        maxshape=(
                            num_samples,
                            3,
                            config["resolution"],
                            config["resolution"],
                        ),
                        chunks=(
                            config["chunk_size"],
                            3,
                            config["resolution"],
                            config["resolution"],
                        ),
                        compression=config["compression"],
                    )
                    print("Image chunks chosen as " + str(imgs_dset.chunks))
                    imgs_dset[...] = x
                    labels_dset = f.create_dataset(
                        "labels",
                        y.shape,
                        dtype="int64",
                        maxshape=(num_samples,),
                        chunks=(config["chunk_size"],),
                        compression=config["compression"],
                    )
                    print("Label chunks chosen as " + str(labels_dset.chunks))
                    labels_dset[...] = y

            if save_features:
                with h5.File(h5file_name_feats, "w") as f:
                    # Features are treated as 2-D: (samples, feature dim).
                    # dtype "float" is kept from the original layout.
                    features_dset = f.create_dataset(
                        "feats",
                        x_feat.shape,
                        dtype="float",
                        maxshape=(num_samples, x_feat.shape[1]),
                        chunks=(config["chunk_size"], x_feat.shape[1]),
                        compression=config["compression"],
                    )
                    features_dset[...] = x_feat
                    if config["feature_augmentation"]:
                        features_dset_hflips = f.create_dataset(
                            "feats_hflip",
                            x_feat.shape,
                            dtype="float",
                            maxshape=(num_samples, x_feat.shape[1]),
                            chunks=(config["chunk_size"], x_feat.shape[1]),
                            compression=config["compression"],
                        )
                        features_dset_hflips[...] = x_feat_hflip
        else:
            # Later batches: extend each dataset and fill the new tail.
            if save_images:
                with h5.File(h5file_name, "a") as f:
                    _append_rows(f["imgs"], x)
                    _append_rows(f["labels"], y)

            if save_features:
                # A single open handles both the plain and flipped features.
                with h5.File(h5file_name_feats, "a") as f:
                    _append_rows(f["feats"], x_feat)
                    if config["feature_augmentation"]:
                        _append_rows(f["feats_hflip"], x_feat_hflip)

    if collect_image_ids:
        print(
            "Saved COCO index images for evaluation set (in order of appearance in the hdf5 file)"
        )
        # Create the target directory first so a missing folder cannot make
        # the save fail after the whole dataset has been processed.
        os.makedirs("coco_stuff_val_indexes", exist_ok=True)
        np.save(
            os.path.join("coco_stuff_val_indexes", "cocostuff_val2_all_idxs"),
            np.concatenate(all_image_ids),
        )
|
|
|
|
|
def main():
    """Parse the command line, echo the resulting options, and convert."""
    cli_options = prepare_parser().parse_args()
    # run() expects a plain dict keyed by option name.
    settings = vars(cli_options)
    print(settings)
    run(settings)
|
|
|
|
|
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|