import SimpleITK as sitk
from natsort import natsorted
import numpy as np
from pathlib import Path
import pandas as pd
from collections import defaultdict
from shutil import copyfile
import os
from os.path import join
from tqdm import tqdm
import gc
import multiprocessing as mp
from nnunet.dataset_conversion.utils import generate_dataset_json
from functools import partial


def preprocess_dataset(dataset_load_path, dataset_save_path, pool):
    train_image_load_path = join(dataset_load_path, "imagesTr")
    train_mask_load_path = join(dataset_load_path, "labelsTr")
    test_image_load_path = join(dataset_load_path, "imagesTs")
    ribfrac_train_info_1_path = join(dataset_load_path, "ribfrac-train-info-1.csv")
    ribfrac_train_info_2_path = join(dataset_load_path, "ribfrac-train-info-2.csv")
    ribfrac_val_info_path = join(dataset_load_path, "ribfrac-val-info.csv")
    train_image_save_path = join(dataset_save_path, "imagesTr")
    train_mask_save_path = join(dataset_save_path, "labelsTr")
    test_image_save_path = join(dataset_save_path, "imagesTs")
    Path(train_image_save_path).mkdir(parents=True, exist_ok=True)
    Path(train_mask_save_path).mkdir(parents=True, exist_ok=True)
    Path(test_image_save_path).mkdir(parents=True, exist_ok=True)
    meta_data = preprocess_csv(ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path)
    preprocess_train(train_image_load_path, train_mask_load_path, meta_data, dataset_save_path, pool)
    preprocess_test(test_image_load_path, dataset_save_path)


def preprocess_csv(ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path):
    # Collect for every case (public_id) the instance ids and their fracture class labels.
    print("Processing csv...")
    meta_data = defaultdict(list)
    for csv_path in [ribfrac_train_info_1_path, ribfrac_train_info_2_path, ribfrac_val_info_path]:
        df = pd.read_csv(csv_path)
        for _, row in df.iterrows():
            name = row["public_id"]
            instance = row["label_id"]
            class_label = row["label_code"]
            meta_data[name].append({"instance": instance, "class_label": class_label})
    print("Finished csv processing.")
    return meta_data


def preprocess_train(image_path, mask_path, meta_data, save_path, pool):
    print("Processing train data...")
    pool.map(partial(preprocess_train_single, image_path=image_path, mask_path=mask_path, meta_data=meta_data, save_path=save_path), meta_data.keys())
    print("Finished processing train data.")


def preprocess_train_single(name, image_path, mask_path, meta_data, save_path):
    # Convert the instance segmentation of a single case into a semantic segmentation
    # by assigning each instance the fracture class listed in the CSV meta data.
    case_id = int(name[7:])  # numeric part of the public_id, e.g. "RibFrac100" -> 100
    image, _, _, _ = load_image(join(image_path, name + "-image.nii.gz"), return_meta=True, is_seg=False)
    instance_seg_mask, spacing, _, _ = load_image(join(mask_path, name + "-label.nii.gz"), return_meta=True, is_seg=True)
    semantic_seg_mask = np.zeros_like(instance_seg_mask, dtype=int)
    for entry in meta_data[name]:
        semantic_seg_mask[instance_seg_mask == entry["instance"]] = entry["class_label"]
    semantic_seg_mask[semantic_seg_mask == -1] = 5  # Map label code -1 (unidentified fracture type) to class 5
    save_image(join(save_path, "imagesTr/RibFrac_" + str(case_id).zfill(4) + "_0000.nii.gz"), image, spacing=spacing, is_seg=False)
    save_image(join(save_path, "labelsTr/RibFrac_" + str(case_id).zfill(4) + ".nii.gz"), semantic_seg_mask, spacing=spacing, is_seg=True)


def preprocess_test(load_test_image_dir, save_path):
    # Test images only need to be copied and renamed to the nnU-Net naming scheme.
    print("Processing test data...")
    filenames = load_filenames(load_test_image_dir)
    for filename in tqdm(filenames):
        case_id = int(os.path.basename(filename)[8:-13])
        copyfile(filename, join(save_path, "imagesTs/RibFrac_" + str(case_id).zfill(4) + "_0000.nii.gz"))
    print("Finished processing test data.")


def load_filenames(img_dir, extensions=None):
    _img_dir = fix_path(img_dir)
    img_filenames = []
    for file in os.listdir(_img_dir):
        if extensions is None or file.endswith(extensions):
            img_filenames.append(_img_dir + file)
    img_filenames = np.asarray(img_filenames)
    img_filenames = natsorted(img_filenames)
    return img_filenames


def fix_path(path):
    if path[-1] != "/":
        path += "/"
    return path


def load_image(filepath, return_meta=False, is_seg=False):
    image = sitk.ReadImage(filepath)
    image_np = sitk.GetArrayFromImage(image)
    if is_seg:
        image_np = np.rint(image_np)
        image_np = image_np.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8
    if not return_meta:
        return image_np
    else:
        spacing = image.GetSpacing()
        keys = image.GetMetaDataKeys()
        header = {key: image.GetMetaData(key) for key in keys}
        affine = None  # How do I get the affine transform with SimpleITK? With NiBabel it is just image.affine
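        # A possible answer, left here as a hedged sketch rather than wired in (so the
        # function still returns affine=None as before): SimpleITK stores the geometry as
        # origin, spacing and a row-major 3x3 direction matrix, from which an
        # LPS-convention affine could be assembled, e.g.
        #   direction = np.asarray(image.GetDirection()).reshape(3, 3)
        #   affine = np.eye(4)
        #   affine[:3, :3] = direction * np.asarray(spacing)  # scale the columns by the voxel spacing
        #   affine[:3, 3] = image.GetOrigin()
        # Note that NiBabel affines are RAS+, so the first two axes would still have to be flipped.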
        return image_np, spacing, affine, header


def save_image(filename, image, spacing=None, affine=None, header=None, is_seg=False, mp_pool=None, free_mem=False):
    if is_seg:
        image = np.rint(image)
        image = image.astype(np.int8)  # In special cases segmentations can contain negative labels, so no np.uint8
    image = sitk.GetImageFromArray(image)
    if header is not None:
        for key in header.keys():
            image.SetMetaData(key, header[key])
    if spacing is not None:
        image.SetSpacing(spacing)
    if affine is not None:
        pass  # How do I set the affine transform with SimpleITK? With NiBabel it is just nib.Nifti1Image(img, affine=affine, header=header)
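        # A hedged sketch (assumption, not part of the original pipeline): SimpleITK has no
        # single affine setter; the geometry would be set from the affine's components, e.g.
        #   image.SetOrigin(tuple(affine[:3, 3]))
        #   image.SetSpacing(tuple(np.linalg.norm(affine[:3, :3], axis=0)))
        #   image.SetDirection(tuple((affine[:3, :3] / np.linalg.norm(affine[:3, :3], axis=0)).flatten()))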
    if mp_pool is None:
        sitk.WriteImage(image, filename)
        if free_mem:
            del image
            gc.collect()
    else:
        mp_pool.apply_async(_save, args=(filename, image, free_mem,))
        if free_mem:
            del image
            gc.collect()


def _save(filename, image, free_mem):
    sitk.WriteImage(image, filename)
    if free_mem:
        del image
        gc.collect()


if __name__ == "__main__":
    # Note: Due to a bug in SimpleITK 2.1.x, a version of SimpleITK < 2.1.0 is required for loading the images.
    # Further, the images and masks cannot simply be copied, but have to be loaded and brought to the same spacing
    # (see the sketch below the conversion instructions).
    # Conversion instructions:
    # 1. Download all sets, parts and CSVs from https://ribfrac.grand-challenge.org/dataset/
    # 2. Unzip ribfrac-train-images-1.zip (unzipped as Part1) and ribfrac-train-images-2.zip (unzipped as Part2), move the content of Part2 into Part1 and rename the folder to imagesTr
    # 3. Unzip ribfrac-train-labels-1.zip (unzipped as Part1) and ribfrac-train-labels-2.zip (unzipped as Part2), move the content of Part2 into Part1 and rename the folder to labelsTr
    # 4. Unzip ribfrac-val-images.zip and add its content to imagesTr; repeat with ribfrac-val-labels.zip and labelsTr
    # 5. Unzip ribfrac-test-images.zip and rename the folder to imagesTs
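    # The spacing harmonisation mentioned above is not done by explicit resampling here; image and mask are
    # simply re-saved with the mask's spacing in preprocess_train_single. If the voxel grids ever disagree,
    # one option (an assumption, not something this script does) would be to resample the image onto the
    # mask grid with SimpleITK, e.g.:
    #   resampled = sitk.Resample(image_itk, mask_itk, sitk.Transform(), sitk.sitkLinear, 0.0, image_itk.GetPixelID())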
    pool = mp.Pool(processes=20)
    dataset_load_path = "/home/k539i/Documents/network_drives/E132-Projekte/Projects/2021_Gotkowski_RibFrac_RibSeg/original/RibFrac/"
    dataset_save_path = "/home/k539i/Documents/network_drives/E132-Projekte/Projects/2021_Gotkowski_RibFrac_RibSeg/preprocessed/Task154_RibFrac_multi_label/"
    preprocess_dataset(dataset_load_path, dataset_save_path, pool)
    print("Still saving images in background...")
    pool.close()
    pool.join()
    print("All tasks finished.")
    labels = {0: "background", 1: "displaced_rib_fracture", 2: "non_displaced_rib_fracture", 3: "buckle_rib_fracture", 4: "segmental_rib_fracture", 5: "unidentified_rib_fracture"}
    generate_dataset_json(join(dataset_save_path, 'dataset.json'), join(dataset_save_path, "imagesTr"), None, ('CT',), labels, "Task154_RibFrac_multi_label")