AutoSeg4ETICA / nnunet /dataset_conversion /Task035_ISBI_MSLesionSegmentationChallenge.py
Chris Xiao
upload files
c642393
# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil
from collections import OrderedDict
import numpy as np
import SimpleITK as sitk
import multiprocessing
from batchgenerators.utilities.file_and_folder_operations import *
def convert_to_nii_gz(filename):
    """Re-save an image as ``.nii.gz`` and delete the file it was read from.

    The image is read and written with SimpleITK. The output path is
    ``filename`` with its last extension replaced by ``.nii.gz``.

    :param filename: path of the image file to convert
    """
    image = sitk.ReadImage(filename)
    stem, _ = os.path.splitext(filename)
    compressed_path = stem + ".nii.gz"
    sitk.WriteImage(image, compressed_path)
    # only drop the source once the new file has been written
    os.remove(filename)
def convert_for_submission(source_dir, target_dir):
    """Re-save the ``.nii.gz`` files of ``source_dir`` under submission names.

    File names are split on ``"__"``; the second field is parsed as the case
    id and the third field (minus its ``.nii.gz`` suffix) as the time step.
    Each image is read with SimpleITK and written to ``target_dir`` as
    ``test<case id>_<time step>_nnUNet.nii`` (both numbers zero-padded to two
    digits). ``target_dir`` is created if necessary.

    :param source_dir: folder containing the ``.nii.gz`` files to convert
    :param target_dir: folder that receives the renamed ``.nii`` files
    """
    suffix_length = 7  # len(".nii.gz")
    predictions = subfiles(source_dir, suffix=".nii.gz", join=False)
    maybe_mkdir_p(target_dir)
    for prediction in predictions:
        fields = prediction.split("__")
        case_id = int(fields[1])
        timestep = int(fields[2][:-suffix_length])
        output_name = "test%02d_%02d_nnUNet.nii" % (case_id, timestep)
        image = sitk.ReadImage(join(source_dir, prediction))
        sitk.WriteImage(image, join(target_dir, output_name))
if __name__ == "__main__":
    # One-off conversion script. It works in place on the folders below and the
    # steps depend on each other, so the order of the sections matters:
    #   1. compress every .nii to .nii.gz
    #   2. rename the images to case__<patient>__<timestep>_<channel>.nii.gz
    #   3. duplicate each training image once per mask
    #   4. rename the training labels to match
    #   5. write dataset.json
    #   6. write splits_final.pkl
    out_base = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/'
    train_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTr'
    test_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs'
    labels_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/labelsTr'
    raw_splitted_train_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task035_ISBILesionSegmentation/imagesTr"
    splits_folder = "/media/fabian/nnunet/Task035_ISBILesionSegmentation"

    # convert to nifti.gz
    dirs = [train_folder, test_folder, labels_folder]
    p = multiprocessing.Pool(3)
    try:
        for d in dirs:
            nii_files = subfiles(d, suffix='.nii')
            p.map(convert_to_nii_gz, nii_files)
    finally:
        # shut the workers down even if one of the conversions raised
        p.close()
        p.join()

    def rename_files(folder):
        """Rename the four modality files of every (patient, time step) in ``folder``.

        A (patient, time step) pair is only processed when exactly four files
        in ``folder`` contain ``<patient>_<timestep>_`` in their name. These are
        renamed to ``case__<patient>__<timestep>_<channel>.nii.gz`` with the
        channels 0000=flair, 0001=mprage, 0002=pd, 0003=t2.

        :param folder: folder whose files are renamed in place
        """
        # (file name suffix, channel id) in channel order
        modalities = (
            ("_flair_pp.nii.gz", "0000"),
            ("_mprage_pp.nii.gz", "0001"),
            ("_pd_pp.nii.gz", "0002"),
            ("_t2_pp.nii.gz", "0003"),
        )
        all_files = subfiles(folder, join=False)
        # there are max 14 patients per folder, starting with 1
        for patientid in range(1, 15):
            # there are certainly no more than 10 time steps per patient, starting with 1
            for t in range(1, 11):
                tag = "%02.0d_%02.0d_" % (patientid, t)
                patient_files = [i for i in all_files if tag in i]
                if len(patient_files) != 4:
                    continue
                # look up all four files before renaming any of them, so a
                # missing modality fails without leaving a half-renamed case
                sources = [[i for i in patient_files if i.endswith(suffix)][0] for suffix, _ in modalities]
                for source, (_, channel) in zip(sources, modalities):
                    target = "case__%02.0d__%02.0d_%s.nii.gz" % (patientid, t, channel)
                    os.rename(join(folder, source), join(folder, target))

    # only the two image folders are renamed here; the labels are handled below
    for d in dirs[:-1]:
        rename_files(d)

    # now we have to deal with the training masks, we do it the quick and dirty way here by just creating copies of the
    # training data
    for patientid in range(1, 6):
        for t in range(1, 6):
            fnames_original = subfiles(train_folder, prefix="case__%02.0d__%02.0d" % (patientid, t), suffix=".nii.gz", sort=True)
            for f in fnames_original:
                for mask in [1, 2]:
                    # f[-12:] is the "_XXXX.nii.gz" channel suffix; the mask tag goes in front of it
                    fname_target = f[:-12] + "__mask%d" % mask + f[-12:]
                    shutil.copy(f, fname_target)
                # the image without a mask tag is replaced by its two copies
                os.remove(f)

    # rename the labels so that they share the case identifier of the image copies
    for patientid in range(1, 6):
        for t in range(1, 6):
            for mask in [1, 2]:
                f = join(labels_folder, "training%02d_%02d_mask%d.nii.gz" % (patientid, t, mask))
                if isfile(f):
                    os.rename(f, join(labels_folder, "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask)))

    # training cases are the label files that actually exist
    tr_files = []
    for patientid in range(1, 6):
        for t in range(1, 6):
            for mask in [1, 2]:
                label_name = "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask)
                if isfile(join(labels_folder, label_name)):
                    tr_files.append(label_name)

    # test cases are detected through the presence of their first channel (0000)
    ts_files = []
    for patientid in range(1, 20):
        for t in range(1, 20):
            if isfile(join(test_folder, "case__%02.0d__%02.0d_0000.nii.gz" % (patientid, t))):
                ts_files.append("case__%02.0d__%02.0d.nii.gz" % (patientid, t))

    json_dict = OrderedDict()
    json_dict['name'] = "ISBI_Lesion_Segmentation_Challenge_2015"
    json_dict['description'] = "nothing"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see challenge website"
    json_dict['licence'] = "see challenge website"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "flair",
        "1": "mprage",
        "2": "pd",
        "3": "t2"
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "lesion"
    }
    json_dict['numTraining'] = len(subfiles(labels_folder))
    # every test case consists of four files, one per modality
    json_dict['numTest'] = len(subfiles(test_folder)) // 4
    # i[:-7] strips the ".nii.gz" extension
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i[:-7], "label": "./labelsTr/%s.nii.gz" % i[:-7]} for i in
                             tr_files]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i[:-7] for i in ts_files]
    save_json(json_dict, join(out_base, "dataset.json"))

    # i[:-12] strips the "_XXXX.nii.gz" channel suffix, np.unique collapses the channels of a case
    case_identifiers = np.unique([i[:-12] for i in subfiles(raw_splitted_train_folder, suffix='.nii.gz', join=False)])

    # five splits; split f validates on every case of patient f + 1 and trains on the rest
    splits = []
    for f in range(5):
        splits.append(OrderedDict())
        splits[-1]['val'] = np.array([i for i in case_identifiers if i.startswith("case__%02d__" % (f + 1))])
        remaining = [i for i in case_identifiers if i not in splits[-1]['val']]
        splits[-1]['train'] = np.array(remaining)

    maybe_mkdir_p(splits_folder)
    save_pickle(splits, join(splits_folder, "splits_final.pkl"))