"""Data fetcher module. Do not move this file from `data` folder.
The data fetcher function names match the directory structure of the test data.
For example, to load data in the `data/dwi/ge` directory, there should be a
fetcher function named `dwi_ge`.
"""
import os
import json
import numpy as np
import nibabel as nib
from dipy.io import read_bvals_bvecs
# Absolute path of the directory that contains this file (symlinks resolved).
# The fetcher functions below build the paths to the test datasets from it.
DIR_DATA = os.path.dirname(os.path.realpath(__file__))
def get_filepaths(directory, expected_filenames):
    """Get filepaths in directory; check their names match expected_filenames

    Parameters
    ----------
    directory : str
        full path to directory
    expected_filenames : list
        list of strings with expected file names

    Returns
    -------
    list
        list of strings with full paths to files (sorted alphabetically)

    Raises
    ------
    AssertionError
        if the file names found in `directory` (ignoring "__init__.py")
        are not exactly the names in `expected_filenames`
    """
    # Compare both listings in alphabetical order, so that the order in which
    # the caller wrote `expected_filenames` does not matter
    expected_filenames = sorted(expected_filenames)

    # List the directory, ignoring "__init__.py" files
    filenames = sorted(name for name in os.listdir(directory)
                       if name != "__init__.py")

    # Raise explicitly instead of using an `assert` statement: assert
    # statements are removed when Python runs with -O, which would silently
    # disable this check
    if filenames != expected_filenames:
        raise AssertionError(
            f"Expected files in {directory}:\n{expected_filenames}\n"
            f"This doesn't match the found files:\n{filenames}")

    # Make list of file paths
    return [os.path.join(directory, filename) for filename in filenames]
def dwi_ge():
    """Load the test dataset stored in the dwi/ge directory

    Returns
    -------
    numpy.ndarray
        image data
    numpy.ndarray
        affine matrix for image data
    numpy.ndarray
        array of b-values
    numpy.ndarray
        array of b-vectors
    """
    # File names that make up this test dataset. A mismatch with the actual
    # directory contents means that the test dataset has been corrupted.
    expected_filenames = ['00014__Cor_DWI_RT.bval',
                          '00014__Cor_DWI_RT.bvec',
                          '00014__Cor_DWI_RT.json',
                          '00014__Cor_DWI_RT.nii.gz',
                          'box_01.nii.gz',
                          'box_02.nii.gz',
                          'box_check.png']

    # Locate the dataset and verify its contents
    folder = os.path.join(DIR_DATA, "dwi", "ge")
    paths = get_filepaths(folder, expected_filenames)

    # The paths come back in alphabetical order, so the first four entries
    # are the .bval, .bvec, .json and .nii.gz files of the DWI series
    bval_path, bvec_path, _, nii_path = paths[:4]

    # Read bvals, bvecs, and DWI data
    bvals, bvecs = read_bvals_bvecs(bval_path, bvec_path)
    img = nib.load(nii_path)
    voxels = img.get_fdata()

    return voxels, img.affine, bvals, bvecs
def r2star_ge():
    """Load the test dataset stored in the r2star/ge directory

    Returns
    -------
    numpy.ndarray
        image data
    numpy.ndarray
        affine matrix for image data
    numpy.ndarray
        array of echo times, in seconds
    """
    # File names that make up this test dataset. A mismatch with the actual
    # directory contents means that the test dataset has been corrupted.
    # The names are listed alphabetically, which is not the order of
    # increasing echo time; the data is reordered by echo time further down.
    expected_filenames = ['00016__InPhase_Cor_R2_Mapping_BH_+_phase_e1.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e1.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e10.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e10.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e11.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e11.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e12.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e12.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e2.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e2.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e3.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e3.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e4.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e4.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e5.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e5.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e6.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e6.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e7.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e7.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e8.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e8.nii.gz',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e9.json',
                          '00016__InPhase_Cor_R2_Mapping_BH_+_phase_e9.nii.gz']

    # Locate the dataset and verify its contents
    folder = os.path.join(DIR_DATA, "r2star", "ge")
    paths = get_filepaths(folder, expected_filenames)

    # Collect one volume and one echo time per echo, in file-name order
    volumes = []
    echo_times = []
    for path in paths:
        if path.endswith(".json"):
            # The echo time is stored in the sidecar file
            with open(path, 'r') as sidecar:
                echo_times.append(json.load(sidecar)['EchoTime'])
        elif path.endswith(".nii.gz"):
            # Only keep the magnitude data (index 0 of the last dimension)
            nii = nib.load(path)
            volumes.append(nii.get_fdata()[..., 0])

    # Stack the volumes and put the echo dimension last
    image = np.moveaxis(np.array(volumes), 0, -1)
    echo_times = np.array(echo_times)

    # Reorder echoes and volumes by increasing echo time
    order = echo_times.argsort()

    # The affine is taken from the last NIfTI file that was loaded
    return image[..., order], nii.affine, echo_times[order]
def r2star_siemens():
    """Load the test dataset stored in the r2star/siemens directory

    Returns
    -------
    numpy.ndarray
        image data
    numpy.ndarray
        affine matrix for image data
    numpy.ndarray
        array of echo times, in seconds
    """
    # File names that make up this test dataset. A mismatch with the actual
    # directory contents means that the test dataset has been corrupted.
    # The names are listed alphabetically, which is not the order of
    # increasing echo time; the data is reordered by echo time further down.
    expected_filenames = ['00025__bh3x_r2star_inphase_volume_e1.json',
                          '00025__bh3x_r2star_inphase_volume_e1.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e10.json',
                          '00025__bh3x_r2star_inphase_volume_e10.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e11.json',
                          '00025__bh3x_r2star_inphase_volume_e11.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e12.json',
                          '00025__bh3x_r2star_inphase_volume_e12.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e2.json',
                          '00025__bh3x_r2star_inphase_volume_e2.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e3.json',
                          '00025__bh3x_r2star_inphase_volume_e3.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e4.json',
                          '00025__bh3x_r2star_inphase_volume_e4.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e5.json',
                          '00025__bh3x_r2star_inphase_volume_e5.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e6.json',
                          '00025__bh3x_r2star_inphase_volume_e6.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e7.json',
                          '00025__bh3x_r2star_inphase_volume_e7.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e8.json',
                          '00025__bh3x_r2star_inphase_volume_e8.nii.gz',
                          '00025__bh3x_r2star_inphase_volume_e9.json',
                          '00025__bh3x_r2star_inphase_volume_e9.nii.gz']

    # Locate the dataset and verify its contents
    folder = os.path.join(DIR_DATA, "r2star", "siemens")
    paths = get_filepaths(folder, expected_filenames)

    # Collect one volume and one echo time per echo, in file-name order
    volumes = []
    echo_times = []
    for path in paths:
        if path.endswith(".json"):
            # The echo time is stored in the sidecar file
            with open(path, 'r') as sidecar:
                echo_times.append(json.load(sidecar)['EchoTime'])
        elif path.endswith(".nii.gz"):
            # Keep the full image data of this echo
            nii = nib.load(path)
            volumes.append(nii.get_fdata())

    # Stack the volumes and put the echo dimension last (4th dimension)
    image = np.moveaxis(np.array(volumes), 0, -1)
    echo_times = np.array(echo_times)

    # Reorder echoes and volumes by increasing echo time
    order = echo_times.argsort()
    image = image[:, :, :, order]
    echo_times = echo_times[order]

    # The affine is taken from the last NIfTI file that was loaded
    return image, nii.affine, echo_times
def r2star_philips():
    """Load the test dataset stored in the r2star/philips directory

    Returns
    -------
    numpy.ndarray
        image data
    numpy.ndarray
        affine matrix for image data
    numpy.ndarray
        array of echo times, in seconds
    """
    # File names that make up this test dataset. A mismatch with the actual
    # directory contents means that the test dataset has been corrupted.
    # The names are listed alphabetically, which is not the order of
    # increasing echo time; the data is reordered by echo time further down.
    expected_filenames = ['01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e1.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e1.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e10.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e10.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e11.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e11.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e12.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e12.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e13.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e13.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e3.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e3.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e4.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e4.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e5.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e5.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e6.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e6.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e7.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e7.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e8.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e8.nii.gz',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e9.json',
                          '01401__Kidney_T2star_m-FFE_3x3x5_SPIR_volume_inphase_e9.nii.gz']

    # Locate the dataset and verify its contents
    folder = os.path.join(DIR_DATA, "r2star", "philips")
    paths = get_filepaths(folder, expected_filenames)

    # Collect one volume and one echo time per echo, in file-name order
    volumes = []
    echo_times = []
    for path in paths:
        if path.endswith(".json"):
            # The echo time is stored in the sidecar file
            with open(path, 'r') as sidecar:
                echo_times.append(json.load(sidecar)["EchoTime"])
        elif path.endswith(".nii.gz"):
            # Keep the full image data of this echo
            nii = nib.load(path)
            volumes.append(nii.get_fdata())

    # Stack the volumes and put the echo dimension last (4th dimension)
    image = np.moveaxis(np.array(volumes), 0, -1)
    echo_times = np.array(echo_times)

    # Reorder echoes and volumes by increasing echo time
    order = echo_times.argsort()
    image = image[:, :, :, order]
    echo_times = echo_times[order]

    # The affine is taken from the last NIfTI file that was loaded
    return image, nii.affine, echo_times
def b0_ge():
    """Load the test dataset stored in the b0/ge directory

    Returns
    -------
    numpy.ndarray
        image data - Magnitude
    numpy.ndarray
        image data - Phase
    numpy.ndarray
        affine matrix for image data
    numpy.ndarray
        array of echo times, in seconds
    """
    # File names that make up this test dataset. A mismatch with the actual
    # directory contents means that the test dataset has been corrupted.
    # The names are listed alphabetically, which is not necessarily the order
    # of increasing echo time; the data is reordered by echo time below.
    expected_filenames = ['00009__3D_B0_map_VOL_e1.json',
                          '00009__3D_B0_map_VOL_e1.nii.gz',
                          '00009__3D_B0_map_VOL_e2.json',
                          '00009__3D_B0_map_VOL_e2.nii.gz']

    # Locate the dataset and verify its contents
    folder = os.path.join(DIR_DATA, "b0", "ge")
    paths = get_filepaths(folder, expected_filenames)

    # Collect magnitude, real and imaginary volumes plus the echo times
    magnitude_vols = []
    real_vols = []
    imaginary_vols = []
    echo_times = []
    for path in paths:
        if path.endswith(".json"):
            # The echo time is stored in the sidecar file
            with open(path, 'r') as sidecar:
                echo_times.append(json.load(sidecar)['EchoTime'])
        elif path.endswith(".nii.gz"):
            nii = nib.load(path)
            voxels = nii.get_fdata()
            # Index 0 of the last dimension holds the magnitude data
            magnitude_vols.append(voxels[..., 0])
            # NOTE: real data at index 1 and imaginary data at index 2 -
            # this assignment still needs to be confirmed
            real_vols.append(voxels[..., 1])
            imaginary_vols.append(voxels[..., 2])

    def echoes_last(vols):
        # Stack the volumes and put the echo dimension last
        return np.moveaxis(np.array(vols), 0, -1)

    magnitude = echoes_last(magnitude_vols)
    real = echoes_last(real_vols)
    imaginary = echoes_last(imaginary_vols)

    # Phase image => tan-1(Im/Re), with the sign flipped
    # (sign change as discussed with Andy Priest)
    phase = -np.arctan2(imaginary, real)

    # Reorder echoes and volumes by increasing echo time
    echo_times = np.array(echo_times)
    order = echo_times.argsort()
    magnitude = magnitude[..., order]
    phase = phase[..., order]
    echo_times = echo_times[order]

    # The affine is taken from the last NIfTI file that was loaded
    return magnitude, phase, nii.affine, echo_times
def _load_b0_siemens_philips(filepaths):
    """General function to retrieve siemens/philips b0 data from filepaths

    Parameters
    ----------
    filepaths : list
        list of strings with full paths to the files of the dataset. Only
        the paths ending in ".nii.gz" are loaded; for each of them a ".json"
        file with the same base name is read to get the 'ImageType' and
        'EchoTime' fields

    Returns
    -------
    numpy.ndarray
        image data - Magnitude
    numpy.ndarray
        image data - Phase
    numpy.ndarray
        affine matrix for image data
    numpy.ndarray
        array of echo times, in seconds

    Raises
    ------
    ValueError
        if `filepaths` contains no ".nii.gz" files, if the echo times of the
        magnitude and phase images are not equal, or if the affine matrices
        of the input data are not all equal
    """
    # Load image data, affines, image types and echo times
    data = []
    affines = []
    image_types = []
    echo_times = []
    for filepath in filepaths:
        if filepath.endswith(".nii.gz"):
            # Load data in NIfTI files
            nii = nib.load(filepath)
            data.append(nii.get_fdata())
            affines.append(nii.affine)
            # Load necessary information from corresponding .json files
            json_path = filepath.replace(".nii.gz", ".json")
            with open(json_path, 'r') as json_file:
                hdr = json.load(json_file)
                image_types.append(hdr['ImageType'])
                echo_times.append(hdr['EchoTime'])

    # Fail early with a clear message instead of an IndexError further down
    if not data:
        raise ValueError("No NIfTI (.nii.gz) files found in `filepaths`")

    # Sort by increasing echo time
    sort_idxs = np.argsort(echo_times)
    data = np.array([data[i] for i in sort_idxs])
    echo_times = np.array([echo_times[i] for i in sort_idxs])
    image_types = [image_types[i] for i in sort_idxs]

    # Move measurements (time) dimension to 4th dimension
    data = np.moveaxis(data, 0, -1)

    # Separate magnitude and phase images using boolean masks built from
    # the 'ImageType' field of each image
    magnitude_idxs = np.array(["M" in i for i in image_types], dtype=bool)
    phase_idxs = np.array(["P" in i for i in image_types], dtype=bool)
    magnitude = data[..., magnitude_idxs]
    phase = data[..., phase_idxs]
    echo_times_magnitude = echo_times[magnitude_idxs]
    echo_times_phase = echo_times[phase_idxs]

    # Assign unique echo times for output. np.array_equal also copes with a
    # different number of magnitude and phase images: it returns False,
    # whereas `==` on arrays of different lengths does not yield a usable
    # element-wise result
    if not np.array_equal(echo_times_magnitude, echo_times_phase):
        raise ValueError("Magnitude and phase echo times must be equal")
    echo_times = echo_times_magnitude

    # If all affines are equal, initialise the affine for output
    if not all(np.array_equal(i, affines[0]) for i in affines):
        raise ValueError("Affine matrices of input data are not all equal")
    affine = affines[0]

    return magnitude, phase, affine, echo_times
def b0_siemens(dataset_id):
    """Fetches b0/siemens_{dataset_id} dataset

    Parameters
    ----------
    dataset_id : int
        Number of the dataset to load:
        - dataset_id = 1 to load "b0/siemens_1"
        - dataset_id = 2 to load "b0/siemens_2"

    Returns
    -------
    See outputs of _load_b0_siemens_philips

    Raises
    ------
    ValueError
        if `dataset_id` is not one of the possible dataset ids
    """
    POSSIBLE_DATASET_IDS = [1, 2]

    # Initialise hard-coded list of file names that are the expected files
    # in the test dataset. If the actual files in the directory don't match
    # this list this means that the test dataset has been corrupted.
    # Note these file names are sorted alphabetically and may not be sorted
    # by increasing echo time. The sort by echo time is done when loading.
    if dataset_id == 1:
        expected_filenames = ['00010__bh_b0map_3D_default_e1.json',
                              '00010__bh_b0map_3D_default_e1.nii.gz',
                              '00010__bh_b0map_3D_default_e2.json',
                              '00010__bh_b0map_3D_default_e2.nii.gz',
                              '00011__bh_b0map_3D_default_e1.json',
                              '00011__bh_b0map_3D_default_e1.nii.gz',
                              '00011__bh_b0map_3D_default_e2.json',
                              '00011__bh_b0map_3D_default_e2.nii.gz']
    elif dataset_id == 2:
        expected_filenames = ['00044__bh_b0map_fa3_default_e1.json',
                              '00044__bh_b0map_fa3_default_e1.nii.gz',
                              '00044__bh_b0map_fa3_default_e2.json',
                              '00044__bh_b0map_fa3_default_e2.nii.gz',
                              '00045__bh_b0map_fa3_default_e1.json',
                              '00045__bh_b0map_fa3_default_e1.nii.gz',
                              '00045__bh_b0map_fa3_default_e2.json',
                              '00045__bh_b0map_fa3_default_e2.nii.gz']
    else:
        error_msg = f"`dataset_id` must be one of {POSSIBLE_DATASET_IDS}"
        raise ValueError(error_msg)

    # Initialise path to b0/siemens_{dataset_id}
    dir_b0_siemens = os.path.join(DIR_DATA, "b0", f"siemens_{dataset_id}")

    # Get filepaths in directory and check their names match expected_filenames
    filepaths = get_filepaths(dir_b0_siemens, expected_filenames)

    # Load data: magnitude, phase, affine and echo times
    return _load_b0_siemens_philips(filepaths)
def b0_philips(dataset_id):
    """Fetches b0/philips_{dataset_id} dataset

    Parameters
    ----------
    dataset_id : int
        Number of the dataset to load:
        - dataset_id = 1 to load "b0/philips_1" (the directory contents are
          checked, but loading this dataset is not implemented)
        - dataset_id = 2 to load "b0/philips_2"

    Returns
    -------
    See outputs of _load_b0_siemens_philips

    Raises
    ------
    ValueError
        if `dataset_id` is not one of the possible dataset ids, or if
        `dataset_id` is 1 (reading datasets where phase data was not saved
        for both echoes separately is not implemented)
    """
    POSSIBLE_DATASET_IDS = [1, 2]

    # Initialise hard-coded list of file names that are the expected files
    # in the test dataset. If the actual files in the directory don't match
    # this list this means that the test dataset has been corrupted.
    # Note these file names are sorted alphabetically and may not be sorted
    # by increasing echo time. The sort by echo time is done when loading.
    if dataset_id == 1:
        expected_filenames = ['00801__B0_map_expiration_default_e1.json',
                              '00801__B0_map_expiration_default_e1.nii.gz',
                              '00801__B0_map_expiration_default_e2_real.json',
                              '00801__B0_map_expiration_default_e2_real.nii.gz']
    elif dataset_id == 2:
        expected_filenames = ['01401__B0_map_expiration_volume_2DMS_product_e1.json',
                              '01401__B0_map_expiration_volume_2DMS_product_e1.nii.gz',
                              '01401__B0_map_expiration_volume_2DMS_product_e1_ph.json',
                              '01401__B0_map_expiration_volume_2DMS_product_e1_ph.nii.gz',
                              '01401__B0_map_expiration_volume_2DMS_product_e2.json',
                              '01401__B0_map_expiration_volume_2DMS_product_e2.nii.gz',
                              '01401__B0_map_expiration_volume_2DMS_product_e2_ph.json',
                              '01401__B0_map_expiration_volume_2DMS_product_e2_ph.nii.gz']
    else:
        error_msg = f"`dataset_id` must be one of {POSSIBLE_DATASET_IDS}"
        raise ValueError(error_msg)

    # Initialise path to b0/philips_{dataset_id}
    dir_b0_philips = os.path.join(DIR_DATA, "b0", f"philips_{dataset_id}")

    # Get filepaths in directory and check their names match expected_filenames
    filepaths = get_filepaths(dir_b0_philips, expected_filenames)

    # Dataset 1 cannot be loaded yet; this is only reported after the
    # directory contents have been checked
    if dataset_id == 1:
        error_msg = ("Functionality to read datasets where phase data was not "
                     "saved for both echoes separately is not implemented")
        raise ValueError(error_msg)

    # Load data: magnitude, phase, affine and echo times
    return _load_b0_siemens_philips(filepaths)