Preserve datatype and precision with nibabel

Abstract

How can I make nibabel losslessly copy a NIfTI image, preserving the data type and intensity scaling parameters? In particular, what if the input uses an integer datatype and scl_slope is not one?

Details

I want to process an OpenNeuro BIDS dataset where the images have been defaced and regions outside the scalp set to zero (to avoid ghosting of facial features). Many of these clinical scans were acquired with interpolation (resulting in artificially high resolution) and large fields of view. Subsequent processing is much faster if I can first crop these images to remove exterior slices that are all zeros. I wrote a simple script to do this, but by default it wants to convert the data to float32, whereas I would like to preserve the int16 datatype as this is a lossless operation and the goal is to reduce disk and RAM usage.

A minimal example of my issue is the script below, which can be applied to the uint8 single_subj_T1.nii image that is supplied with SPM12. fslhd reports a scl_slope 0.003906 and scl_inter 0.000000, so even though the raw data is in the range 0…255, the scaled data returned by get_fdata is 0…1, so saving astype(nifti_img.get_data_dtype()) results in an image that is essentially 0s.

  • nibabel tells me that get_data() is deprecated, so it looks like we should try to use get_fdata()
  • Strangely, when describing the header, nibabel reports scl_slope : nan and scl_inter : nan
import nibabel as nib
import numpy as np
import os
import glob

def crop_nifti_image(input_path, output_path):
    """
    Crop a NIfTI image by trimming all-zero border rows, columns and slices.

    The cropped image is written with the original header, so the on-disk
    datatype and intensity scaling (scl_slope/scl_inter) are preserved.
    This avoids the data-destroying pattern of casting the scaled float
    array back to an integer dtype with an unsafe astype(), which truncates
    values scaled into the 0-1 range to zero.

    Parameters:
        input_path (str): Path to the input NIfTI file.
        output_path (str): Path where the cropped NIfTI file is written.

    Returns:
        None: Prints a summary, or a message when nothing can be cropped.
    """
    try:
        # Load NIfTI file
        nifti_img = nib.load(input_path)
        print(nifti_img.header)
        data = nifti_img.get_fdata()
        affine = nifti_img.affine

        # Boolean masks marking which rows/cols/slices contain any signal
        non_zero_rows = np.any(data != 0, axis=(1, 2))
        non_zero_cols = np.any(data != 0, axis=(0, 2))
        non_zero_slices = np.any(data != 0, axis=(0, 1))

        # First and one-past-last non-zero index along each axis.
        # argmax on a boolean array returns the first True position;
        # flipping gives the distance of the last True from the end.
        start_row, end_row = np.argmax(non_zero_rows), len(non_zero_rows) - np.argmax(np.flip(non_zero_rows))
        start_col, end_col = np.argmax(non_zero_cols), len(non_zero_cols) - np.argmax(np.flip(non_zero_cols))
        start_slice, end_slice = np.argmax(non_zero_slices), len(non_zero_slices) - np.argmax(np.flip(non_zero_slices))

        # Nothing to trim: leave the original file untouched
        if start_row == 0 and end_row == len(non_zero_rows) and \
           start_col == 0 and end_col == len(non_zero_cols) and \
           start_slice == 0 and end_slice == len(non_zero_slices):
            print("No voxels removed. Original file remains unchanged.")
            return

        # Crop data and shift the affine origin so world coordinates match
        cropped_data = data[start_row:end_row, start_col:end_col, start_slice:end_slice]
        cropped_affine = affine.copy()
        cropped_affine[:3, 3] += affine[:3, :3].dot([start_row, start_col, start_slice])

        # Reusing the original header keeps the on-disk dtype; nibabel
        # recomputes scl_slope/scl_inter for the value range on save, so
        # no intensity information is lost.
        cropped_img = nib.Nifti1Image(cropped_data, cropped_affine, header=nifti_img.header)
        nib.save(cropped_img, output_path)

        # Report the number of rows, columns, and slices removed
        rows_removed = len(non_zero_rows) - (end_row - start_row)
        cols_removed = len(non_zero_cols) - (end_col - start_col)
        slices_removed = len(non_zero_slices) - (end_slice - start_slice)
        print(f"Cropped image saved to {output_path}. Rows removed: {rows_removed}, Columns removed: {cols_removed}, Slices removed: {slices_removed}")

    except Exception as e:
        # Best-effort batch processing: report the error and keep going
        print(f"Error: {e}")

def crop_and_save_images(file_filter, output_folder):
    """
    Crop every NIfTI file matching a glob pattern and save the results.

    Parameters:
        file_filter (str): Glob pattern selecting the input NIfTI files.
        output_folder (str): Folder where cropped images are written
            (created if it does not exist).

    Returns:
        None: Prints progress information for each file.
    """
    try:
        # Find files that match the filter
        matching_files = glob.glob(file_filter)

        if not matching_files:
            print("No files found matching the specified filter.")
            return
        print(f"{len(matching_files)} found")
        matching_files = sorted(matching_files)

        # Create the output folder if it doesn't exist
        os.makedirs(output_folder, exist_ok=True)

        for file_path in matching_files:
            file_name = os.path.basename(file_path)
            # Strip the full NIfTI extension before appending the suffix:
            # a plain str.replace('.nii', ...) would turn 'x.nii.gz' into
            # 'x_cropped.nii.gz.gz'.
            if file_name.endswith('.nii.gz'):
                stem = file_name[:-len('.nii.gz')]
            elif file_name.endswith('.nii'):
                stem = file_name[:-len('.nii')]
            else:
                stem = file_name
            output_path = os.path.join(output_folder, stem + '_cropped.nii.gz')
            # Apply the crop_nifti_image function and save the cropped image
            crop_nifti_image(file_path, output_path)

    except Exception as e:
        print(f"Error: {e}")

# Example usage — point the glob filter at the images you want to crop:
#   filename_filter = '/Users/chris/Desktop/Huge/sub-*_FLAIR.nii.gz'
#   output_folder = '/Users/chris/Desktop/Smallish'
filename_filter = './single_subj_T1.nii'
output_folder = './'
crop_and_save_images(filename_filter, output_folder)

If I’m reading this correctly, the thing you want to do is provide unscaled data, a slope and an intercept, and just have nibabel make you an image. I don’t think there is an interface for that; img.dataobj will always be treated as having been appropriately scaled, so saving with scale factors will invert the scaling to create the on-disk array to save:

on_disk_array = np.asarray(
    (img.dataobj[:] - scl_inter) / scl_slope,
    dtype=img.get_data_dtype()
)

By default (when the scl_slope and scl_inter fields are nan), the scl_slope and scl_inter are calculated to utilize the full range.

If you want to specify them manually, you can retrieve the slope and intercepts of read images from img.dataobj and set them on the header of the target image: header.set_slope_inter(dataobj.slope, dataobj.inter).

If you just want to keep the on-disk dtype, and are okay with slightly changed slopes/intercepts if the min/max values of your data array change:

cropped_img = nb.Nifti1Image(cropped_data, cropped_affine, header=nifti_img.header)

If you want to keep the exact scale factors:

cropped_img = nb.Nifti1Image(cropped_data, cropped_affine, header=nifti_img.header)
cropped_img.header.set_slope_inter(nifti_img.dataobj.slope, nifti_img.dataobj.inter)

Just to give a bit of reasoning: having headers copied from one image to another while hard-coding the scale factors seemed like a major footgun. You could end up with floating point numbers with pointless scale factors, or values that can’t be encoded in the integer range with the given scale factors and so would need to be truncated or zeroed out.

Thus, while the img.dataobj preserves all the information needed to interpret the data block, the img.header has its slope and intercept blanked by default. If the target on-disk datatype requires scaling to encode the range of voxel values, the factors are calculated. If they are explicitly set, the caller is assumed to know what they’re doing.

TBH I find it unsatisfying, and would be interested to consider other approaches that also prevent silent data destruction.


To set the dtype to the original dtype, you have the following options:

# 1: Reuse the header to keep the dtype
cropped_img = nb.Nifti1Image(cropped_data, cropped_affine, header=nifti_img.header)
# 2: Specify the dtype directly (as of nibabel 4.0)
cropped_img = nb.Nifti1Image(cropped_data, cropped_affine, dtype=nifti_img.get_data_dtype())
# 3: Specify dtype after creation
cropped_img = nb.Nifti1Image(cropped_data, cropped_affine)
cropped_img.set_data_dtype(nifti_img.get_data_dtype())

cropped_data.astype(...) is an unsafe cast, so casting 0-1 float data to 'uint8' will truncate everything but 1s to 0.


One last note, we do have a way of slicing an image and making the appropriate adjustments to the affine:

cropped_img = nifti_img.slicer[start_row:end_row, start_col:end_col, start_slice:end_slice]

See Image Slicing.

@effigies thanks for your solution. In case anyone wants it, here is my code that implements the solution. There are two caveats:

  • It crops files in place, replacing originals with cropped files.
  • When running on some operating systems, it is important to pass the filter string in quotes, as the shell can autoexpand the wildcard *. So usage looks like this:
python crop.py './sub-*_FLAIR.nii.gz'

Here is the script:

import nibabel as nib
import numpy as np
import glob
import sys

def crop_nifti_image(filename):
    """
    Crop a NIfTI image by removing zero-valued voxels from the margins.

    The file is cropped in place via img.slicer, which adjusts the affine
    automatically and preserves the on-disk dtype and scale factors.

    Parameters:
        filename (str): Path to the input NIfTI file (overwritten in place).

    Returns:
        None: The function saves the cropped image and prints relevant information.
    """
    try:
        # Load NIfTI file
        nifti_img = nib.load(filename)
        data = nifti_img.get_fdata()
        affine = nifti_img.affine

        # Boolean masks marking which rows/cols/slices contain any signal
        non_zero_rows = np.any(data != 0, axis=(1, 2))
        non_zero_cols = np.any(data != 0, axis=(0, 2))
        non_zero_slices = np.any(data != 0, axis=(0, 1))

        # First and one-past-last non-zero index along each axis
        start_row, end_row = np.argmax(non_zero_rows), len(non_zero_rows) - np.argmax(np.flip(non_zero_rows))
        start_col, end_col = np.argmax(non_zero_cols), len(non_zero_cols) - np.argmax(np.flip(non_zero_cols))
        start_slice, end_slice = np.argmax(non_zero_slices), len(non_zero_slices) - np.argmax(np.flip(non_zero_slices))

        # Check if any voxels are removed
        if start_row == 0 and end_row == len(non_zero_rows) and \
           start_col == 0 and end_col == len(non_zero_cols) and \
           start_slice == 0 and end_slice == len(non_zero_slices):
            print(f"No need to crop '{filename}'")
            return

        # slicer crops the data and adjusts the affine in one step
        cropped_img = nifti_img.slicer[start_row:end_row, start_col:end_col, start_slice:end_slice]

        # Overwrite the original file with the cropped image
        nib.save(cropped_img, filename)

        # Report the number of rows, columns, and slices removed
        rows_removed = len(non_zero_rows) - (end_row - start_row)
        cols_removed = len(non_zero_cols) - (end_col - start_col)
        slices_removed = len(non_zero_slices) - (end_slice - start_slice)

        print(f"Cropped {rows_removed}*{cols_removed}*{slices_removed} Rows*Cols*Slices from '{filename}'")

    except Exception as e:
        # Best-effort batch processing: report the error and keep going
        print(f"Error: {e}")

def crop_and_save_images(file_filter):
    """
    Crop and save NIfTI images based on a file filter.

    Parameters:
        file_filter (str): File filter (glob pattern) to match NIfTI files.

    Returns:
        None: The function prints information about the cropping process.
    """
    try:
        # sorted() already returns a new sorted list, so no second
        # in-place .sort() is needed
        fnms = sorted(glob.glob(file_filter))

        print(f"{len(fnms)} found matching {file_filter}")

        if len(fnms) < 1:
            return

        for fnm in fnms:
            crop_nifti_image(fnm)

    except Exception as e:
        print(f"Error: {e}")

if __name__ == '__main__':
    # Remove zero-filled voxels on the outside edges of NIfTI image(s).
    #
    # Usage:
    #     python crop.py './sub-*_FLAIR.nii.gz'
    #
    # Surround the filter with quotes so the shell does not expand the
    # wildcard itself.
    if len(sys.argv) < 2:
        print('Please provide filename filter')
        sys.exit()

    crop_and_save_images(sys.argv[1])

2 Likes

That looks like a good solution. In AFNI, there is the 3dAutobox command that does this:

3dAutobox -prefix mydset_ab.nii.gz -noclust mydset.nii.gz

The default uses clustering, which you may not want. There are options for adding zero padding too.