Calculating Jaccard Similarity with Background mask for Neurovault collection

neuroimaging
nilearn
fmri

#1

Hello,

I am working on detecting outliers in Neurovault collection 1952 by computing the Jaccard index between each individual image and the thresholded background mask. I have written the program below:

import warnings
import numpy as np
from scipy import stats
from sklearn.decomposition import FastICA
from sklearn.random_projection import GaussianRandomProjection
from nilearn import datasets
from nilearn.image import smooth_img
from nilearn.datasets import load_mni152_brain_mask
from nilearn.input_data import NiftiMasker
from nilearn import plotting
from nilearn.datasets import fetch_neurovault, fetch_neurovault_ids
from nilearn.datasets.neurovault import basic_collection_terms, NotNull
import matplotlib.pyplot as plt
from nilearn import masking
from nilearn import image
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics.pairwise import cosine_similarity

# Let the user know up front that the first fetch may be slow.
print(
    "Fetching Neurovault images; if you haven't downloaded any "
    "Neurovault data before this will take several minutes."
)
nv_data = fetch_neurovault_ids(collection_ids=[1952])
images = nv_data["images"]

# Background mask computed over the fetched images, then passed through
# threshold_img with threshold="50%".
avg_mask = masking.compute_background_mask(images)
thresholded_avg = image.threshold_img(avg_mask, threshold="50%")

print("\nReshaping and masking images.\n")

# Mask every image in the collection with the MNI152 brain mask and stack
# the results into a 2D array X (one row per successfully masked image).
with warnings.catch_warnings():
    warnings.simplefilter('ignore', UserWarning)
    warnings.simplefilter('ignore', DeprecationWarning)

    mask_img = load_mni152_brain_mask()
    masker = NiftiMasker(
        mask_img=mask_img, memory='nilearn_cache', memory_level=50)
    masker = masker.fit()

    X = []
    # is_usable[i] is False when image i could not be loaded or masked, so
    # row k of X corresponds to the k-th True entry of is_usable.
    is_usable = np.ones((len(images),), dtype=bool)

    for index, image_path in enumerate(images):
        try:
            # Named loaded_img (not `image`) so the imported `nilearn.image`
            # module is not shadowed. Loading happens inside the try so an
            # unreadable file is reported and skipped like a masking failure.
            loaded_img = smooth_img(image_path, fwhm=None)
            X.append(masker.transform(loaded_img))
        except Exception as e:
            # Best effort: report the failing image and carry on.
            meta = nv_data['images_meta'][index]
            print("Failed to mask/reshape image: id: {0}; "
                  "name: '{1}'; collection: {2}; error: {3}".format(
                      meta.get('id'), meta.get('name'),
                      meta.get('collection_id'), e))
            is_usable[index] = False

# np.vstack raises an opaque error on an empty list; fail with a clear one.
if not X:
    raise ValueError("No image could be masked; nothing to compare.")
X = np.vstack(X)

# Project the thresholded background mask into the same voxel space as X.
# The masker fitted on the MNI152 brain mask is reused here: a fresh
# NiftiMasker() would derive its own mask from thresholded_avg and return a
# vector with a different number of voxels than the rows of X.
fmri_masked = masker.transform(thresholded_avg)

print(fmri_masked.shape)

# The Jaccard index compares sets, so both sides are reduced to booleans.
# Resampling to the MNI grid can leave fractional values in the mask, so any
# positive value counts as "inside the background mask".
background_support = np.ravel(fmri_masked) > 0
# Assumption to confirm: a voxel "has data" when its value is finite and
# nonzero (i.e. the images use 0 or NaN as background).
image_support = np.isfinite(X) & (X != 0)

# |A & B| / |A | B| per image, computed directly with NumPy because
# sklearn.metrics.jaccard_similarity_score was removed in scikit-learn 0.23
# and rejects continuous inputs anyway.
intersection = np.logical_and(image_support, background_support).sum(axis=1)
union = np.logical_or(image_support, background_support).sum(axis=1)

# pexp has one Jaccard index per row of X; an empty union is scored 0.
pexp = np.divide(
    intersection, union,
    out=np.zeros(X.shape[0], dtype=float),
    where=union > 0,
)

print(pexp)

I would like to know if the approach I am taking with the code is correct.