Hi Bertrand,
absolutely. I put together a minimal working example by adapting the Haxby multiclass tutorial.
Please see my questions (#?#) in the code…
### Imports
from joblib import Parallel, delayed

import numpy as np
import pandas as pd
import seaborn as sns

from nilearn import datasets
from nilearn.decoding import Decoder
from nilearn.image import load_img, index_img
from nilearn.input_data import NiftiMasker

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
### Load and prep haxby data
haxby_dataset = datasets.fetch_haxby()
func_filename = haxby_dataset.func[0]
mask_filename = haxby_dataset.mask
# Behavioural labels: one row per acquired volume, space-separated columns.
labels = pd.read_csv(haxby_dataset.session_target[0], sep=" ")
y, session = labels['labels'], labels['chunks']
# Drop 'rest' volumes from the labels, the run indices, and the 4D image alike.
non_rest = y != 'rest'
y, session = y[non_rest], session[non_rest]
func_file = index_img(load_img(func_filename), non_rest)
# Condition names in order of first appearance (not alphabetical).
unique_conditions, order = np.unique(y, return_index=True)
unique_conditions = unique_conditions[np.argsort(order)]
### Nilearn Decoder
cv = LeaveOneGroupOut()
decoder = Decoder(
    estimator='svc',
    mask=mask_filename,
    standardize=True,
    cv=cv,
    n_jobs=-1,
    verbose=3,
)
decoder.fit(func_file, y, groups=session)
#?# Does the above (decoder.cv_scores_) actually compute MULTICLASS (OvR) scores?
# Visualize results: one bar per condition plus the across-condition average.
decoder_scores = pd.DataFrame(decoder.cv_scores_)
decoder_scores['Average'] = decoder_scores.mean(axis=1)
decoder_scores = decoder_scores.reset_index(drop=True)
melted = decoder_scores.melt()
sns.barplot(x='variable', y='value', data=melted)
### Sklearn
nifti_masker = NiftiMasker(mask_img=mask_filename, standardize=True)
# Mask the FULL run (rest included) — the rest volumes are removed just below.
X = nifti_masker.fit_transform(func_filename)
# Remove the "rest" condition from X. Use an explicit numpy mask; `y` and
# `session` were already filtered above, so re-filtering `session` here (as in
# the original post) was redundant and relied on pandas boolean-index
# alignment, which is fragile across pandas versions.
X = X[non_rest.to_numpy()]
# One-vs-rest wrapper around a linear SVC, matching the Decoder's estimator.
svc_ova = OneVsRestClassifier(Pipeline([
    ('svc', SVC(kernel='linear'))
]))
### Function to fit and predict on fold with Sklearn Model, returns multiclass accuracies
# NOTE(review): the body below had lost its indentation in the original post
# (email whitespace mangling), which made the script unrunnable; restored here.
def cv_sklearn(train, test):
    """Fit the OvR SVC on one CV fold and return per-class scores.

    Parameters
    ----------
    train, test : integer index arrays for one fold (from ``cv.split``).

    Returns
    -------
    numpy.ndarray
        Diagonal of the row-normalized confusion matrix, i.e. per-class
        recall, ordered like ``unique_conditions``.
    """
    X_train, y_train = X[train], y.iloc[train]
    print(train, test)  # debug: show the fold indices
    # Fits the module-level estimator in place; under joblib.Parallel each
    # worker receives its own pickled copy, so folds do not share state.
    svc_ova.fit(X_train, y_train)
    X_test, y_test = X[test], y.iloc[test]
    y_pred = svc_ova.predict(X_test)
    # normalize='true' divides each row by its class count, so the diagonal
    # holds the fraction of correctly classified samples per class.
    cm = confusion_matrix(y_test, y_pred, labels=unique_conditions,
                          normalize='true').diagonal()
    return cm
# Run one fit/score job per leave-one-run-out fold, in parallel.
cms = Parallel(n_jobs=12)(
    delayed(cv_sklearn)(train_idx, test_idx)
    for train_idx, test_idx in cv.split(X, y, session)
)
### Visualize Sklearn Multiclass scores
sklearn_accs = pd.DataFrame(cms, columns=unique_conditions)
sklearn_accs['Average'] = sklearn_accs.mean(axis=1)
sklearn_accs = sklearn_accs.reset_index(drop=True)
sns.barplot(x='variable', y='value', data=sklearn_accs.melt())
#?# Why are these scores much lower than those from the Decoder object?
### Likewise:
# NOTE(review): `cross_val_score` was never imported in the original post
# (NameError as written) — it must come from sklearn.model_selection. Also,
# `groups` is keyword-only in recent scikit-learn, so pass it by name rather
# than positionally.
cv_scores_ova = cross_val_score(svc_ova, X, y, groups=session, cv=cv,
                                verbose=3, n_jobs=-1)
print('OvA:', cv_scores_ova.mean())
#?# Why is this value much lower than the "average" from the Decoder?
Thanks much for any help here. I mainly want to know which pipeline to use/trust when doing multiclass prediction.