'spm_mat_file' Level1Design error running nipype analysis

Hi everyone,

I am quite new to nipype, and more specifically to SPM.
I am trying to fit a GLM on neuroimaging data (the BOLD5000 dataset, which is BIDS-formatted), taking into account noise regressors computed by the fMRIPrep pipeline (motion, CSF, GlobalSignal, WhiteMatter…). My code is heavily inspired by the nipype tutorial example_1stlevel_analysis. Still, I cannot get it to work, because the following error persists:

TraitError: The trait ‘spm_mat_file’ of a Level1DesignOutputSpec instance is an existing file name, but the path ‘/home/or/SPM.mat’ does not exist.

This error is quite strange because, if I am not mistaken, the spm.Level1Design module should be responsible for creating the SPM.mat design matrix, yet the error reads as if the file needed to exist already… (feel free to correct me if I'm wrong!). I have tried a few tricks, like touching the file in my output_dir before running the pipeline, but it is deleted as soon as the pipeline is launched and the graph is built.
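If I print the interface description, spm_mat_file indeed shows up only under the outputs (not the inputs), which matches that reading; this check runs without MATLAB:

from nipype.interfaces import spm

# Prints the interface's inputs and outputs: 'spm_mat_file' is listed as an
# output that Level1Design is expected to create, not as something it consumes.
spm.Level1Design.help()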

I found a similar topic here, but the solution the asker arrived at does not seem to cover my problem…

My code is attached to this topic; you can read it below.
I use WSL2 and work with VSCode. I have Python 3.6 installed on WSL2, and my Nipype version is 1.7.1.
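In case it is relevant, here is a minimal check that nipype can reach SPM at all (same MATLAB/SPM paths as in the script below; the hard-coded /mnt/c paths are of course specific to my setup):

from nipype.interfaces import spm
import nipype.interfaces.matlab as mlab

mlab.MatlabCommand.set_default_matlab_cmd("/mnt/c/Program\\ Files/MATLAB/R2022a/bin/Matlab.exe -nodesktop -nosplash")
mlab.MatlabCommand.set_default_paths("/mnt/c/Users/YBT1/Documents/MATLAB/spm12/")
print(spm.SPMCommand().version)  # prints the detected SPM release if MATLAB and SPM are reachable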

### Author: Yohan BORNACHOT (YBT1)
### Date: 06/05/2022
### Purpose: Script for a parametrical GLM (only Task vs Baseline condition) --> inspired by fmri_spm_glm.py and code from the nipype tutorial, docs here:
### https://nipype.readthedocs.io/en/latest/users/examples/fmri_nipy_glm.html
### https://miykael.github.io/nipype_tutorial/notebooks/example_1stlevel.html
### This code is part of a uniform Python pipeline for image reconstruction from fMRI data
### It uses the Nipype package, which provides a Python interface to SPM (which itself runs on MATLAB)

## Requirements: WSL2


import os
from pathlib import Path
from nipype.interfaces import spm
import nipype.interfaces.io as nio  # Data i/o
import nipype.interfaces.utility as util  # utility
import nipype.pipeline.engine as pe  # pypeline engine
import nipype.algorithms.modelgen as model
import nipype.interfaces.matlab as mlab  # how to run matlab
from nipype.algorithms.misc import Gunzip # To unzip .gz files
from bids import BIDSLayout


def subject_info(subject_id):
    """ Get subject-specific information about the experimental paradigm (used by spm.SpecifyModel to generate an SPM design matrix)"""
    from nipype.interfaces.base import Bunch
    from pathlib import Path
    import pandas as pd
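    # NOTE: imports (and any constants) must live inside this function, because
    # nipype Function nodes re-execute the function source in a fresh namespace.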
    print(f"Subject ID: {subject_id}")
    LINUX_PATH = True
    file_prefix = "/mnt/c" if LINUX_PATH else "C:"
    nb_sessions = 1
    nb_runs = 9
    
    # Adds subject information for each run (no session consideration here)
    condition_names = ['Presence_image'] # Only one condition to test, because the only task is showing images to subjects
                                         # (there would be several conditions if object classes were taken into account)
    subjectinfo = []
    data_dir = Path(f"{file_prefix}/Users/YBT1/Documents/BOLD5000")
    for s in range(1, nb_sessions+1):
        for r in range(1, nb_runs+1):
            # Load info from events.tsv file
            file_dir = data_dir/f"{subject_id}/ses-{s:02}/func/"
            event_file_path = file_dir/f"{subject_id}_ses-{s:02}_task-5000scenes_run-{r:02}_events.tsv"
            event_info = pd.read_csv(event_file_path, sep="\t", usecols=['onset', 'duration'])
            onsets = event_info['onset'].tolist()
            durations = event_info['duration'].tolist()
            
            # Load regressors info from bold_confounds.tsv file
            regressor_file_path = data_dir/f"derivatives/fmriprep/{subject_id}/ses-{s:02}/func/{subject_id}_ses-{s:02}_task-5000scenes_run-{r:02}_bold_confounds.tsv"
            columns_of_interest = ['X', 'Y', 'Z', 'RotX', 'RotY', 'RotZ', 'GlobalSignal', 'CSF', 'WhiteMatter']
            regressors = pd.read_csv(regressor_file_path, sep="\t", usecols=columns_of_interest)
            
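            # Transpose the DataFrame into a list of lists (one list per
            # regressor), the layout that Bunch/SpecifySPMModel expects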
            regressors = [list(regressors[col]) for col in regressors.columns]
            
            # Insert run-related info in subject info
            subjectinfo.insert((s-1)*nb_runs+(r-1),
                               Bunch(conditions=condition_names,
                                     onsets=[onsets],
                                     durations=[durations],
                                     amplitudes=None,
                                     tmod=None,
                                     pmod=None,
                                     regressor_names=columns_of_interest,
                                     regressors=regressors))
    print("len(subjectinfo) = ", len(subjectinfo))
    return subjectinfo
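
# Quick standalone sanity check of the structure above (e.g. in a REPL):
#   info = subject_info('sub-CSI1')
#   assert len(info) == 9                 # one Bunch per run
#   assert len(info[0].regressors) == 9   # nine confound regressors per run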


if __name__ == "__main__":
    LINUX_PATH = True
    path_prefix = "/mnt/c" if LINUX_PATH else "C:"

    # Set the way matlab should be called
    mlab.MatlabCommand.set_default_matlab_cmd(f"{path_prefix}/Program\\ Files/MATLAB/R2022a/bin/Matlab.exe -nodesktop -nosplash")
    mlab.MatlabCommand.set_default_paths(f"{path_prefix}/Users/YBT1/Documents/MATLAB/spm12/")
    # set_mlab_paths expects plain string paths on either platform
    spm.SPMCommand.set_mlab_paths(paths=f"{path_prefix}/Users/YBT1/Documents/MATLAB/spm12/")
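    # NOTE: MatlabCommand.set_default_paths adds spm12 to the MATLAB search path
    # for plain MATLAB nodes, while SPMCommand.set_mlab_paths configures the SPM
    # interfaces themselves (Level1Design, EstimateModel, ...)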
    
    # Init parameters
    data_dir = f"{path_prefix}/Users/YBT1/Documents/BOLD5000/derivatives/fmriprep/" # Data location
    bold5000_dir = f"{path_prefix}/Users/YBT1/Documents/BOLD5000/" # BOLD5000 dir
    # bold_dir = f"{path_prefix}/Users/YBT1/Documents/Reconstruction_pipeline/2-GLM1/bold_signals/" # Where to copy BOLD files
    output_dir = f"{path_prefix}/Users/YBT1/Documents/Reconstruction_pipeline/sub-CSI1/2-GLM1/results2/" # Output files location
    subject_list = ['sub-CSI1'] #, 'sub-CSI2', 'sub-CSI3', 'sub-CSI4'] # Specify the subject directories
    n_sessions = 1
    n_runs = 9
    session_list = [f"ses-{i:02}" for i in range(1,n_sessions+1)]
    run_list = [f"run-{i:02}" for i in range(1,n_runs+1)]
    
    print(f"session_list = {session_list}")
    print(f"run_list = {run_list}")
    
    # Creates a node that iterates over the subjects
    infosource = pe.Node(interface=util.IdentityInterface(fields=['subject_id']), name="infosource") #, 'session_id', 'run_id']), name="infosource")
    infosource.iterables = [('subject_id', subject_list)] #, ('session_id', session_list), ('run_id', run_list)]
    
    # SelectFiles - Get data and fill in info about data layer (alternative to DataGrabber)
    func_files = "{subject_id}/ses-01/func/{subject_id}_ses-01_task-5000scenes_run-0[1-9]_bold_space-T1w_preproc.nii.gz"
    template = {'func': func_files}
    selectfiles = pe.Node(
        interface=nio.SelectFiles(template, base_directory=data_dir, sort_filelist=True), 
        name="selectfiles")

    # Create a node that unzips input files (this is necessary for SPM to run)
    gunzipper = pe.MapNode(
        interface=Gunzip(), iterfield=['in_file'], 
        name='gunzipper')
    
    # Get Subject Info - get subject specific condition information
    getsubjectinfo = pe.Node(
        interface=util.Function(input_names=['subject_id'], 
                                output_names=['subject_info'], 
                                function=subject_info), 
        name='getsubjectinfo')

    # SpecifyModel - Generates SPM-specific Model
    modelspec = pe.Node(
        interface=model.SpecifySPMModel(concatenate_runs=False, 
                                        input_units='secs', 
                                        output_units='secs', 
                                        time_repetition=2., 
                                        high_pass_filter_cutoff=120),
        name="modelspec")

    # Level1Design - Generates an SPM design matrix
    level1design = pe.Node(
        interface=spm.Level1Design(bases={'hrf': {'derivs': [1, 0]}}, 
                                   timing_units='secs', 
                                   interscan_interval=2., 
                                   model_serial_correlations='FAST'), 
        name="level1design")

    # EstimateModel - estimate the parameters of the model
    level1estimate = pe.Node(
        interface=spm.EstimateModel(estimation_method={'Classical': 1}), 
        name="level1estimate")

    # EstimateContrast - estimates contrasts
    condition_names = ['Presence_image']
    cont01 = ('Task>Baseline', 'T', condition_names, [1.0])
    level1conest = pe.Node(
        interface=spm.EstimateContrast(contrasts = [cont01]),
        name="level1conest")

    # Define a DataSink Node where outputs should be stored
    # NOTE: The boolean parameterization = False ensures that the files are directly saved
    # in the datasink folder. Otherwise for each subject there would be another folder (e.g. "_subject_123") created.
    substitutions = [('_subject_id_', '')]
    datasink = pe.Node(
        interface=nio.DataSink(base_directory=output_dir,
                               parameterization=False,
                               substitutions=substitutions), 
        name="datasink")

    # Setup pipeline (enable connections between previously declared nodes)
    # Fields of the parent node in the workflow is connected to the corresponding field of its child node
    # For instance, here 'subject_id' field of the infosource node must match the 'subject_id' field of the selectfiles node
    l1pipeline = pe.Workflow(name="level1")
    l1pipeline.base_dir = output_dir
    # Connect up the 1st-level analysis components
    l1pipeline.connect([
        (infosource, selectfiles, [('subject_id', 'subject_id')]),
        (infosource, getsubjectinfo, [('subject_id', 'subject_id')]),
        (getsubjectinfo, modelspec, [('subject_info', 'subject_info')]),
        # (infosource, level1conest, [('contrasts', 'contrasts')]),
        (selectfiles, gunzipper, [('func', 'in_file')]),
        (gunzipper, modelspec, [('out_file', 'functional_runs')]),
        (modelspec, level1design, [('session_info', 'session_info')]),
        (level1design, level1estimate, [('spm_mat_file', 'spm_mat_file')]),
        (level1estimate, level1conest, [('spm_mat_file', 'spm_mat_file'), ('beta_images', 'beta_images'), ('residual_image', 'residual_image')]),
        (level1conest, datasink, [('spm_mat_file', '1stLevel.@spm_mat'), ('spmT_images', '1stLevel.@T'), ('con_images', '1stLevel.@con'), ('spmF_images', '1stLevel.@F'), ('ess_images', '1stLevel.@ess')])
    ])
    
    # Visualize the created workflow
    l1pipeline.write_graph(graph2use='flat', format='png', dotfilename=Path(output_dir)/'graph_flat.dot', simple_form=True)

    # Run the pipeline
    l1pipeline.run()  # or e.g. l1pipeline.run('MultiProc', plugin_args={'n_procs': 10})

Any help would be highly appreciated! Thanks in advance for your time.