"""Gather multi-echo fMRIPrep outputs and their echo times for tedana.

The script walks the fMRIPrep directory for per-echo BOLD images, groups them
by the filename prefix that precedes ``_echo-``, reads ``EchoTime`` from the
matching ``_bold.json`` sidecars found under the BIDS directory, and stores one
row per acquisition in ``InData_df`` (columns: sub, EchoFiles, EchoTimes,
OutDir).
"""
import argparse
import json
import os
import re

import pandas as pd
from tedana import workflows

parser = argparse.ArgumentParser(
    description='Give me a path to your fmriprep output and number of cores to run')
# Both directories are walked below, so a missing value is rejected here
# instead of surfacing later as a TypeError from os.walk(None).
parser.add_argument('--fmriprepDir', required=True, type=str,
                    help="This is the full path to your fmriprep dir")
parser.add_argument('--bidsDir', required=True, type=str,
                    help="This is the full path to your BIDS directory")
parser.add_argument('--cores', default=None, type=int,
                    help="This is the number of parallel jobs to run")
args = parser.parse_args()

# inputs
prep_data = args.fmriprepDir
bids_dir = args.bidsDir
cores = args.cores


def _find_files(top, suffix):
    """Return the full path of every file under *top* whose name ends with *suffix*."""
    return [os.path.join(root, name)
            for root, _dirs, names in os.walk(top)
            for name in names
            if name.endswith(suffix)]


# Walk each tree once up front; the per-acquisition loop below only filters
# these lists instead of re-walking the directories for every acquisition.
bold_images = _find_files(prep_data, '_bold.nii.gz')
preproc_images = [path for path in bold_images
                  if path.endswith('_desc-preproc_bold.nii.gz')]
bold_sidecars = _find_files(bids_dir, '_bold.json')

# Obtain echo files: file names (without directory) of every per-echo BOLD image.
echo_images = [os.path.basename(path) for path in bold_images
               if '_echo-' in os.path.basename(path)]

# Unique filename prefixes (everything before "_echo-"), one per acquisition.
# Sorted so that the row order of InData_df is the same on every run.
image_prefix_list = sorted({re.search(r'(.*)_echo-', f).group(1) for f in echo_images})

# Build one row per acquisition: subject label, echo image paths, echo times
# and the tedana output directory.
data = []
for acq in image_prefix_list:
    # Use a regex to pull the subject label out of the prefix.
    sub = "sub-" + re.search(r'sub-(.*)_task', acq).group(1)

    # Anchor on "<prefix>_echo-" rather than a plain substring test so that a
    # prefix ending in e.g. "run-1" does not also pick up "run-10" files.
    echo_stem = acq + '_echo-'

    # Read the echo times out of the BIDS sidecars and sort them.
    echo_times = []
    for sidecar in bold_sidecars:
        if os.path.basename(sidecar).startswith(echo_stem):
            with open(sidecar) as handle:
                echo_times.append(json.load(handle)['EchoTime'])
    echo_times.sort()

    # Preprocessed echo images belonging to this acquisition, sorted by path.
    acq_image_files = sorted(path for path in preproc_images
                             if os.path.basename(path).startswith(echo_stem))

    # Output goes to "<parent of prep_data>/tedana/<sub>".
    # NOTE: os.path.dirname() of a path with a trailing slash returns that same
    # directory, so in that case the tedana folder is created inside prep_data.
    out_dir = os.path.join(
        os.path.abspath(os.path.dirname(prep_data)),
        "tedana/%s" % (sub))

    print(prep_data, out_dir)

    data.append([sub, acq_image_files, echo_times, out_dir])

InData_df = pd.DataFrame(data=data, columns=['sub', 'EchoFiles', 'EchoTimes', 'OutDir'])
import time
from multiprocessing import Pool

# Settings that can reasonably be changed in the tedana call below
# (this script currently uses fittype="curvefit", tedpca="kic", gscontrol=None):
#   fittype:   'loglin' is faster but maybe less accurate than 'curvefit'.
#   tedpca:    'mdl' (Minimum Description Length) returns the least number of
#              components (default) and is recommended;
#              'kic' (Kullback-Leibler Information Criterion) is medium aggression;
#              'aic' (Akaike Information Criterion) is the least aggressive,
#              i.e., it returns the most components.
#   gscontrol: post-processing to remove spatially diffuse noise. Options
#              implemented here are global signal regression (GSR) and minimum
#              image regression (MIR), but anatomical CompCor, Go Decomposition
#              (GODEC), and robust PCA can also be used.


def RUN_Tedana(sub, EchoFiles, EchoTimes, OutDir, task="sharedreward"):
    """Run the tedana workflow for one acquisition unless its output exists.

    Parameters
    ----------
    sub : str
        Subject label; it is expected to already carry the "sub-" prefix,
        so it is used as-is when building the output prefix.
    EchoFiles : list
        Echo image paths, passed to tedana as its first positional argument.
    EchoTimes : list
        Echo times, passed to tedana as its second positional argument.
    OutDir : str
        Output directory. If it already exists the acquisition is skipped.
    task : str, optional
        Task label used in the output prefix. Defaults to "sharedreward",
        the value that was previously hard-coded.

    Returns
    -------
    None
    """
    # Kept from the original; presumably staggers worker start-up — TODO confirm.
    time.sleep(2)
    print(sub + '\n')

    if os.path.isdir(OutDir):
        print('Tedana was previously run for Sub %s remove directory if they need to be reanalyzed' % (sub))
        return

    workflows.tedana_workflow(
        EchoFiles,
        EchoTimes,
        out_dir=OutDir,
        # `sub` already starts with "sub-"; the previous "sub-%s" template
        # produced a doubled "sub-sub-..." prefix.
        prefix="%s_task-%s_space-Native" % (sub, task),
        fittype="curvefit",
        tedpca="kic",
        verbose=True,
        gscontrol=None)


# The guard keeps worker processes that re-import this module (spawn start
# method) from trying to create their own pools.
if __name__ == "__main__":
    # One (sub, EchoFiles, EchoTimes, OutDir) tuple per row of InData_df.
    job_args = zip(InData_df['sub'].tolist(),
                   InData_df['EchoFiles'].tolist(),
                   InData_df['EchoTimes'].tolist(),
                   InData_df['OutDir'].tolist())

    # starmap blocks until every job has finished, so leaving the `with`
    # block (which terminates the pool) cannot cut any work short.
    with Pool(cores) as pool:
        results = pool.starmap(RUN_Tedana, job_args)