"""Batch-run tedana on multi-echo fMRIPrep outputs.

Walks an fMRIPrep derivatives tree for multi-echo preprocessed BOLD
images, pairs each acquisition with its echo times (read from the BIDS
``_bold.json`` sidecars), and builds the per-acquisition argument list
consumed by the tedana workers defined below.
"""
import argparse
import json
import os
import re

import pandas as pd
from tedana import workflows

# Command-line interface: where the data live and how many workers to run.
parser = argparse.ArgumentParser(
    description='Give me a path to your fmriprep output and number of cores to run')
parser.add_argument('--fmriprepDir', default=None, type=str,
                    help="This is the full path to your fmriprep dir")
parser.add_argument('--bidsDir', default=None, type=str,
                    help="This is the full path to your BIDS directory")
parser.add_argument('--cores', default=None, type=int,
                    help="This is the number of parallel jobs to run")
args = parser.parse_args()

prep_data = args.fmriprepDir
bids_dir = args.bidsDir
cores = args.cores

# Every multi-echo BOLD image anywhere under the fMRIPrep directory.
echo_images = [f for root, dirs, files in os.walk(prep_data)
               for f in files
               if '_echo-' in f and f.endswith('_bold.nii.gz')]

# One prefix per acquisition (everything before "_echo-"). Deduplicate
# with a set, then sort so job order is reproducible across runs.
image_prefix_list = sorted({re.search('(.*)_echo-', f).group(1)
                            for f in echo_images})

# One row per acquisition: [sub, echo files, echo times, out dir, run prefix].
data = []
for acq in image_prefix_list:
    # Subject/session identifier, e.g. "sub-01_ses-1".
    sub = re.search('sub-[^_]+_ses-[^_]+', acq).group(0)
    run_prefix = acq

    # BIDS JSON sidecars for this acquisition hold the echo times.
    ME_headerinfo = [os.path.join(root, f)
                     for root, dirs, files in os.walk(bids_dir)
                     for f in files
                     if acq in f and f.endswith('_bold.json')]

    # Read EchoTime from each sidecar; `with` closes each handle promptly
    # (the old `json.load(open(f))` form leaked file descriptors).
    echo_times = []
    for header_path in ME_headerinfo:
        with open(header_path) as header_file:
            echo_times.append(float(json.load(header_file)['EchoTime']))

    # Current versions of tedana expect TE in milliseconds; BIDS sidecars
    # store seconds.
    echo_times = [1000 * x for x in echo_times]
    echo_times.sort()

    # Sanity check: a sub-millisecond TE almost certainly means the
    # sidecar value was not in seconds as assumed above.
    if any(x < 1 for x in echo_times):
        print("Warning: Echo Times Maybe incorrectly indicated. \n The current script assumes values being read from _bold.json files contain echo times in seconds")

    # Preprocessed per-echo images for this acquisition, sorted so the
    # file order matches the sorted echo times.
    acq_image_files = [os.path.join(root, f)
                       for root, dirs, files in os.walk(prep_data)
                       for f in files
                       if acq in f and 'echo' in f
                       and f.endswith('_desc-preproc_bold.nii.gz')]
    acq_image_files.sort()

    # Output directory sits next to the fMRIPrep dir, one folder per
    # subject/session. tedana creates it when the workflow runs.
    out_dir = os.path.join(
        os.path.abspath(os.path.dirname(prep_data)),
        "tedana_AIC_new/%s" % (sub))
    print(run_prefix, prep_data, out_dir)

    data.append([sub, acq_image_files, echo_times, out_dir, run_prefix])

# Collect everything into a DataFrame, then into the tuple stream that
# Pool.starmap unpacks into RUN_Tedana's positional arguments.
# NOTE: this deliberately rebinds `args` (the argparse namespace was
# already unpacked above).
InData_df = pd.DataFrame(
    data=data,
    columns=['sub', 'EchoFiles', 'EchoTimes', 'OutDir', 'RunPrefix'])
args = zip(InData_df['sub'].tolist(),
           InData_df['EchoFiles'].tolist(),
           InData_df['EchoTimes'].tolist(),
           InData_df['OutDir'].tolist(),
           InData_df['RunPrefix'].tolist())

# Tuning notes for the tedana call below:
# fittype: 'loglin' is faster but maybe less accurate than 'curvefit'.
# tedpca: 'mdl' Minimum Description Length returns the least number of
#   components (default) and recommended;
#   'kic' Kullback-Leibler Information Criterion, medium aggression;
#   'aic' Akaike Information Criterion, least aggressive; i.e., returns
#   the most components.
# gscontrol: post-processing to remove spatially diffuse noise. Options
#   implemented here are...
# ...global signal regression (GSR) and minimum image regression (MIR).
# Anatomical CompCor, Go Decomposition (GODEC), and robust PCA can also
# be used (not wired up here).


def RUN_Tedana(sub, EchoFiles, EchoTimes, OutDir, run_prefix):
    """Run the tedana denoising workflow for one acquisition.

    Skips the run when the denoised output already exists, so the script
    can be re-launched after an interruption without redoing finished work.

    Parameters
    ----------
    sub : str
        Subject/session identifier (e.g. "sub-01_ses-1"); used for logging.
    EchoFiles : list of str
        Paths to the per-echo preprocessed BOLD images, in echo order.
    EchoTimes : list of float
        Echo times in milliseconds, sorted ascending.
    OutDir : str
        Directory where tedana writes its outputs.
    run_prefix : str
        Filename prefix for this acquisition's outputs.
    """
    print(sub + '\n')

    # BUG FIX: tedana is invoked below with prefix
    # "<run_prefix>_space-Native", so its denoised output carries that
    # prefix too. The old check looked for
    # "<run_prefix>_desc-denoised_bold.nii.gz", never matched, and
    # therefore re-ran every already-completed acquisition.
    expected_output_filename = f"{run_prefix}_space-Native_desc-denoised_bold.nii.gz"
    expected_output_path = os.path.join(OutDir, expected_output_filename)

    # Skip only when both the output directory and the final file exist.
    if os.path.isdir(OutDir) and os.path.exists(expected_output_path):
        print(f'Tedana analysis for Sub {sub} appears to be complete (found {expected_output_filename}). Skipping.')
    else:
        workflows.tedana_workflow(
            EchoFiles,
            EchoTimes,
            out_dir=OutDir,
            prefix="%s_space-Native" % (run_prefix),
            fittype="curvefit",
            tedpca="aic",
            verbose=True,
            gscontrol=None)


from multiprocessing import Pool

# Guard the pool launch so worker processes (which re-import this module
# under the "spawn" start method) do not recursively create pools; this
# is required by the multiprocessing programming guidelines. The `with`
# block also closes and joins the pool when the work finishes.
if __name__ == "__main__":
    with Pool(cores) as pool:
        results = pool.starmap(RUN_Tedana, args)