Hi All,
I preprocessed my data but forgot to have it output QC images, etc. I thought I’d just re-run the data pipeline on the same output directory after setting `outdir_ingress: run: On`.
Yet all outputs are being regenerated (new modified dates and regular processing times) instead of just the additions (QC images). I thought it wouldn’t do this, even though the working directory had been removed.
Josh
%YAML 1.1
---
# CPAC Pipeline Configuration YAML file
# Version 1.8.7.dev1
#
# http://fcp-indi.github.io for more info.
#
# Tip: This file can be edited manually with a text editor for quick modifications.
#
# NOTE(review): the nesting below was reconstructed from the key order and the
# accompanying comments after the indentation was lost in a paste. Without it,
# every key lands in one flat mapping (duplicate `path` and `run` keys, empty
# section headers). Confirm the structure against the C-PAC default pipeline
# configuration for this version before running.
pipeline_setup:

  # Name for this pipeline configuration - useful for identification.
  # This string will be sanitized and used in filepaths
  pipeline_name: APPCore_2

  output_directory:

    # Quality control outputs
    quality_control:

      # Generate eXtensible Connectivity Pipeline-style quality control files
      generate_xcpqc_files: On

      # Generate quality control pages containing preprocessing and derivative outputs.
      generate_quality_control_images: On

    # Directory where C-PAC should write out processed data, logs, and crash reports.
    # - If running in a container (Singularity/Docker), you can simply set this to an arbitrary
    #   name like '/outputs', and then map (-B/-v) your desired output directory to that label.
    # - If running outside a container, this should be a full path to a directory.
    path: /outputs

    # (Optional) Path to a BIDS-Derivatives directory that already has outputs.
    # - This option is intended to ingress already-existing resources from an output
    #   directory without writing new outputs back into the same directory.
    # - If provided, C-PAC will ingress the already-computed outputs from this directory and
    #   continue the pipeline from where they leave off.
    # - If left as 'None', C-PAC will ingress any already-computed outputs from the
    #   output directory you provide above in 'path' instead, the default behavior.
    source_outputs_dir: None

    # Set to True to make C-PAC ingress the outputs from the primary output directory if they
    # exist, even if a source_outputs_dir is provided
    # - Setting to False will pull from source_outputs_dir every time, over-writing any
    #   calculated outputs in the main output directory
    # - C-PAC will still pull from source_outputs_dir if the main output directory is
    #   empty, however
    pull_source_once: On

    # Include extra versions and intermediate steps of functional preprocessing in the output directory.
    write_func_outputs: Off

    # Include extra outputs in the output directory that may be of interest when more information is needed.
    write_debugging_outputs: Off

    # Output directory format and structure.
    # Options: default, ndmg
    output_tree: default

  system_config:

    # Stop workflow execution on first crash?
    fail_fast: Off

    # Random seed used to fix the state of execution.
    # If unset, each process uses its own default.
    # If set, a `random.log` file will be generated logging the random seed and each node to which that seed was applied.
    # If set to a positive integer (up to 2147483647), that integer will be used to seed each process that accepts a random seed.
    # If set to 'random', a random positive integer (up to 2147483647) will be generated and that seed will be used to seed each process that accepts a random seed.
    random_seed:

    # Prior to running a pipeline C-PAC makes a rough estimate of a worst-case-scenario maximum concurrent memory usage with high-resolution data, raising an exception describing the recommended minimum memory allocation for the given configuration.
    # Turning this option off will allow pipelines to run without allocating the recommended minimum, allowing for more efficient runs at the risk of out-of-memory crashes (use at your own risk)
    raise_insufficient: On

    # A callback.log file from a previous run can be provided to estimate memory usage based on that run.
    observed_usage:

      # Path to callback log file with previously observed usage.
      # Can be overridden with the commandline flag `--runtime_usage`.
      callback_log: /mnt/Friar/rs/BIDS_DATA/derivatives/cpac1.8.6proc001/outputs/callback.log

      # Percent. E.g., `buffer: 10` would estimate 1.1 * the observed memory usage from the callback log provided in "usage".
      # Can be overridden with the commandline flag `--runtime_buffer`.
      buffer: 10

    # Select Off if you intend to run CPAC on a single machine.
    # If set to On, CPAC will attempt to submit jobs through the job scheduler / resource manager selected below.
    on_grid:
      run: Off

      # Sun Grid Engine (SGE), Portable Batch System (PBS), or Simple Linux Utility for Resource Management (SLURM).
      # Only applies if you are running on a grid or compute cluster.
      resource_manager: SGE

      SGE:

        # SGE Parallel Environment to use when running CPAC.
        # Only applies when you are running on a grid or compute cluster using SGE.
        parallel_environment: mpi_smp

        # SGE Queue to use when running CPAC.
        # Only applies when you are running on a grid or compute cluster using SGE.
        queue: all.q

    # The maximum amount of memory each participant's workflow can allocate.
    # Use this to place an upper bound of memory usage.
    # - Warning: 'Memory Per Participant' multiplied by 'Number of Participants to Run Simultaneously'
    #   must not be more than the total amount of RAM.
    # - Conversely, using too little RAM can impede the speed of a pipeline run.
    # - It is recommended that you set this to a value that when multiplied by
    #   'Number of Participants to Run Simultaneously' is as much RAM you can safely allocate.
    maximum_memory_per_participant: 10.1

    # The maximum amount of cores (on a single machine) or slots on a node (on a cluster/grid)
    # to allocate per participant.
    # - Setting this above 1 will parallelize each participant's workflow where possible.
    #   If you wish to dedicate multiple cores to ANTS-based anatomical registration (below),
    #   this value must be equal or higher than the amount of cores provided to ANTS.
    # - The maximum number of cores your run can possibly employ will be this setting multiplied
    #   by the number of participants set to run in parallel (the 'Number of Participants to Run
    #   Simultaneously' setting).
    max_cores_per_participant: 4

    # The number of cores to allocate to ANTS-based anatomical registration per participant.
    # - Multiple cores can greatly speed up this preprocessing step.
    # - This number cannot be greater than the number of cores per participant.
    num_ants_threads: 4

    # The number of cores to allocate to processes that use OpenMP.
    num_OMP_threads: 4

    # The number of participant workflows to run at the same time.
    # - The maximum number of cores your run can possibly employ will be this setting
    #   multiplied by the number of cores dedicated to each participant (the 'Maximum Number of Cores Per Participant' setting).
    num_participants_at_once: 1

    # Full path to the FSL version to be used by CPAC.
    # If you have specified an FSL path in your .bashrc file, this path will be set automatically.
    FSLDIR: FSLDIR

  working_directory:

    # Directory where C-PAC should store temporary and intermediate files.
    # - This directory must be saved if you wish to re-run your pipeline from where you left off (if not completed).
    # - NOTE: As it stores all intermediate files, this directory can grow to become very
    #   large, especially for data with a large amount of TRs.
    # - If running in a container (Singularity/Docker), you can simply set this to an arbitrary
    #   name like '/work', and then map (-B/-v) your desired output directory to that label.
    # - If running outside a container, this should be a full path to a directory.
    # - This can be written to '/tmp' if you do not intend to save your working directory.
    path: /tmp

    # Deletes the contents of the Working Directory after running.
    # This saves disk space, but any additional preprocessing or analysis will have to be completely re-run.
    remove_working_dir: On

  log_directory:

    # Whether to write log details of the pipeline run to the logging files.
    run_logging: On
    path: /outputs/logs

    # Configuration options for logging visualizations of the workflow graph
    graphviz:

      # Configuration for a graphviz visualization of the entire workflow. See https://fcp-indi.github.io/docs/developer/nodes#CPAC.pipeline.nipype_pipeline_engine.Workflow.write_graph for details about the various options
      entire_workflow:

        # Whether to generate the graph visualization
        generate: On

        # Options: [orig, hierarchical, flat, exec, colored]
        graph2use: [orig, hierarchical]

        # Options: [svg, png]
        format: [svg]

        # The node name will be displayed in the form `nodename (package)` when On or `nodename.Class.package` when Off
        simple_form: On

  crash_log_directory:

    # Directory where CPAC should write crash logs.
    path: /outputs/crash

  # NOTE(review): placed under pipeline_setup because it follows the other
  # pipeline_setup sections in the original paste — confirm against the
  # C-PAC schema for this version.
  outdir_ingress:
    run: On