import numpy as np  # A library for working with numerical data
import pandas as pd  # A library for working with tabular data
from nilearn.maskers import NiftiMasker  # A class for masking and denoising fMRI data (not used in this script)
from nilearn.masking import apply_mask, unmask  # Functions for (un)masking fMRI data

sub = 'MTL0002'
ses = 'FU84'

# Files from fMRIPrep
data_file = f"/project/rrg-villens/dataset/PreventAD/mri/derivatives/fmriprep-20.2.7/fmriprep_w2n3/derivatives_APBv24_1_0/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-rest_run-01_space-MNI152NLin2009cAsym_res-2_trimmed_desc-preproc_bold.nii.gz"
mask_file = f"/project/rrg-villens/dataset/PreventAD/mri/derivatives/fmriprep-20.2.7/fmriprep_w2n3/derivatives_APBv24_1_0/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-rest_run-01_space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz"

# Apply the mask to the data image to get a 2D (time-by-voxels) array
data = apply_mask(data_file, mask_file)

# Files from tedana (after running on fMRIPrepped data)
mixing_file = f"/project/rrg-villens/dataset/PreventAD/mri/derivatives/fmriprep-20.2.7/fmriprep_w2n3/derivatives_APBv24_1_0/tedana_trimmed/sub-{sub}_ses-{ses}/sub-{sub}_ses-{ses}_task-_space-Native_desc-ICA_mixing.tsv"
metrics_file = f"/project/rrg-villens/dataset/PreventAD/mri/derivatives/fmriprep-20.2.7/fmriprep_w2n3/derivatives_APBv24_1_0/tedana_trimmed/sub-{sub}_ses-{ses}/sub-{sub}_ses-{ses}_task-_space-Native_desc-tedana_metrics.tsv"

# Confounds file from fMRIPrep (not used below; see the sketch at the end of this script)
confounds_file = f"/project/rrg-villens/dataset/PreventAD/mri/derivatives/fmriprep-20.2.7/fmriprep_w2n3/derivatives_APBv24_1_0/sub-{sub}/ses-{ses}/func/sub-{sub}_ses-{ses}_task-rest_run-01_desc-confounds_timeseries.tsv"

# Load the mixing matrix
mixing_df = pd.read_table(mixing_file)  # Shape is time-by-components

# Load the component table and split component names by classification
metrics_df = pd.read_table(metrics_file)
rejected_columns = metrics_df.loc[metrics_df["classification"] == "rejected", "Component"]
accepted_columns = metrics_df.loc[metrics_df["classification"] == "accepted", "Component"]

# Select "bad" (rejected) and "good" (accepted) components from the mixing matrix
rejected_components = mixing_df[rejected_columns].to_numpy()
accepted_components = mixing_df[accepted_columns].to_numpy()

# Fit a GLM with the rejected components, the accepted components,
# and a constant term
regressors = np.hstack(
    (
        rejected_components,
        accepted_components,
        np.ones((mixing_df.shape[0], 1)),
    ),
)
betas = np.linalg.lstsq(regressors, data, rcond=None)[0][:-1]  # Drop the constant's beta

# Denoise the data using the betas from just the bad (rejected) components
confounds_idx = np.arange(rejected_components.shape[1])
pred_data = np.dot(rejected_components, betas[confounds_idx, :])
data_denoised = data - pred_data

# Save the non-aggressively denoised data to file
denoised_img = unmask(data_denoised, mask_file)
denoised_img.to_filename(
    f"/project/rrg-villens/javan/tedana_fmriprep_nonaggrdenoised/sub-{sub}_ses-{ses}_task-rest_space-MNI152NLin2009cAsym_desc-nonaggrDenoised_bold.nii.gz"
)
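
# --- Optional extension (a sketch, not part of the pipeline above) ---
# confounds_file is defined above but never used. If fMRIPrep nuisance regressors
# should also be removed, one way is to add them to the GLM alongside the rejected
# components and subtract their fitted signal as well. The six motion parameters
# below are used purely as an illustration; which confound columns to include, and
# whether the confounds table needs the same trimming as the (trimmed) data, are
# assumptions to verify before using this.
#
# confounds_df = pd.read_table(confounds_file)
# motion = confounds_df[["trans_x", "trans_y", "trans_z", "rot_x", "rot_y", "rot_z"]].to_numpy()
# noise_regressors = np.hstack((rejected_components, motion))
# regressors_ext = np.hstack(
#     (noise_regressors, accepted_components, np.ones((mixing_df.shape[0], 1))),
# )
# betas_ext = np.linalg.lstsq(regressors_ext, data, rcond=None)[0][:-1]
# noise_idx = np.arange(noise_regressors.shape[1])
# data_denoised_ext = data - np.dot(noise_regressors, betas_ext[noise_idx, :])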