I am using the ephysData.raw.timestamps object to compute a time array for the LFP series so I can relate events back to the behavioural and spiking data. The documentation describes it as: "2 columns file containing time synchronisation information for the AP binary file: sample index in the first column and session time in the second column. Note that sample indices may not be integers." But I have found it stored in two different ways. This caused a bug in the part of my preprocessing script that builds the times array. I solved it, but now I am concerned that I am using the wrong data to interpolate a time array.
Documentation:
However, I discovered that the object was saved in an inconsistent manner. For instance, for eid = '572a95d1-39ca-42e1-8424-5c9ffcb2df87'
it is saved as described in the documentation — sort of. It is a list object with two equally long 1-D numpy arrays, presumably the first being the sample indices in the AP band and the second being the reference times.
However, most of the eids I used had the object saved in a different manner. For this eid, eid = '0c828385-6dd6-4842-a702-c5075f5f5e81',
the object is also a two-entry list, but the first entry is a 1-D numpy array of shape (1392,) and the other is a 2-D array of shape (229, 2). I have been using the second array as the two-column array for computing times.
These are the experiments I’ve run my pipeline on:
testing_list = ['0c828385-6dd6-4842-a702-c5075f5f5e81','111c1762-7908-47e0-9f40-2f2ee55b6505','8a3a0197-b40a-449f-be55-c00b23253bbf','1a507308-c63a-4e02-8f32-3239a07dc578','1a507308-c63a-4e02-8f32-3239a07dc578','73918ae1-e4fd-4c18-b132-00cb555b1ad2',
'73918ae1-e4fd-4c18-b132-00cb555b1ad2','09b2c4d1-058d-4c84-9fd4-97530f85baf6','5339812f-8b91-40ba-9d8f-a559563cc46b','034e726f-b35f-41e0-8d6c-a22cc32391fb','83e77b4b-dfa0-4af9-968b-7ea0c7a0c7e4','83e77b4b-dfa0-4af9-968b-7ea0c7a0c7e4','931a70ae-90ee-448e-bedb-9d41f3eda647',
'd2832a38-27f6-452d-91d6-af72d794136c','dda5fc59-f09a-4256-9fb5-66c67667a466','e2b845a1-e313-4a08-bc61-a5f662ed295e','a4a74102-2af5-45dc-9e41-ef7f5aed88be','572a95d1-39ca-42e1-8424-5c9ffcb2df87','781b35fd-e1f0-4d14-b2bb-95b7263082bb',
'b01df337-2d31-4bcc-a1fe-7112afd50c50','e535fb62-e245-4a48-b119-88ce62a6fe67','614e1937-4b24-4ad3-9055-c8253d089919','7f6b86f9-879a-4ea2-8531-294a221af5d0','824cf03d-4012-4ab1-b499-c83a92c5589e','4b00df29-3769-43be-bb40-128b1cba6d35','ff96bfe1-d925-4553-94b5-bf8297adf259']
Here is some example code you can use to verify what I mean:
#IBL SWR detector
import os
import subprocess
import numpy as np
import pandas as pd
from scipy import io, signal, stats
from scipy.signal import lfilter
#from fitter import Fitter, get_common_distributions, get_distributions
import scipy.ndimage
from scipy.ndimage import gaussian_filter
from scipy.ndimage import gaussian_filter1d
from scipy import interpolate
import matplotlib.pyplot as plt
# for ripple detection
import ripple_detection
from ripple_detection import filter_ripple_band
import ripple_detection.simulate as ripsim # for making our time vectors
import piso #can be difficult to install, https://piso.readthedocs.io/en/latest/
from tqdm import tqdm
from iblatlas.atlas import AllenAtlas
from iblatlas.regions import BrainRegions
from one.api import ONE
import spikeglx
from brainbox.io.one import load_channel_locations
from brainbox.io.spikeglx import Streamer
from neurodsp.voltage import destripe_lfp
#THIS CODE WORKS THIS CODE LOOPS THROUGH THE SESSIONS AND DOWNLOADS THE DATA, WE NEED TO ADD THE RIPPLE DETECTION CODE TO REMOVE THE DATA AFTER
from neurodsp.voltage import destripe_lfp
from ibllib.plots import Density
import time # for debugging
import traceback
import logging
from multiprocessing import Pool, Process, Queue
from one.api import ONE
# Connect to the IBL public database (OpenAlyx) and create the ONE client
# used for all dataset queries below.
ONE.setup(base_url='https://openalyx.internationalbrainlab.org', silent=True)
one = ONE(password='international')
# Parameters (input/output file paths and thresholds)
# Session eids the pipeline is run on.
# BUG FIX: the raw list contained three duplicated eids ('1a507308…',
# '73918ae1…', '83e77b4b…'), which would download and process those sessions
# twice. dict.fromkeys de-duplicates while preserving the original order.
testing_list = list(dict.fromkeys([
    '0c828385-6dd6-4842-a702-c5075f5f5e81',
    '111c1762-7908-47e0-9f40-2f2ee55b6505',
    '8a3a0197-b40a-449f-be55-c00b23253bbf',
    '1a507308-c63a-4e02-8f32-3239a07dc578',
    '1a507308-c63a-4e02-8f32-3239a07dc578',
    '73918ae1-e4fd-4c18-b132-00cb555b1ad2',
    '73918ae1-e4fd-4c18-b132-00cb555b1ad2',
    '09b2c4d1-058d-4c84-9fd4-97530f85baf6',
    '5339812f-8b91-40ba-9d8f-a559563cc46b',
    '034e726f-b35f-41e0-8d6c-a22cc32391fb',
    '83e77b4b-dfa0-4af9-968b-7ea0c7a0c7e4',
    '83e77b4b-dfa0-4af9-968b-7ea0c7a0c7e4',
    '931a70ae-90ee-448e-bedb-9d41f3eda647',
    'd2832a38-27f6-452d-91d6-af72d794136c',
    'dda5fc59-f09a-4256-9fb5-66c67667a466',
    'e2b845a1-e313-4a08-bc61-a5f662ed295e',
    'a4a74102-2af5-45dc-9e41-ef7f5aed88be',
    '572a95d1-39ca-42e1-8424-5c9ffcb2df87',
    '781b35fd-e1f0-4d14-b2bb-95b7263082bb',
    'b01df337-2d31-4bcc-a1fe-7112afd50c50',
    'e535fb62-e245-4a48-b119-88ce62a6fe67',
    '614e1937-4b24-4ad3-9055-c8253d089919',
    '7f6b86f9-879a-4ea2-8531-294a221af5d0',
    '824cf03d-4012-4ab1-b499-c83a92c5589e',
    '4b00df29-3769-43be-bb40-128b1cba6d35',
    'ff96bfe1-d925-4553-94b5-bf8297adf259',
]))
from one.api import ONE
# NOTE(review): ONE.setup()/ONE() were already run above (duplicated from a
# notebook cell); re-running is redundant but harmless, kept so this section
# also works as a stand-alone cell.
ONE.setup(base_url='https://openalyx.internationalbrainlab.org', silent=True)
one = ONE(password='international')

# Sessions whose ephysData.raw.timestamps object was observed in the
# unexpected layout (1-D array + (N, 2) array) and the probe ids they were
# observed on.
eids_with_timestamps_error = ['572a95d1-39ca-42e1-8424-5c9ffcb2df87', '0c828385-6dd6-4842-a702-c5075f5f5e81']
corresponding_probe_with_error = ['f9d8aacd-b2a0-49f2-bd71-c2f5aadcfdd1', '0851db85-2889-4070-ac18-a40e8ebd96ba']
# NOTE(review): this eid also appears in eids_with_timestamps_error above, so
# one of the two lists is mislabelled — confirm which session is actually the
# error-free example before relying on it.
no_error_eid = ['0c828385-6dd6-4842-a702-c5075f5f5e81']
# Pick one session to inspect.
# BUG FIX: `eid` was never assigned (both candidate assignments were commented
# out), so the eid2pid call below raised NameError. Default to the first
# problematic session; switch to no_error_eid[0] to inspect the other layout.
eid = eids_with_timestamps_error[0]
#eid = no_error_eid[0]
session_id = eid  # alias used by some later code
probelist, probenames = one.eid2pid(eid)  # probe_id is pid in the IBL tutorials
print(f'Probe IDs: {probelist}, Probe names: {probenames}')
band = 'lf'  # which binary band to work with: either 'ap' or 'lf'

# BUG FIX: `br` was used below but never created; BrainRegions is imported at
# the top of the file.
br = BrainRegions()

# NOTE(review): the original loop body had lost its indentation (invalid
# Python as pasted); reconstructed here. Iterating names and ids together
# with zip replaces the index-based range(len(...)) loop.
#for probe_id in pid:
for probe_name, probe_id in zip(probenames, probelist):
    print(probe_id)
    # First check whether this probe even has CA1 channels on it — no need to
    # process it if not.
    print("getting channels data")
    collectionname = f'alf/{probe_name}/pykilosort'  # ensures channels are all from this probe
    channels = one.load_object(eid, 'channels', collection=collectionname)
    channels.allen2017_25um_acronym = br.id2acronym(channels['brainLocationIds_ccf_2017'])

    # Get the timestamps for this LFP recording.
    # Documentation: 2-column file containing time synchronisation information
    # for the AP binary file: sample index in the first column and session
    # time in the second column. Note that sample indices may not be integers.
    lfp_time_timestamps_path = one.list_datasets(eid, collection=f'raw_ephys_data/{probe_name}',
                                                 filename='*timestamps.npy')
    lfp_time_timestamps, _ = one.load_datasets(eid, lfp_time_timestamps_path, download_only=False)
    # NOTE(review): load_datasets returns ONE array PER MATCHED DATASET. If
    # the '*timestamps.npy' wildcard matches more than one file in this
    # collection, the two list entries are two DIFFERENT files — not the two
    # columns of one file. Print the resolved paths to see what was loaded.
    print(lfp_time_timestamps_path)
    # Now verify with .shape
    print(lfp_time_timestamps[0].shape)
    print(lfp_time_timestamps[1].shape)
This inconsistency was causing a bug in my preprocessing script. My workaround was as follows:
# --- Workaround used in the preprocessing script --------------------------
# Get the timestamps for this LFP recording.
# Documentation: 2-column file containing time synchronisation information for
# the AP binary file: sample index in the first column and session time in the
# second column. Note that sample indices may not be integers.
lfp_time_timestamps_path = one.list_datasets(eid, collection=f'raw_ephys_data/{probe_name}',
                                             filename='*timestamps.npy')
lfp_time_timestamps, _ = one.load_datasets(eid, lfp_time_timestamps_path, download_only=False)

# Some sessions arrive as a list whose second entry is the full (N, 2)
# [sample, time] array; others as two equal-length 1-D arrays (samples,
# times) as documented.
# NOTE(review): load_datasets returns one array per matched dataset, so when
# the wildcard matches two files the 1-D entry and the (N, 2) entry may be
# two DIFFERENT files (e.g. different dataset types), not two views of the
# same sync table — verify lfp_time_timestamps_path before trusting the
# shape-based dispatch below.
if lfp_time_timestamps[1].ndim == 2:
    # Second entry already holds both columns:
    # [:, 0] = AP-band sample index, [:, 1] = session time (seconds).
    sync_table = lfp_time_timestamps[1]
    adjusted_samples = sync_table[:, 0] / 12  # AP -> LF sample index (LF rate is 1/12 of AP)
    adjusted_times = sync_table[:, 1]
else:
    # Documented layout: two equal-length 1-D arrays (samples, times).
    adjusted_samples = lfp_time_timestamps[0] / 12  # AP -> LF sample index (LF rate is 1/12 of AP)
    adjusted_times = lfp_time_timestamps[1]

# Sample numbers for every column of the destriped LFP signal.
# NOTE(review): `destriped` must already exist at this point (output of
# destripe_lfp); it is not defined in this snippet.
sample_numbers = np.arange(destriped.shape[1])
Which version is the correct way for the data to have been saved, and have I computed my times array correctly?