- # add the parent directory and the session module to the path
- import os, sys
- parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
- sys.path.append(parent_dir)
- sys.path.append(os.path.join(parent_dir, 'session'))
- import json, h5py, time
- import numpy as np
- import scipy.ndimage as ndi
- from scipy import signal
- from head_direction import head_direction
- from spatial import place_field_2D, map_stats, get_field_patches, best_match_rotation_polar
- from spatial import bins2meters, cart2pol, pol2cart
- from spiketrain import instantaneous_rate, spike_idxs
- from spiking_metrics import mean_firing_rate, isi_cv, isi_fano
- from session.utils import get_sessions_list, get_sampling_rate, cleaned_epochs
- from session.adapters import load_clu_res, H5NAMES, create_dataset
- def pack(session_path):
- """
- Pack independent tracking datasets into a single HDF5 file.
-
- File has the following structure:
-
- /raw
- /positions - raw positions from .csv
- /events - raw events from .csv
- /sounds - raw sounds from .csv
- /islands - raw island infos from .csv (if exists)
- /processed
- /timeline - matrix of [time, x, y, speed, HD, trial_no, sound_id] sampled at 100 Hz;
- data is smoothed with Gaussian kernels,
- inter-trial intervals have trial_no = 0
- /trial_idxs - matrix of trial indices to timeline
- /sound_idxs - matrix of sound indices to timeline
-
- Each dataset has a 'headers' attribute describing its columns.
- """
- params_file = [x for x in os.listdir(session_path) if x.endswith('.json')][0]
- with open(os.path.join(session_path, params_file)) as json_file:
- parameters = json.load(json_file)
-
- h5name = os.path.join(session_path, '%s.h5' % params_file.split('.')[0])
- with h5py.File(h5name, 'w') as f: # overwrite mode
- # -------- save raw data ------------
- raw = f.create_group('raw')
- raw.attrs['parameters'] = json.dumps(parameters)
- for ds_name in ['positions', 'events', 'sounds', 'islands']:
- filename = os.path.join(session_path, '%s.csv' % ds_name)
- if not os.path.exists(filename):
- continue
-
- with open(filename) as ff:
- headers = ff.readline()
- data = np.loadtxt(filename, delimiter=',', skiprows=1)
- ds = raw.create_dataset(ds_name, data=data)
- ds.attrs['headers'] = headers
-
- # TODO: save contours, and get the file names from the config
- # with open(os.path.join(session_path, '%s.csv' % 'contours')) as ff:
- # data = ff.readlines()
-
- # headers = data[0] # skip headers line
- # contours = [[(x.split(':')[0], x.split(':')[1]) for x in contour.split(',')] for contour in data[1:]]
- # contours = [np.array(contour) for contour in contours]
- # read raw data and normalize to session start
- positions = np.array(f['raw']['positions'])
- s_start, s_end = positions[:, 0][0], positions[:, 0][-1]
- positions[:, 0] = positions[:, 0] - s_start
- events = np.array(f['raw']['events'])
- events[:, 0] = events[:, 0] - s_start
- sounds = np.array(f['raw']['sounds'])
- sounds[:, 0] = sounds[:, 0] - s_start
- # squeeze - if session was interrupted, adjust times
- # to have a continuous timeline
- end_idxs = np.where(events[:, 5] == -1)[0]
- if len(end_idxs) > 1:
- # time differences between pauses
- deltas = [events[idx + 1][0] - events[idx][0] for idx in end_idxs[:-1]]
- for df, delta in zip(end_idxs, deltas): # squeezing events
- events[df+1:][:, 0] = events[df+1:][:, 0] - delta
- end_idxs = np.where(np.diff(sounds[:, 0]) > 20)[0] # squeezing sounds
- for df, delta in zip(end_idxs, deltas):
- sounds[df+1:][:, 0] = sounds[df+1:][:, 0] - delta
- end_idxs = np.where(np.diff(positions[:, 0]) > 20)[0] # squeezing positions - gaps longer than ~20 s are treated as pauses
- for df, delta in zip(end_idxs, deltas):
- positions[df+1:][:, 0] = positions[df+1:][:, 0] - delta
- parameters['experiment']['timepoints'] = [positions[df+1][0] for df in end_idxs] # update session parameters
- parameters['experiment']['session_duration'] = positions[-1][0]
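- # Worked example: if the session paused at t = 100 s and resumed at t = 160 s,
- # delta = 60, so every later event, sound and position timestamp is shifted
- # back by 60 s, producing one continuous timeline.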
- # -------- save processed ------------
- proc = f.create_group('processed')
- proc.attrs['parameters'] = json.dumps(parameters)
- # TODO remove outliers - position jumps over 20cm?
- #diffs_x = np.diff(positions[:, 1])
- #diffs_y = np.diff(positions[:, 2])
- #dists = np.sqrt(diffs_x**2 + diffs_y**2)
- #np.where(dists > 0.2 / pixel_size)[0]
- # convert timeline to 100 Hz
- time_freq = 100 # at 100Hz
- s_start, s_end = positions[:, 0][0], positions[:, 0][-1]
- times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))
- pos_at_freq = np.zeros((len(times), 3))
- curr_idx = 0
- for i, t in enumerate(times):
- if curr_idx < len(positions) - 1 and \
- np.abs(t - positions[:, 0][curr_idx]) > np.abs(t - positions[:, 0][curr_idx + 1]):
- curr_idx += 1
- pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])
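- # The loop above is a nearest-neighbour resampling of the raw positions onto
- # the 100 Hz grid (assuming positions arrive no faster than 100 Hz). A
- # vectorized sketch with the same result, up to tie-breaking, for reference:
- # right = np.clip(np.searchsorted(positions[:, 0], times), 1, len(positions) - 1)
- # left = right - 1
- # nearest = np.where(np.abs(times - positions[left, 0]) <= np.abs(times - positions[right, 0]), left, right)
- # pos_at_freq = np.column_stack([times, positions[nearest, 1], positions[nearest, 2]])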
- # save trials
- t_count = len(np.unique(events[events[:, -1] != 0][:, -2]))
- trials = np.zeros((t_count, 6))
- for i in range(t_count):
- t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2*i][0])).argmin()
- t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2*i + 1][0])).argmin()
- state = 0 if events[2*i + 1][-1] > 1 else 1
- trials[i] = (t_start_idx, t_end_idx, events[2*i][1], events[2*i][2], events[2*i][3], state)
- trial_idxs = proc.create_dataset('trial_idxs', data=trials)
- trial_idxs.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'
- # save sounds
- sound_idxs = np.zeros((len(sounds), 2))
- left_idx = 0
- delta = 10**5 # large initial distance, in seconds
- for i in range(len(sounds)):
- # advance while the distance to the sound time keeps decreasing
- while left_idx < len(pos_at_freq) and \
- np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx]) < delta:
- delta = np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx])
- left_idx += 1
- sound_idxs[i] = (left_idx - 1, sounds[i][1]) # the while loop exits one index past the nearest point
- delta = 10**5
- sound_idxs = proc.create_dataset('sound_idxs', data=sound_idxs)
- sound_idxs.attrs['headers'] = 'timeline_idx, sound_id'
- # building timeline
- width = 50 # 100 points ~= 1 sec at 100 Hz, so width 50 ~= 0.5 sec
- kernel = signal.windows.gaussian(width, std=width / 7.2)
- x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()
- y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()
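- # Note: std = width / 7.2 keeps ~±3.6 standard deviations inside the 50-point
- # window, and dividing by kernel.sum() normalizes the kernel to unit gain,
- # so the smoothed coordinates keep their original scale.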
- # speed
- dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))
- dt = np.diff(pos_at_freq[:, 0])
- speed = np.concatenate([dx/dt, [dx[-1]/dt[-1]]])
- # head direction
- temp_tl = np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed])
- hd = head_direction(temp_tl)
- # trial numbers
- trials_data = np.zeros(len(temp_tl))
- for i, trial in enumerate(trials):
- idx1, idx2 = trial[0], trial[1]
- trials_data[int(idx1):int(idx2)] = i + 1
- # sounds played
- sound_tl = np.zeros(len(temp_tl))
- curr_sound_idx = 0
- for i in range(len(temp_tl)):
- if curr_sound_idx + 1 >= len(sounds):
- break
- if temp_tl[i][0] > sounds[curr_sound_idx][0]:
- curr_sound_idx += 1
- sound_tl[i] = sounds[curr_sound_idx][1]
- timeline = proc.create_dataset('timeline', data=np.column_stack(
- [pos_at_freq[:, 0], x_smooth, y_smooth, speed, hd, trials_data, sound_tl]
- ))
- timeline.attrs['headers'] = 'time, x, y, speed, hd, trial_no, sound_ids'
-
- return h5name
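- # Usage sketch (the session path below is illustrative; the folder is assumed
- # to contain a <name>.json settings file plus positions/events/sounds CSVs):
- # h5name = pack('/path/to/sessions/session_01')
- # with h5py.File(h5name, 'r') as f:
- #     tl = np.array(f['processed']['timeline']) # time, x, y, speed, hd, trial_no, sound_id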
- def write_units(sessionpath):
- filebase = os.path.basename(os.path.normpath(sessionpath))
- h5name = os.path.join(sessionpath, filebase + '.h5')
- # loading unit data
- units = load_clu_res(sessionpath) # spikes are in samples, not seconds
- sampling_rate = get_sampling_rate(sessionpath)
-
- # loading trajectory
- with h5py.File(h5name, 'r') as f:
- tl = np.array(f['processed']['timeline']) # time, X, Y, speed, HD, trials, sounds
-
- # packing
- with h5py.File(h5name, 'a') as f:
- if 'units' not in f:
- f.create_group('units')
- for electrode_idx in units.keys():
- unit_idxs = units[electrode_idx]
- for unit_idx, spiketrain in unit_idxs.items():
- unit_name = '%s-%s' % (electrode_idx, unit_idx)
- s_times = spiketrain/sampling_rate
- i_rate = instantaneous_rate(s_times, tl[:, 0])
- s_idxs = spike_idxs(s_times, tl[:, 0])
- with h5py.File(h5name, 'a') as f:
- if unit_name not in f['units']:
- f['units'].create_group(unit_name)
- create_dataset(h5name, '/units/%s' % unit_name, H5NAMES.spike_times, s_times)
- create_dataset(h5name, '/units/%s' % unit_name, H5NAMES.inst_rate, i_rate)
- create_dataset(h5name, '/units/%s' % unit_name, H5NAMES.spike_idxs, s_idxs)
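- # Example of reading one unit back (unit names follow '<electrode>-<unit>';
- # the name '1-1' below is purely illustrative):
- # with h5py.File(h5name, 'r') as f:
- #     s_times = np.array(f['units']['1-1'][H5NAMES.spike_times['name']])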
- def write_spiking_metrics(sessionpath):
- filebase = os.path.basename(os.path.normpath(sessionpath))
- h5name = os.path.join(sessionpath, filebase + '.h5')
- epochs = cleaned_epochs(sessionpath)
-
- with h5py.File(h5name, 'r') as f:
- unit_names = [name for name in f['units']]
-
- for unit_name in unit_names:
- with h5py.File(h5name, 'r') as f:
- st = np.array(f['units'][unit_name][H5NAMES.spike_times['name']])
-
- mfr_vals = np.zeros(len(epochs))
- isi_cv_vals = np.zeros(len(epochs))
- isi_fn_vals = np.zeros(len(epochs))
- for i, epoch in enumerate(epochs):
- st_cut = st[(st > epoch[0]) & (st < epoch[1])]
- mfr_vals[i] = mean_firing_rate(st_cut)
- isi_cv_vals[i] = isi_cv(st_cut)
- isi_fn_vals[i] = isi_fano(st_cut)
-
- create_dataset(h5name, '/units/%s' % unit_name, H5NAMES.mfr, np.array(mfr_vals))
- create_dataset(h5name, '/units/%s' % unit_name, H5NAMES.isi_cv, np.array(isi_cv_vals))
- create_dataset(h5name, '/units/%s' % unit_name, H5NAMES.isi_fano, np.array(isi_fn_vals))
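- # For reference, a minimal sketch of the ISI metrics computed above. The real
- # implementations live in spiking_metrics; these are the standard formulas:
- # def isi_cv_sketch(spike_times):
- #     isi = np.diff(spike_times) # inter-spike intervals
- #     return isi.std() / isi.mean() # coefficient of variation
- # def isi_fano_sketch(spike_times):
- #     isi = np.diff(spike_times)
- #     return isi.var() / isi.mean() # Fano factor of the ISIs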
- def write_spatial_metrics(sessionpath):
- metric_names = (H5NAMES.o_maps, H5NAMES.f_maps, H5NAMES.sparsity, H5NAMES.selectivity, \
- H5NAMES.spat_info, H5NAMES.peak_FR, H5NAMES.f_patches, H5NAMES.f_COM, \
- H5NAMES.pfr_center, H5NAMES.occ_info, H5NAMES.o_patches, H5NAMES.o_COM)
- xy_range = [-0.5, 0.5, -0.5, 0.5] # fixed range to allow cross-session comparisons
- bin_size = 0.02
- filebase = os.path.basename(os.path.normpath(sessionpath))
- h5name = os.path.join(sessionpath, filebase + '.h5')
- epochs = cleaned_epochs(sessionpath)
- with h5py.File(h5name, 'r') as f:
- tl = np.array(f['processed']['timeline']) # time, X, Y, speed, etc.
- unit_names = [name for name in f['units']]
-
- s_rate_pos = round(1.0 / np.diff(tl[:, 0]).mean())
- run_idxs = np.where(tl[:, 3] > 0.04)[0] # running periods: speed > 4 cm/s
- for unit_name in unit_names:
- with h5py.File(h5name, 'r') as f:
- spk_idxs = np.array(f['units'][unit_name][H5NAMES.spike_idxs['name']])
- spk_idxs = np.intersect1d(spk_idxs, run_idxs)
- collected = []
- for epoch in epochs:
- epoch_idxs = np.where((tl[:, 0] > epoch[0]) & (tl[:, 0] < epoch[1]))[0]
- # filter for epoch and speed > 4cm/s
- unit_pos = tl[np.intersect1d(spk_idxs, epoch_idxs)][:, 1:3]
- traj_pos = tl[epoch_idxs][:, 1:3]
- # compute 2D maps: occupancy and firing rate (place fields)
- #xy_range = [tl[:, 1].min(), tl[:, 1].max(), tl[:, 2].min(), tl[:, 2].max()]
- o_map, s1_map, s2_map, f_map = place_field_2D(traj_pos, unit_pos, s_rate_pos, bin_size=bin_size, xy_range=xy_range)
- # firing map metrics
- sparsity, selectivity, spat_info, peak_FR = map_stats(f_map, o_map)
- # place field metrics
- patches = get_field_patches(f_map) # 2D matrix, patches labeled according to the size
- #f_sizes = np.bincount(patches.flat)[1:] # 1D array of field sizes, sorted
- if f_map.max() == 0:
- f_COM_rho, f_COM_phi, pfr_rho, pfr_phi = 0, 0, 0, 0
- else:
- x_in_b, y_in_b = ndi.center_of_mass(f_map, labels=patches, index=1) # largest field COM, in bins
- f_COM_rho, f_COM_phi = cart2pol(*bins2meters(x_in_b, y_in_b, xy_range)) # largest field COM, in polar coords.
- x, y = np.where(f_map == np.max(f_map)) # location of the peak unit firing, in bins
- pfr_rho, pfr_phi = cart2pol(*bins2meters(x[0], y[0], xy_range)) # location of the peak unit firing, in polar
-
- # same for occupancy
- _, _, occ_info, _ = map_stats(o_map, o_map)
- o_patches = get_field_patches(o_map) # 2D matrix, patches labeled according to the size
- x, y = ndi.center_of_mass(o_map, labels=o_patches, index=1) # largest field COM, in bins
- o_COM_rho, o_COM_phi = cart2pol(*bins2meters(x, y, xy_range)) # largest field COM, in polar coords.
- # order should match metric_names defined above
- collected.append([o_map, f_map, sparsity, selectivity, spat_info, peak_FR, \
- patches, (f_COM_rho, f_COM_phi), (pfr_rho, pfr_phi), occ_info, \
- o_patches, (o_COM_rho, o_COM_phi)])
-
- for i in range(len(collected[0])): # iterate over metrics
- dataset = np.array([x[i] for x in collected]) # metric data for each epoch
- create_dataset(h5name, '/units/%s' % unit_name, metric_names[i], dataset)
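- # For reference, the classic Skaggs et al. definitions that map_stats is
- # expected to follow (a sketch, not the actual implementation): with p_i the
- # occupancy probability of bin i, r_i its firing rate and R = sum(p_i * r_i)
- # the mean rate,
- # sparsity = (sum(p_i * r_i))**2 / sum(p_i * r_i**2)
- # spatial info = sum(p_i * (r_i / R) * np.log2(r_i / R)) over bins with r_i > 0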
- def write_best_match_rotation(sessionpath):
- filebase = os.path.basename(os.path.normpath(sessionpath))
- h5name = os.path.join(sessionpath, filebase + '.h5')
- with h5py.File(h5name, 'r') as f:
- unit_names = [name for name in f['units']]
- for unit_name in unit_names:
- # assuming maps for first 3 epochs are already there
- with h5py.File(h5name, 'r') as f:
- maps = np.array(f['units'][unit_name][H5NAMES.f_maps['name']])
- corr_profiles = np.zeros((3, 360))
- for i, idxs in enumerate([(0, 1), (1, 2), (0, 2)]):
- corr_profiles[i], _ = best_match_rotation_polar(maps[idxs[0]], maps[idxs[1]])
- create_dataset(h5name, '/units/%s' % unit_name, H5NAMES.best_m_rot, corr_profiles)
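- # A minimal sketch of the rotation-matching idea (the actual logic lives in
- # spatial.best_match_rotation_polar; the binning details here are assumptions):
- # rotate one map against the other in 1-degree steps and correlate each time:
- # def rotation_profile_sketch(map_a, map_b): # maps with 360 angular columns
- #     return np.array([np.corrcoef(map_a.ravel(), np.roll(map_b, k, axis=1).ravel())[0, 1]
- #                      for k in range(360)])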