# heterogeneous_populations.py
import os

import numpy as np
import pandas as pd
import rlxnix as rlx
from joblib import Parallel, delayed
from IPython import embed

# Module-level cache mapping dataset id -> array of all spike times of that cell,
# so each NIX file is read from disk at most once per process.
spike_buffer = {}
# File name of the white-noise stimulus used throughout this module
# (presumably 300 Hz cutoff, 10 s duration, 0.3 contrast — inferred from the name).
stimulus_name = "gwn300Hz10s0.3.dat"
  10. def population_spikes(df, data_location, population_size, subtract_delay=True):
  11. """Assembles the spike times for a 'population' of neurons.
  12. Parameters
  13. ----------
  14. df : pd.DataFrame
  15. The DataFrame containing the information about where to find the recorded white noise responses.
  16. population_size : int
  17. The size of the population, i.e. the number of randomly selected trials.
  18. subtract_delay : bool, optional
  19. Defines whether the neuronal delay should be subtracted from the spike times, by default True
  20. Returns
  21. -------
  22. list:
  23. list of spike times of the randomly selected trials.
  24. list:
  25. list containing whether or not the response needs to be inverted.
  26. list:
  27. the respective cell ids
  28. list:
  29. the respective trial ids.
  30. """
  31. cell_indices = np.arange(len(df.dataset_id.unique()))
  32. np.random.shuffle(cell_indices)
  33. cells = df.dataset_id.unique()[cell_indices[:population_size]]
  34. spikes = []
  35. inversion_needed = []
  36. cell_ids = []
  37. trial_ids = []
  38. for c in cells:
  39. if c in spike_buffer.keys():
  40. # print(f"Found spikes of cell {c} in buffer!")
  41. all_spikes = spike_buffer[c]
  42. else:
  43. # print(f"Reading spikes of cell {c} from file!")
  44. dataset = rlx.Dataset(os.path.join(data_location, c + ".nix"))
  45. trace_name = "spikes-1" if "spikes-1" in dataset.event_traces else "Spikes-1"
  46. all_spikes = dataset.event_traces[trace_name].data_array[:]
  47. spike_buffer[c] = all_spikes
  48. cell_trials = df[df.dataset_id == c]
  49. cell_indexes = cell_trials.index.values
  50. trial_index = cell_indexes[np.random.randint(len(cell_indexes))]
  51. trial = cell_trials[cell_trials.index == trial_index]
  52. trial_spikes = all_spikes[(all_spikes >= trial.start_time.values[0]) & (all_spikes < trial.end_time.values[0])] - trial.start_time.values[0]
  53. if subtract_delay:
  54. trial_spikes -= trial.delay.values[0] # compensate for the delay between stimulus and response
  55. spikes.append(trial_spikes[trial_spikes > 0])
  56. inversion_needed.append(trial.inverted.values[0])
  57. cell_ids.append(c)
  58. trial_ids.append(trial.stimulus_index.values[0])
  59. return spikes, inversion_needed, cell_ids, trial_ids
  60. def mutual_info_heterogeneous_population(df, data_location, stim_location, population_size=10, delay=0.01, repeats=10,
  61. kernel_sigma=0.00125, saving=False, result_location=".", delay_type=DelayType.Equal):
  62. """Calculates the mutual information between the stimulus and the population response assembled from random selections of neuronal responses.
  63. The population response is the firing rate (estimated by kernel convolution with a Gaussian kernel of the given kernel sigma) averaged across responses.
  64. Parameters
  65. ----------
  66. df : pandas.DataFrame
  67. The DataFrame holding information about the white noise driven responses recorded in the neurons.
  68. data_location : _type_
  69. The folder containing the raw data.
  70. stim_location : _type_
  71. The folder in which the stimuli are stored.
  72. population_size : int, optional
  73. The population size, by default 10
  74. delay : float, optional
  75. The desired average artificial delay between stimulus and response, if negative, the cell delay will not be subtracted, by default 0.01
  76. repeats : int, optional
  77. Number of repetitions for the given combination of kernel and delay, by default 10
  78. kernel_sigma : float, optional
  79. The standard deviation of the simulated 'synaptic' gaussian kernel, by default 0.00125
  80. saving : bool, optional
  81. if True, the population responses are stored to disk, by default False
  82. result_location : str, optional
  83. The location the population responses should be stored to, by default "."
  84. delay_type : DelayType, optional
  85. The delay type to be used for the artificial delay, by default DelayType.Equal
  86. Returns
  87. -------
  88. list of dictionaries
  89. containing the analysis results
  90. """
  91. coherences = []
  92. frequency = []
  93. _, stimulus = load_white_noise_stim(os.path.join(stim_location, stimulus_name))
  94. results = []
  95. for i in range(repeats):
  96. r = {"pop_size": population_size, "delay": delay, "true_delay": 0.0, "kernel_sigma": kernel_sigma, "num_inversions": 0,
  97. "population_rate": 0.0, "rate_modulation": 0.0, "snr": 0.0, "mi":0.0, "mi_100": 0.0, "mi_200": 0.0, "mi_300": 0.0,
  98. "cell_ids":[], "mtag_ids":[], "trial_ids":[], "result_file":"", "delay_type": str(delay_type) }
  99. print(" "*200, end="\r")
  100. print(f"delay type: {delay_type}, population size: {population_size}, delay: {delay:.5f}s, kernel: {kernel_sigma:.5f}, repeat: {i}", end="\r" )
  101. subtract_delay = delay >= 0.0
  102. pop_spikes, inversion_needed, cell_ids, trial_ids = population_spikes(df, data_location, population_size, subtract_delay)
  103. f, c, mis, rates, true_delay = mutual_info(pop_spikes, delay, inversion_needed, stimulus, freq_bin_edges=[0, 100, 200, 300], kernel_sigma=kernel_sigma, delay_type=delay_type)
  104. coherences.append(c)
  105. frequency = f
  106. smoothing_kernel = np.ones(19)/19
  107. sc = np.convolve(c, smoothing_kernel, mode="same")
  108. max_coh = np.max(sc[f <= 300])
  109. peak_f = f[np.where(sc == max_coh)]
  110. upper_cutoff = f[(sc <= max_coh/np.sqrt(2)) & (f > peak_f)][0]
  111. lower_cutoff = f[(sc <= max_coh/np.sqrt(2)) & (f < peak_f)][-1] if len(f[(sc <= max_coh/np.sqrt(2)) & (f < peak_f)]) > 0 else 0.0
  112. avg_response = np.mean(rates, axis=1) if population_size > 1 else rates
  113. avg_rate = np.mean(avg_response)
  114. rate_error = np.std(rates, axis=1) if population_size > 1 else None
  115. variability = np.mean(rate_error, axis=0) if rate_error is not None else None
  116. snr = np.std(avg_rate) / variability if variability is not None else None
  117. outfile_name = "pop_size%i_repeat%i_delay%.5f.npz" % (population_size, i, delay)
  118. outfile_full = os.path.join(result_location, outfile_name)
  119. if saving:
  120. np.savez_compressed(outfile_full, coherences=coherences, frequencies=frequency, avg_rate=avg_rate, rate_std=rate_error)
  121. r["true_delay"] = true_delay
  122. r["num_inversions"] = np.sum(inversion_needed)
  123. r["population_rate"] = avg_rate
  124. r["rate_modulation"] = np.std(avg_response)
  125. r["peak_coh"] = max_coh
  126. r["upper_cutoff"] = upper_cutoff
  127. r["lower_cutoff"] = lower_cutoff
  128. r["peak_freq"] = peak_f
  129. r["snr"] = snr if snr is not None else 0.0
  130. r["variability"] = variability if variability is not None else 0.0
  131. r["mi"] = mis[0]
  132. r["mi_100"] = mis[1]
  133. r["mi_200"] = mis[2]
  134. r["mi_300"] = mis[3]
  135. r["cell_ids"] = cell_ids
  136. r["trial_ids"] = trial_ids
  137. r["result_file"] = outfile_name if not saving else ""
  138. results.append(r)
  139. return results
  140. def run_heterogeneous_population_analysis(whitenoise_trial_file, data_location, stim_location,
  141. population_sizes=[2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30],
  142. delays=[0.0, 0.0005, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.0125, 0.015],
  143. kernels=[0.000125, 0.00025, 0.0005, 0.001, 0.002, 0.003, 0.005, 0.01, 0.015, 0.02, 0.025],
  144. repeats=50, saving=False, num_cores=1, delay_type=DelayType.Equal):
  145. """Runs the population analysis for heterogeneous populations.
  146. Parameters
  147. ----------
  148. whitenoise_trial_file : str
  149. The dataframe containing the information of the white noise recordings.
  150. data_location : str
  151. The path to the raw data.
  152. stim_location : str
  153. The path to the folder containing the stimuli.
  154. population_sizes : list of ints, optional
  155. List of population sizes, by default [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
  156. delays : list, optional
  157. list of artificial delays added to the spike times. -1 indicates that the original delay should not be changed, by default [0.0, 0.0005, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.0125, 0.015]
  158. kernels : list, optional
  159. list of Gaussian kernel standard deviations used for firing rate estimation, by default [0.000125, 0.00025, 0.0005, 0.001, 0.002, 0.003, 0.005, 0.01, 0.015, 0.02, 0.025]
  160. repeats : int, optional
  161. Maximum number of repetitions for each combination of population sizes, delays, kernels. For each repetition a new set of responses will be used. By default 50
  162. saving : bool, optional
  163. Flag that indicates whether or not coherence spectra, firing rates etc should be saved, by default False
  164. num_cores : int, optional
  165. number of parallel jobs spawned to run the analyses, by default 1
  166. delay_type : DelayType
  167. The type of delay added to the spike responses. By default DelayType.Equal
  168. Returns
  169. -------
  170. pd.DataFrame
  171. The DataFrame containing the analysis results.
  172. """
  173. result_dicts = []
  174. whitenoise_trials = pd.read_csv(whitenoise_trial_file, sep=";", index_col=0)
  175. whitenoise_trials = whitenoise_trials[(whitenoise_trials.stimfile == stimulus_name) & (whitenoise_trials.duration == 10)] # & (whitenoise_trials.inverted == False)]
  176. conditions = []
  177. for k in kernels:
  178. for ps in population_sizes:
  179. for d in delays:
  180. conditions.append((k, ps, d))
  181. processed_list = Parallel(n_jobs=num_cores)(delayed(mutual_info_heterogeneous_population)(whitenoise_trials, data_location, stim_location,
  182. population_size=condition[1],
  183. delay=condition[2],
  184. repeats=repeats,
  185. kernel_sigma=condition[0],
  186. saving=saving,
  187. delay_type=delay_type) for condition in conditions)
  188. for pr in processed_list:
  189. if pr is not None:
  190. result_dicts.extend(pr)
  191. df = pd.DataFrame(result_dicts)
  192. return df