PKleis
/
PACK_in-vivo


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
							# -*- coding: utf-8 -*-
"""
Created on Wed Oct 14 11:18:44 2020

@author: kleisp
This is a script for getting matrices of snippets around the stimulation pulses in PACK-expressing mice, 
which can later be used for plotting the snippets on top of each other and for line length
calculations.
Note that recordings with 0.1 Hz and 0.05 Hz illumination need to be treated separately
One needs to load dictionaries containing recIDs with corresponding smr filenames for using raw data
Abbreviations:
dict- dictionary
stim- light pulse
pre- before light pulse
post-after light pulse
snipmat- matrix containing snippets

"""

import os
os.chdir('C:\EAfiles\MyCode')
from glob import glob
import numpy as np
import pickle
import pandas as pd
from scipy import stats

os.chdir('C:\EAfiles\EAdetection')

import core.ea_management as eam
reload(eam)

#%% (1) define functions to extract snippets before and after the stim
''' 'pre' or 'post' are the snippet length in seconds, here 2 is used, sr=sampling rate (here 500)'''

# get the spike trace before stimulation (pre):
#stimtime is one element from stimtimes array
def get_spiketrace_pre(stimtimes, datatrace, pre, sr):
    cutout_pre = np.int(pre*sr)
    stim_index= np.int(stimtimes*sr) 
    spiketrace = datatrace[stim_index-cutout_pre:stim_index]
    return spiketrace
  
# get the spike trace after(post) stimulation:

def get_spiketrace_post(stimtime, datatrace, post, sr):
    cutout_post = np.int(post*sr)
    stim_index= np.int(stimtime*sr)
    spiketrace = datatrace[stim_index:stim_index+cutout_post]
    return spiketrace

#get snippet before each stimulation and insert it into a matrix, called snipmat:
  
def get_snipmat_pre(datatrace, recID, pre, stimtimes,  sr):
    cutout_pre = np.int(pre*sr)
    N_stim = len(stimtimes) #number of stimuli
    snippts = cutout_pre
    snipmat = np.zeros((N_stim,snippts))
    for tt,stimtime in enumerate(stimtimes):
        spiketrace = get_spiketrace_pre(stimtime, datatrace, pre, sr) # instead of datatrace you can use zTrace
        snipmat[tt] = spiketrace
    return snipmat  
  
#get snippet after each stimulation and insert it into a matrix, called snipmat:
def get_snipmat_post(datatrace, recID, post, stimtimes,  sr):
    cutout_post = np.int(post*sr)
    N_stim = len(stimtimes) #number of stimuli
    snippts = cutout_post
    snipmat = np.zeros((N_stim,snippts))
    for tt,stimtime in enumerate(stimtimes):
        spiketrace = get_spiketrace_post(stimtime, datatrace, post, sr) # stimtime is just one timepoint from stimtimes
        snipmat[tt] = spiketrace
    return snipmat   
  
#%% (2) defining the function to get line length of each snippet(snipmat line length)

def get_snipmat_linelength(snipmat_pre, snipmat_post, linelength_dict_pre, linelength_dict_post, recID):  
  
        linelength_list=[]
        linelength_s_list=[]
        rms_list=[]
        count=0
        for i in xrange(snipmat_pre.shape[0]):
            data=snipmat_pre[i]
            N_datapoints=len(data)
            duration=N_datapoints/500 #sr=500
            linelength = np.sum(np.abs(np.diff(data)))
            linelength_list.append(linelength)
            linelength_s=linelength/duration
            linelength_s_list.append(linelength_s)
            count=count+1
            
            linelength_dict_pre[recID]={}
            
        linelength_dict_pre[recID]['sem pre-pulse linelength/second']=stats.sem(linelength_s_list) 
        linelength_dict_pre[recID]['mean pre-pulse linelength/second']=np.mean(linelength_s_list) 
        linelength_dict_pre[recID]['mean pre-pulse linelength']=np.mean(linelength_list)   
        linelength_dict_pre[recID]['pulse number']=count
        
            
        linelength_list=[]
        linelength_s_list=[]
        rms_list=[]
        count=0
        for i in xrange(snipmat_post.shape[0]):
            data=snipmat_post[i]
            N_datapoints=len(data)
            duration=N_datapoints/500
            linelength = np.sum(np.abs(np.diff(data)))
            linelength_list.append(linelength)
            linelength_s=linelength/duration
            linelength_s_list.append(linelength_s)
            count=count+1
            linelength_dict_post[recID]={}

        
        linelength_dict_post[recID]['sem post-pulse linelength/second']=stats.sem(linelength_s_list) 
        linelength_dict_post[recID]['mean post-pulse linelength/second']=np.mean(linelength_s_list) 
        linelength_dict_post[recID]['mean post-pulse linelength']=np.mean(linelength_list)  
        linelength_dict_post[recID]['pulse number']=count
        
#%% in order to exclude unwanted recordings
      
pathtobadrecIDs = 'C:\EAfiles\ID_Dateien\\badrecIDs.txt' 
badrecIDs = []
with open (pathtobadrecIDs,'r') as f:
    badIDs = f.read().splitlines()
    badrecIDs = badrecIDs + badIDs
    

#%% (3) Example of using the script to extract the snipmats
#Saline PACK mice 0.05Hz: get snipmat dicts (for before and after 0.05 Hz pulses) (downsampled data)

os.chdir('C:\EAfiles\EAdetection')
ymlfiles = glob('C:\EAfiles\yml_setup_PK43\*.yml') #takes all yml files from this folder
ymlfile = ymlfiles
lines = np.arange(0, len(ymlfile)) #as many lines as there are ymlfiles
snipmat_005Hz_pre_controls={}
snipmat_005Hz_post_controls={}
sr= 500
pre, post = 2, 2 #2 seconds before and after pulse    
for i in lines:
    recID = os.path.basename(ymlfile[i]).split('__')[0] #getting the IDs
    print (i)
    print (recID)
  
    if recID in badrecIDs: 
        pass
    
    else:
        excelpath = 'C:\EAfiles\Excel\\startstop_controls_HCc.xlsx'
        df = pd.read_excel(excelpath) #reading an excel file with start and stop times of each recording
        df.set_index('recID', inplace=True) #recID
        stimstop = int(df.stop[recID]) #stop time
        if recID=='PK16_005Hz1_HCc': #adding an exception (can skip this step if you have no exceptions)
            stimstart=int(df.start[recID]) 
        else:
            stimstart=10 #the stim usually starts at 10s
        stimstep=20 #speriod is 20 s
        stimtimes=np.arange(stimstart,stimstop,stimstep)
        
        aRec = eam.Rec(init_ymlpath=ymlfile[i])
        snipmat_005Hz_pre_controls[recID]={}
        data=aRec.raw_data 
        #z_data=(data-np.mean(data))/np.std(data)
        snipmat_pre=get_snipmat_pre(data, recID, pre, stimtimes, sr=500)
        snipmat_005Hz_pre_controls[recID]=snipmat_pre

        snipmat_005Hz_post_controls[recID]={}
        snipmat_post=get_snipmat_post(data, recID, post, stimtimes, sr=500)
        snipmat_005Hz_post_controls[recID]=snipmat_post

        
# save snipmat_dict 

filename = 'C:\EAfiles\MyDicts\snipmat_005Hz_pre_controls'
outfile = open(filename, 'wb') #write binary: mit Erlaubnis zu (über)schreiben
pickle.dump(snipmat_005Hz_pre_controls, outfile)
outfile.close()

filename = 'C:\EAfiles\MyDicts\snipmat_005Hz_post_controls'
outfile = open(filename, 'wb') #write binary: mit Erlaubnis zu (über)schreiben
pickle.dump(snipmat_005Hz_post_controls, outfile)
outfile.close()

#%% (4) Example of using the script to create dictionaries with line length before and after 0.05Hz pulses from snipmats above

os.chdir('C:\EAfiles\EAdetection')
ymlfiles = glob('C:\EAfiles\yml_setup_PK43\*.yml') #takes all yml files from this folder
ymlfile = ymlfiles
lines = np.arange(0, len(ymlfile)) #as many lines as there are ymlfiles
linelength_dict_005Hz_pre={}
linelength_dict_005Hz_post={}


filename = 'C:\EAfiles\MyDicts\snipmat_005Hz_pre_controls'
infile = open(filename,'rb') #read binary
snipmat_005Hz_pre_controls = pickle.load(infile)
infile.close()


filename = 'C:\EAfiles\MyDicts\snipmat_005Hz_post_controls'
infile = open(filename,'rb') #read binary
snipmat_005Hz_post_controls = pickle.load(infile)
infile.close()


for i in lines:
    recID = os.path.basename(ymlfile[i]).split('__')[0] #getting the IDs
    print (i)
    print (recID)
    
    if recID in badrecIDs: #template also needs to be excluded
        #badrecIDs = ['EPxy_xHz_HCi1','...']
        pass
    
    else:
        snipmat_pre=snipmat_005Hz_pre_controls[recID]
        snipmat_post=snipmat_005Hz_post_controls[recID]
        get_snipmat_linelength(snipmat_pre, snipmat_post, linelength_dict_005Hz_pre, linelength_dict_005Hz_post, recID)
        
        
filename = 'C:\EAfiles\MyDicts\linelength_dict_005Hz_post'
outfile = open(filename, 'wb') #write binary: mit Erlaubnis zu (über)schreiben
pickle.dump(linelength_dict_005Hz_post, outfile)
outfile.close()

filename = 'C:\EAfiles\MyDicts\linelength_dict_005Hz_pre'
outfile = open(filename, 'wb') #write binary: mit Erlaubnis zu (über)schreiben
pickle.dump(linelength_dict_005Hz_pre, outfile)
outfile.close()

#%% make an excel file of the pre- and post-pulse linelengths (mean and SEM)
recIDs = np.array(linelength_dict_005Hz_pre.keys())

flavour = 'mean pre-pulse linelength/second'
mean_pre_linelengths = np.array([linelength_dict_005Hz_pre[recID][flavour] for recID in linelength_dict_005Hz_pre.keys()])

flavour = 'sem pre-pulse linelength/second'
sem_pre_linelengths = np.array([linelength_dict_005Hz_pre[recID][flavour] for recID in linelength_dict_005Hz_pre.keys()])

flavour = 'mean post-pulse linelength/second'
mean_post_linelengths = np.array([linelength_dict_005Hz_post[recID][flavour] for recID in linelength_dict_005Hz_pre.keys()])

flavour = 'sem post-pulse linelength/second'
sem_post_linelengths = np.array([linelength_dict_005Hz_post[recID][flavour] for recID in linelength_dict_005Hz_pre.keys()])

flavour = 'pulse number'
n_pulses = np.array([linelength_dict_005Hz_pre[recID][flavour] for recID in linelength_dict_005Hz_pre.keys()])


os.chdir('C:\EAfiles\RESULTS')

df_sr=pd.DataFrame(columns=
    ['recID', 
    'mean pre-pulse [mv/s]',
    'sem pre-pulse [mv/s]',
    'mean post-pulse [mv/s]',
    'sem post-pulse [mv/s]',
    'pulse number'])

df_sr['recID'] = recIDs
df_sr['mean pre-pulse [mv/s]'] = mean_pre_linelengths 
df_sr['sem pre-pulse [mv/s]'] = sem_pre_linelengths 
df_sr['mean post-pulse [mv/s]'] = mean_post_linelengths 
df_sr['sem post-pulse [mv/s]'] = sem_post_linelengths 
df_sr['pulse number'] = n_pulses

df_sr.to_excel('Test_snipmat-005Hz-linelengths-control-PK43.xlsx')