script1_line-length_analysis.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Wed Oct 9 17:29:15 2019
  4. @author: kleisp
  5. """
  6. import os
  7. os.chdir('C:\EAfiles\EAdetection')
  8. import core.ea_management as eam
  9. from core.helpers import open_hdf5
  10. from glob import glob
  11. #import phases_ictalInter as pii
  12. #import core.ea_analysis as eana
  13. import pandas as pd
  14. import numpy as np
  15. os.chdir('C:\EAfiles\EAdetection')
  16. reload(eam)
  17. #%% function for getting line length directly from resampled raw data
  18. def get_linelength(filename,ll_dict_controls, recID):
  19. excelpath = 'C:\EAfiles\Excel\\startstop_PK_HCc.xlsx' #insert the directory for an excel file containing the start and stop times for each recording (first column: recID, second column: start time, third column stop time)
  20. df = pd.read_excel(excelpath) #reading an excel file with start and stop times (s) of a recording
  21. df.set_index('recID', inplace=True) #recID
  22. ll_dict_controls[recID]['start'] = int(df.start[recID]) #start time
  23. ll_dict_controls[recID]['stop'] = int(df.stop[recID]) #stop time
  24. sr=500 #sampling rate of downsampled data
  25. data_dict=open_hdf5(filename,group=None,read_maskedarr=False)
  26. data=data_dict['data']['trace']
  27. reccut=data[int(ll_dict_controls[recID]['start']*sr):int(ll_dict_controls[recID]['stop']*sr)] #cut data according to start and stop times
  28. N_datapoints=len(reccut)
  29. duration=N_datapoints/sr
  30. linelength = np.sum(np.abs(np.diff(reccut))) #line length is the sum of subsequent datapoints -->derivative
  31. linelength_s=linelength/duration
  32. ll_dict_controls[recID]['linelength/second']=linelength_s
  33. ll_dict_controls[recID]['linelength']=linelength
  34. #%% get linelength directly from h5 (downsampled data)
  35. os.chdir(r'C:\EAfiles\DATA\h5files') #enter directory, where all the needed h5 files are
  36. files = glob(r'C:\EAfiles\DATA\h5files\*.h5') #takes all h5 files from this folder
  37. hfile = files
  38. lines = np.arange(0, len(hfile)) #as many lines as there are h5 files
  39. ll_dict_controls={}
  40. ll_dict_controls_d={}
  41. for i in lines:
  42. recID = os.path.basename(hfile[i]).split('__')[0] #getting the IDs
  43. print (i)
  44. print (recID)
  45. filename=os.path.basename(hfile[i])
  46. ll_dict_controls[recID]= {}
  47. get_linelength(filename,ll_dict_controls, recID)
  48. #%% creating arrays with data
  49. recIDs = np.array(ll_dict_controls.keys())
  50. flavour = 'linelength/second'
  51. ii_linelengths = np.array([ll_dict_controls[recID][flavour] for recID in ll_dict_controls.keys()])
  52. #%% create excel file
  53. os.chdir('C:\EAfiles\RESULTS')
  54. df_sr=pd.DataFrame(columns=
  55. ['recID',
  56. 'linelength/s'])
  57. df_sr['recID'] = recIDs
  58. df_sr['linelength/s'] = ii_linelengths
  59. df_sr.to_excel('test_linelengths.xlsx') #name the file