Browse Source

upload intan to matlab data conversion code

Balazs Hangya 4 years ago
parent
commit
fdde3fd685
4 changed files with 273 additions and 0 deletions
  1. 36 0
      common_avg_ref.m
  2. 29 0
      disc.m
  3. 104 0
      oedisc.m
  4. 104 0
      read_openephys.m

+ 36 - 0
common_avg_ref.m

@@ -0,0 +1,36 @@
+function common_avg = common_avg_ref(filepath,varargin)
+%COMMON_AVG_REF   Common average referencing.
+%   AVG = COMMON_AVG_REF(PATH) calculates average of 32 open ephys
+%   recording channels for performing offline common average referencing.
+%
+%   AVG = COMMON_AVG_REF(PATH,CHANNEL_NUMBER,EXCLUDE_CHANNELS) takes
+%   optional input arguments for number of recording channels (default, 32)
+%   and any potential channels to exclude from averaging.
+%
+%   See also LOAD_OPEN_EPHYS_DATA.
+
+% Panna Hegedüs, Balazs Hangya 2017/09
+
+% Default arguments
+prs = inputParser;
+addRequired(prs,'filepath',@(s)exist(s,'dir'))   % data path
+addOptional(prs,'channel_number',32,@isnumeric)   % number of recording channels
+addOptional(prs,'exclude_channels',[],@isnumeric)   % exclude these channels from averaging
+addOptional(prs,'rawdatafiletag','',@ischar)   % tags for raw data filenames
+parse(prs,filepath,varargin{:})
+g = prs.Results;
+
+% Calculate sum of all channels
+channelsum = [];
+for iC = 1:g.channel_number
+    if ~ismember(iC,g.exclude_channels)   % exclude channels
+        data = load_open_ephys_data([filepath '\' '100_CH' num2str(iC) g.rawdatafiletag '.continuous']);
+        pcs = horzcat(channelsum,data);
+        channelsum = sum(pcs,2);
+    end
+end
+
+% Calculate average
+NumChannels = g.channel_number - length(g.exclude_channels);
+common_avg = channelsum / NumChannels;   % average
+common_avg = [common_avg common_avg common_avg common_avg];  % repeat four times to subtract from tetrode data

+ 29 - 0
disc.m

@@ -0,0 +1,29 @@
+function [spt, pk] = disc(unit,thres)
+%DISC   Unit discrimination.
+%   [SPT PK] = DISC(UNIT,THR) returns the location (SPT) and value (PK) of
+%   UNIT peaks above THR.
+
+% Input argument check
+narginchk(2,2)
+unit = unit(:)';   % row vector
+
+% Segments above threshold
+dsc = find(unit>=thres);   % find all points above threshold 
+if isempty(dsc)  % no spikes
+    spt = [];
+    pk = [];
+    return
+end
+lendsc = length(dsc);
+df = diff(dsc);   % distance of points above threshold
+gaps = find(df>1);   % find gaps between segments above threshold
+gaps = [0 gaps lendsc];   % there's a spike between every two gaps
+
+% Find peaks for each segment
+NumSpk = length(gaps) - 1;   % number of spikes
+[spt, pk] = deal(nan(1,NumSpk));
+for iS = 1:NumSpk
+    [mx, mxloc] = max(unit(dsc(gaps(iS)+1:gaps(iS+1))));
+    spt(iS) = dsc(1,gaps(iS)+mxloc);
+    pk(iS) = mx;   % value at peak
+end

+ 104 - 0
oedisc.m

@@ -0,0 +1,104 @@
+function [AllTimeStamps, AllWaveForms] = oedisc(data,ts,sr,thr,varargin)
+%OEDISC   Unit discrimination.
+%   [T W] = OEDISC(DATA,TS,SR,THR) performs threshold discrimination of
+%   continuous timestamped (TS) unit data (DATA) sampled at the rate SR
+%   using the specified threshold (THR). Peak times (T, 'TimeStamps') and
+%   spike waveforms (W, 'WaveForms') are saved for each tetrode. A 750 us
+%   censored period is applied and the larger spike is kept. Time window
+%   for waveform data is set to -300 to 1000 us.
+%
+%   OEDISC(DATA,TS,SR,THR,DR) saves the results in the specified directory 
+%   (DR). If DR is empty, the data is not saved.
+%
+%   See also READ_INTAN_DATA.
+
+%   Balazs Hangya, Institute of Experimental Medicine
+%   hangya.balazs@koki.mta.hu
+%   30-Jan-2018
+
+% Default arguments
+prs = inputParser;
+addRequired(prs,'data',@isnumeric)   % raw or filtered data
+addRequired(prs,'ts',@isnumeric)   % timestamps
+addRequired(prs,'sr',@isnumeric)   % sampling rate
+addRequired(prs,'thr',@isnumeric)   % discrimination threshold
+addOptional(prs,'resdir',['C:' filesep 'Balazs' filesep '_data' filesep 'Intan' filesep 'Tina' filesep],...
+    @(s)ischar(s)|isempty(s))   % results directory
+addParameter(prs,'Filtering','enable',@(s)ischar(s)|...
+    ismember(s,{'disable','enable'}))   % switch for filtering
+parse(prs,data,ts,sr,thr,varargin{:})
+g = prs.Results;
+
+% File name
+savestr0 = ['save(''' g.resdir filesep 'TT'];
+
+% Sampling rate
+nqf = sr / 2;   % Nyquist freq.
+deadtime = 0.00075;   % 750 us dead time
+dtp = deadtime * sr;   % dead time in data points
+
+% Waveform window
+win = [-0.0003 0.0006];   % -300 to 600 us
+winp = round(win*sr);   % waveform window in data points
+
+% Threshold discrimintaion
+NumChannels = size(data,2);   % number of channels - 16
+NumTetrodes = NumChannels / 4;   % number of tetrodes - 4
+[tvdisc, tpeaks, AllTimeStamps, AllWaveForms] = deal(cell(1,NumTetrodes));
+for iT = 1:NumTetrodes
+    [cvdisc, tdata, cpeaks] = deal(cell(1,4));
+    for iC = 1:4
+        chnum = (iT - 1) * 4 + iC;  % current channel
+        disp(['tetrode: ' num2str(iT) '   Channel: ' num2str(chnum)])
+        cdata = data(:,chnum)';   % data from one channel
+        if isequal(g.Filtering,'enable')
+            [b,a] = butter(3,[700 7000]/nqf,'bandpass');   % Butterworth filter
+            unit = filter(b,a,cdata);  % filter
+        elseif isequal(g.Filtering,'disable')
+            unit = cdata;
+        else
+            error('oedisc:InputArg','Unsupported input argument for filtering.')
+        end
+        [cvdisc{iC}, cpeaks{iC}] = disc(-unit,thr);   % discriminate (spike times)
+        tdata{iC} = -unit;   % data from the current tetrode
+    end
+    tvdisc{iT} = cell2mat(cvdisc);
+    tpeaks{iT} = cell2mat(cpeaks);
+    [tvdisc{iT}, inx] = sort(tvdisc{iT},'ascend');  % all spike times from one tetrode
+    tpeaks{iT} = tpeaks{iT}(inx);
+        
+    % Dead time
+    dtv = diff(tvdisc{iT});   % ISI
+    dtpk = diff(tpeaks{iT});   % comparison of neighboring peaks
+    while any(dtv<dtp)
+        censor_inx = dtv < dtp;   % ISI < dead time
+        peak_comp = dtpk > 0;
+        delete_inx1 = censor_inx & peak_comp;
+        delete_inx2 = [0 censor_inx & ~peak_comp];
+        tvdisc{iT}([find(delete_inx1) find(delete_inx2)]) = [];
+        tpeaks{iT}([find(delete_inx1) find(delete_inx2)]) = [];
+        dtv = diff(tvdisc{iT});   % ISI
+        dtpk = diff(tpeaks{iT});   % comparison of neighboring peaks
+    end
+    tvdisc{iT}(tvdisc{iT}<=-winp(1)|tvdisc{iT}>=size(data,1)-winp(2)) = [];   % we may lose some spikes near the ends of the file
+    
+    % Waveform
+    winx = repmat(tvdisc{iT}(:)+winp(1),1,sum(abs(winp))) + repmat(0:sum(abs(winp))-1,length(tvdisc{iT}),1);
+    wv = nan(size(winx,1),4,size(winx,2));   % waveforms: spikes x channels x time
+    for iC = 1:4
+        wv(:,iC,:) = tdata{iC}(winx);   % waveform data
+    end
+    WaveForms = wv;
+    AllWaveForms{iT} = WaveForms;
+    
+    % Spike times
+    spike_times = ts(tvdisc{iT});
+    TimeStamps = spike_times;
+    AllTimeStamps{iT} = TimeStamps;
+    savestr = [savestr0 num2str(iT) ''',''WaveForms'',''TimeStamps'');'];
+    
+    % Save
+    if ~isempty(g.resdir)
+        eval(savestr)
+    end
+end

+ 104 - 0
read_openephys.m

@@ -0,0 +1,104 @@
+function read_openephys(varargin)
+%READ_OPENEPHYS   Spike detection for open ephys data.
+%   READ_OPENEPHYS loads open ephys continuous files, subtracts the average
+%   of all channels (referencing), performs filtering, censoring and spike
+%   detection of tetrode channels. Files are savet in TT*.mat files for
+%   clustering.
+%
+%   Optional input arguments:
+%       DATADIR - path name for reading data
+%       RESDIR - path name for writing result files
+%       TTSPEC - numerical array for subslecting tetrodes
+%       RAWDATAFILETAG - if the raw data files have a name tag appended
+%
+%   Parameter-value input argumets:
+%       REFERENCE - allows referenceing options; default: 'common_avg',
+%           common average referencing; set to 'none' to apply no offline
+%           referencing
+%
+%   See also OEDISC.
+
+%   Balazs Hangya and Panna Hegedus
+%   Laboratory of Systems Neuroscience
+%   Institute of Experimental Medicine, Budapest, Hungary
+
+% Default arguments
+prs = inputParser;
+addOptional(prs,'datadir','n:\Pancsi\HDB12\2017-02-28_17-57-37_HDB12\',@(s)isempty(s)|isdir(s))   % data directory
+addOptional(prs,'resdir','',@(s)isempty(s)|isdir(s))   % results directory
+addOptional(prs,'TTspec',1:8,@isnumeric)   % selected tetrodes (default: 32 channels, 8 tetrodes)
+addOptional(prs,'rawdatafiletag','',@ischar)   % switch for filtering
+addParameter(prs,'reference','common_avg',@(s)ischar(s)|isempty(s))   % switch for referencing
+parse(prs,varargin{:})
+g = prs.Results;
+
+% Tetrodes to convert
+NumChannels = 32;   % number of channels
+NumTetrodes = NumChannels / 4;   % number of tetrodes
+if nargin < 3 || isempty(g.TTspec)
+    g.TTspec = 1:NumTetrodes;
+end
+
+% Directories
+if isequal(g.datadir(end),'\')
+    g.datadir = g.datadir(1:end-1);
+end
+
+if isempty(g.resdir)
+    if any(ismember(g.datadir,'-'))
+        sessiontag = 'a';
+        cmps = strsplit(g.datadir,'\');  % path components
+        animalID = cmps{cellfun(@(s)~isempty(s),regexp(cmps,'HDB(\d+)'))};
+        pd = regexp(cmps{end},'(\d+)-(\d+)-(\d+)','tokens');  % extract date
+        sessionID = [pd{1}{1}(3:4) pd{1}{2} pd{1}{3} sessiontag];
+        g.resdir = fullfile(getpref('cellbase','datapath'),animalID,sessionID);
+    else
+        cmps = strsplit(g.datadir,'\');  % path components
+        animalID = cmps{cellfun(@(s)~isempty(s),regexp(cmps,'HDB(\d+)'))};
+        sessionID = cmps{end};
+    end
+    g.resdir = fullfile(getpref('cellbase','datapath'),animalID,sessionID);
+    g.resdir = fullfile('e:\HDBpavlovian_cellbase\',animalID,sessionID);
+end
+
+SaveFeatures = true;   % save MClust feature files
+
+% Common average reference
+switch g.reference
+    case 'common_avg'
+        common_avg = common_avg_ref(g.datadir,32,[],g.rawdatafiletag);
+    case {'','none'}
+        common_avg = 0;
+    otherwise
+        error('read_openephys: Unknown reference option.')
+end
+
+% Spike detection
+for iT = g.TTspec
+    [tdata, ts] = deal(cell(1,4));
+    for iC = 1:4
+        basechannel = (iT - 1) * 4;
+        [tdata{iC}, ts{iC}, info] = load_open_ephys_data([ g.datadir '\' '100_CH' num2str(basechannel+iC) g.rawdatafiletag '.continuous']);
+    end
+    data = [tdata{1} tdata{2} tdata{3} tdata{4}];
+    data = data - common_avg;
+    if isequal(ts{1},ts{2},ts{3},ts{4})   % we assume that continuous channels share timestamps
+        tss = ts{1};
+    else
+        error('Timestamp error.')
+    end
+    [AllTimeStamps, AllWaveForms] = oedisc(data,tss,info.header.sampleRate,30,[]);   % filter and detect spikes
+    TimeStamps = AllTimeStamps{1};
+    WaveForms = AllWaveForms{1};
+    TTname = ['TT' num2str(iT)];
+    TT_path = fullfile(g.resdir,TTname);
+    save(TT_path, 'TimeStamps','WaveForms');
+    
+    if SaveFeatures   % pre-calculate MClust features
+        TTdata.TimeStamps = TimeStamps;
+        TTdata.WaveForms = WaveForms;
+        openephys_SaveMClustFeatures(TTdata,{'Amplitude';'Energy';'WavePC1';'Time'},[1 1 1 1],TT_path)
+    end
+    
+    clearvars -except g common_avg filepath resdir SaveFeatures iT
+end