pyxd.py 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933
  1. """Interface representation of xdphys files
  2. Class `XDdata`
  3. --------------
  4. """
  5. import os.path
  6. import re
  7. import gzip
  8. from types import MethodType
  9. from collections import OrderedDict, namedtuple
  10. import numpy as np
  11. import pandas as pd
  12. class XDbase():
  13. """Base class for different xdphys files.
  14. Used to avoid code duplication. Not intended to be used directly.
  15. Known subclasses:
  16. * XDdata (formerly XDfile)
  17. * XDcal
  18. """
  19. # For compatibility check, minimum supported xdphys version
  20. _XDPHYS_MIN_VER = '' # XDbase should be valid for all versions
  21. # Program name in fileinfo -- Must be set by subclasses
  22. _XDPHYS_PROG_NAME = ''
  23. # Mapping dictionary for lazy-evaluated attributes,
  24. # see __getattr__ for how it's used. Keys are attribute names, values are
  25. # function names. MUST be copied by subclasses before being altered!
  26. _lazy_dict = {}
  27. def __init__(self, filepath):
  28. """Create a new object, representing an xdphys file"""
  29. self.path = os.path.abspath(filepath)
  30. self._file = FileHandler(self.path)
  31. # Parse the first, special line. This is used to check if the file can
  32. # be read and is indeed an xdphys data file.
  33. if not self.fileinfo.ver >= self._XDPHYS_MIN_VER:
  34. raise RuntimeError(
  35. f"The class {self.__class__} does not support xdphys versions "
  36. f"below '{self._XDPHYS_MIN_VER}', but file {self.path} has "
  37. f"version '{self.fileinfo.ver}'. "
  38. f"If available, try using a '_legacy' version instead."
  39. )
  40. def __repr__(self):
  41. return f'{self.__class__.__module__}.{self.__class__.__name__}(r"{self.path!s}")'
  42. def __str__(self):
  43. return f'{self.__class__.__name__}("{self.path!s}")'
  44. def __getattr__(self, name):
  45. """Get lazy loaded attributes
  46. This function is called whenever an attribute is accessed that is not
  47. yet in the objects `__dict__`. The proper parsing function will be
  48. triggered and the (now available) attribute is returned.
  49. """
  50. if name in self._lazy_dict:
  51. # Find the function needed to make attribute `name` available,
  52. # and execute it:
  53. getattr(self, self._lazy_dict[name])()
  54. assert name in self.__dict__, ("Executing function"
  55. f" `{self.__class__.__name__}.{self._lazy_dict[name]}` "
  56. f"did not make `{name}` "
  57. "available as promised.")
  58. # Return the (now available) attribute:
  59. return getattr(self, name)
  60. raise AttributeError(f"'{self.__class__.__name__}' has not attribute '{name}'")
  61. def __dir__(self):
  62. return super().__dir__() + list(self._lazy_dict.keys()) + ['stimulus', 'trace']
  63. def _convert_value(self, value):
  64. """
  65. """
  66. assert isinstance(value, str), "_convert_value can only convert str objects"
  67. # Strip leading and trailing white space:
  68. value = value.strip()
  69. if "," in value:
  70. # Parse a list of values, recursively
  71. try:
  72. return self._convert_list(value)
  73. except:
  74. # unsupported list specification, leave as is
  75. pass
  76. if ":" in value:
  77. # Parse a range of values, recursively
  78. try:
  79. return self._convert_range(value)
  80. except:
  81. # unsupported range specification, leave as is
  82. pass
  83. try:
  84. # convert to int if possible AND str representation does not change
  85. if str(int(value)) == value:
  86. value = int(value)
  87. except ValueError:
  88. # conversion to int failed, let's try float:
  89. try:
  90. value = float(value)
  91. except:
  92. # neither conversion to int nor to float were satisfying
  93. pass
  94. return value
  95. def _convert_range(self, range_spec):
  96. r_start, r_stop, r_step = (self._convert_value(e) for e in range_spec.split(':'))
  97. r_n = ((r_stop - r_start) / r_step)+1
  98. if r_n % 1 == 0:
  99. r_n = int(r_n)
  100. else:
  101. raise ValueError("Unsupported range specification '{}'".format(range_spec))
  102. return np.linspace(r_start, r_stop, r_n)
  103. def _convert_list(self, list_spec):
  104. templist = [self._convert_value(e) for e in list_spec.split(',')]
  105. return np.concatenate([[e] if np.isscalar(e) else e for e in templist]).astype(np.float)
  106. def _clean_key(self, key):
  107. """Cleanup xdphys param keys
  108. Removes whitespace and illegal characters. Also converts to lowercase.
  109. """
  110. # For debugging, test if key is really a str:
  111. assert isinstance(key, str), "XDbase._clean_key(): key must be a `str` obj"
  112. # strip white space and convert to lowercase
  113. key = key.strip().lower()
  114. # remove illegal leading characters
  115. key = re.sub('^[^a-z_]+', '', key)
  116. # replace other illegal characters by underscores, thereby
  117. # merge successive characters into one underscore
  118. key = re.sub('[^0-9a-z_]+', '_', key)
  119. return key
  120. def _parse_infoline(self):
  121. """Read the first line of an xdphys file.
  122. Typically, the first line looks like this:
  123. ;; xdphys gen ver 2.8.1-1
  124. This line contains the recording `mod` (like 'gen', 'abi', 'itd', ...)
  125. and the xdphys version number (like '2.8.1-1')
  126. Sets attributes:
  127. - fileinfo
  128. - _seek_comments
  129. """
  130. with self._file as f:
  131. f.seek(0) # We know this must be the first line
  132. l = f.nextline()
  133. self._seek_comments = f.tell()
  134. m = re.match(';; {} ([^ ]+) ver (.*)'.format(self._XDPHYS_PROG_NAME), l)
  135. if not m:
  136. raise ValueError(f"Cannot read fileinfo from file {self.path!r}. Probably not an xdphys file.")
  137. # Use a namedtuple to make fileinfo accessible by dot-syntax:
  138. fileinfo_tuple = namedtuple('fileinfo', ('mod', 'ver'))
  139. self.fileinfo = fileinfo_tuple(mod = m.group(1),ver = m.group(2))
  140. # Attributes made available by _parse_infoline():
  141. _lazy_dict['fileinfo'] = "_parse_infoline"
  142. _lazy_dict['_seek_comments'] = "_parse_infoline"
  143. def _parse_comments(self):
  144. with self._file as f:
  145. # seek COMMENTS line
  146. f.seek(self._seek_comments)
  147. # Next line must be "COMMENTS":
  148. if not f.nextline() == "COMMENTS":
  149. raise ValueError(f"COMMENTS expected at position {self._seek_comments}")
  150. self.comments = []
  151. # read next line:
  152. l = f.nextline()
  153. while not l == "END_COMMENTS":
  154. # add comments line by line without leading semicolons:
  155. self.comments.append(l.lstrip(';'))
  156. # read next line:
  157. l = f.nextline()
  158. self._seek_params = f.tell()
  159. # Attributes made available by _parse_comments():
  160. _lazy_dict['comments'] = "_parse_comments"
  161. _lazy_dict['_seek_params'] = "_parse_comments"
  162. def _read_params_line(self, l):
  163. """Read one line `l` of the PARAMS block and add to self.params.
  164. """
  165. # Ignore lines starting with a semicolon (silently):
  166. if l.startswith(';'):
  167. # this is a comment, skip!
  168. return
  169. #with self._file as f:
  170. # self.params[f'PARAMS_COMMENT_{f.tell()-len(l)-1}'] = l
  171. #return
  172. m = re.match("(?:([^.=]+)\.)?([^.=]+)=(.*)", l)
  173. if m:
  174. groupkey, key, val = m.groups()
  175. # Resolve special case for "ana-decimate" to make it consistent:
  176. if key.startswith('ana'):
  177. groupkey = 'ana'
  178. key = key[4:] # 4 == len('ana-') == len('ana_')
  179. if groupkey is not None:
  180. groupkey = self._clean_key(groupkey)
  181. key = self._clean_key(key)
  182. # parse values as int, float, list, ...
  183. val = self._convert_value(val)
  184. if groupkey is None:
  185. # Simple param of form `key=value`
  186. assert (key not in self.params) or self.params[key] == val, f"Duplicate PARAM {key}"
  187. self.params[key] = val
  188. else:
  189. # Grouped param of form `groupkey.key=value`
  190. if not groupkey in self.params:
  191. self.params[groupkey] = OrderedDict()
  192. assert (key not in self.params[groupkey]) or (self.params[groupkey][key] == val), f"Duplicate PARAM {groupkey}.{key}"
  193. self.params[groupkey][key] = val
  194. else:
  195. raise ValueError('non-matching PARAM line: "{}"'.format(l))
  196. def _parse_params(self):
  197. with self._file as f:
  198. # seek PARAMS line
  199. f.seek(self._seek_params)
  200. # Next line must be "PARAMS":
  201. if not f.nextline() == "PARAMS":
  202. raise ValueError(f"PARAMS expected at position {self._seek_params}")
  203. self.params = OrderedDict()
  204. # read next line:
  205. l = f.nextline()
  206. while not l == "END_PARAMS":
  207. # add params line by line:
  208. self._read_params_line(l)
  209. # read next line:
  210. l = f.nextline()
  211. self._seek_rasterdata = f.tell()
  212. # Attributes made available by _parse_params():
  213. _lazy_dict['params'] = "_parse_params"
  214. _lazy_dict['_seek_rasterdata'] = "_parse_params"
  215. class XDdata(XDbase):
  216. """XDdata object represents a single xdphys data file
  217. It handles parsing of the file, section-wise.
  218. """
  219. # dtype of analog raster data (stimulus and trace):
  220. _RASTER_DTYPE = np.dtype("int16").newbyteorder('>')
  221. # Minimum supported xdphys version, use XDdata_legacy for older versions
  222. _XDPHYS_MIN_VER = '2.8.0'
  223. _XDPHYS_PROG_NAME = 'xdphys'
  224. # @see: XDbase._lazy_dict
  225. _lazy_dict = XDbase._lazy_dict.copy()
  226. def _parse_trials(self):
  227. with self._file as f:
  228. # seek RASTERDATA line
  229. f.seek(self._seek_rasterdata)
  230. # Next line must be "RASTERDATA":
  231. if not f.nextline() == "RASTERDATA":
  232. raise ValueError(f"RASTERDATA expected at position {self._seek_rasterdata}")
  233. # "nrasters=" line:
  234. l = f.nextline()
  235. if not l.startswith("nrasters="):
  236. raise ValueError(f"nrasters= expected in RASTERDATA at position {self._seek_rasterdata}")
  237. self.nrasters = int(l[9:]) # 9 == len('nrasters=')
  238. self.trials = []
  239. self.nevents = []
  240. self.events = []
  241. self._seek_ad = {}
  242. ### first "depvar=" line:
  243. l = f.nextline()
  244. for ktrial in range(self.nrasters):
  245. ### "depvar=" line (read before loop or at the end of last iteration):
  246. if l == "END_RASTERDATA" or l == '':
  247. # No more trials, go back:
  248. break
  249. t = OrderedDict(index = ktrial)
  250. m = self._depvar_re.fullmatch(l)
  251. if m:
  252. t.update((k, self._convert_value(m.group(g))) for k, g in self._depvar_re_groups)
  253. elif l == 'depvar=-6666 <SPONT>':
  254. # "spontaneous" trial have no params, fill with NaN:
  255. # changed to leave them empty (pandas can handle this)
  256. #t.update((k, np.nan) for k, g in self._depvar_re_groups)
  257. t['stim'] = 'spont'
  258. else:
  259. raise ValueError(f"Unable to parse depvar line: {l}\nFile: {self._file.path} near position {f.tell()}")
  260. self.trials.append(t)
  261. ### "nevents=" line:
  262. l = f.nextline()
  263. if not l.startswith("nevents="):
  264. raise ValueError(f"nevents= expected at position {f.tell() - len(l) -1}")
  265. self.nevents.append(int(l[8:])) # 8 == len('nevents=')
  266. events = []
  267. for kevent in range(self.nevents[-1]):
  268. l = f.nextline()
  269. events.append(tuple(int(e) for e in l.split("\t")))
  270. self.events.append(events)
  271. ### Analog data (TRACE or STIMULUS)
  272. # Read next line, can be one of 'STIMULUS', 'TRACE' or a "depvar=" line
  273. l = f.nextline()
  274. while l in ('STIMULUS', 'TRACE'):
  275. ADKEY = l
  276. adkey = ADKEY.lower()
  277. adreadsize = getattr(self, f"_readsize_{adkey}")
  278. l = f.nextline()
  279. if not l.startswith("channel="):
  280. raise ValueError(f"channel= expected at position {f.tell() - len(l) -1}")
  281. adchannel = int(l[8:]) # 8 == len('channel=')
  282. # Now we are exactly at the start of the ad data:
  283. pos = f.tell()
  284. self._seek_ad[(ktrial, adkey, adchannel)] = pos
  285. # we skip reading it because we usually don't need to:
  286. f.seek(pos + adreadsize + 1)
  287. l = f.nextline()
  288. if not l == f"END_{ADKEY}":
  289. raise ValueError(f"END_{ADKEY} expected at position {f.tell() - len(l) -1}")
  290. # Read the next 'STIMULUS', 'TRACE' or a "depvar=" line:
  291. l = f.nextline()
  292. self.ntrials = len(self.trials)
  293. # Attributes made available by _parse_trials():
  294. _lazy_dict['trials'] = "_parse_trials"
  295. _lazy_dict['ntrials'] = "_parse_trials"
  296. _lazy_dict['nrasters'] = "_parse_trials"
  297. _lazy_dict['nevents'] = "_parse_trials"
  298. _lazy_dict['events'] = "_parse_trials"
  299. _lazy_dict['_seek_ad'] = "_parse_trials"
  300. def _parse_ad(self, adkey = None):
  301. """
  302. """
  303. if adkey is None:
  304. self._parse_ad(adkey = 'stimulus')
  305. self._parse_ad(adkey = 'trace')
  306. return
  307. assert adkey in ('stimulus', 'trace')
  308. channels = getattr(self, f"{adkey}_channels")
  309. arr = np.full(shape = (self.ntrials, getattr(self, f"{adkey}_len"), len(channels)),
  310. fill_value = np.nan, dtype = self._RASTER_DTYPE.newbyteorder('='))
  311. readsize = getattr(self, f"_readsize_{adkey}")
  312. with self._file as f:
  313. for ktrial in range(self.ntrials):
  314. for kchan, chan in enumerate(channels):
  315. f.seek(self._seek_ad[(ktrial, adkey, chan)])
  316. r = f.read(readsize)
  317. if isinstance(r, bytes):
  318. r = r.decode('ascii')
  319. arr[ktrial, :, kchan] = np.frombuffer(
  320. bytes.fromhex(r.replace('\n', '')),
  321. dtype=self._RASTER_DTYPE) #.astype('int16')
  322. setattr(self, adkey, arr)
  323. def _parse_stimulus(self):
  324. self._parse_ad('stimulus')
  325. _lazy_dict['stimulus'] = "_parse_stimulus"
  326. def _parse_trace(self):
  327. self._parse_ad('trace')
  328. _lazy_dict['trace'] = "_parse_trace"
  329. def _ad_sizes(self):
  330. """"""
  331. hexlen = 2 * self._RASTER_DTYPE.itemsize
  332. lam = lambda m: m + (m-1) // 80
  333. # TRACE:
  334. self.trace_len = int(1e-3 * self.params['epoch'] * self.params['adfc'] / self.params['ana']['decimate'])
  335. self._readsize_trace = lam(hexlen * self.trace_len)
  336. self.trace_channels = [k for k in (1, 2) if self.params['ana'][f'channel{k}'] == 'on']
  337. # STIMULUS:
  338. self.stimulus_len = int(1e-3 * self.params['epoch'] * self.params['dafc'] / self.params['stim']['decimate'])
  339. self._readsize_stimulus = lam(hexlen * self.stimulus_len)
  340. self.stimulus_channels = [1, 2] if self.params['stim']['save'] == 1 else []
  341. # Attributes made available by _ad_sizes():
  342. _lazy_dict['stimulus_len'] = "_ad_sizes"
  343. _lazy_dict['stimulus_channels'] = "_ad_sizes"
  344. _lazy_dict['trace_len'] = "_ad_sizes"
  345. _lazy_dict['trace_channels'] = "_ad_sizes"
  346. _lazy_dict['_readsize_stimulus'] = "_ad_sizes"
  347. _lazy_dict['_readsize_trace'] = "_ad_sizes"
  348. def _depvar_mapping(self):
  349. """"""
  350. # All newer versions (seen "2.7xx" and "2.8xx") use
  351. gen_vars = ['abi', 'itd', 'iid', 'bc', 'stim', 'mono', 'two_snd']
  352. partial_depvar = "; ".join(["([^; ]+)"] * len(gen_vars))
  353. self._depvar_re = re.compile("depvar= ?([-0-9]+) <{}>".format(partial_depvar)) # for .gen files: "depvar=([0-9]+) <{}>" would be sufficient
  354. self._depvar_re_groups = tuple((gen_vars[k], k+2) for k in range(len(gen_vars)))
  355. # Attributes made available by _depvar_mapping():
  356. _lazy_dict['_depvar_re'] = "_depvar_mapping"
  357. _lazy_dict['_depvar_re_groups'] = "_depvar_mapping"
  358. def planned_trials(self):
  359. return self.params['reps'] * np.prod([
  360. np.asarray(v).size for v in self.params[self.fileinfo.mod].values()
  361. ])
  362. class XDdata_legacy(XDdata):
  363. """Subclass of XDdata to handle specialties from older xdphys versions.
  364. For all public API documentation, see XDdata.
  365. Versions seen and supported:
  366. samplelength in the following description is calculated as:
  367. int(_adfc_ * _epoch_ * 1e-3 / _ana_decimate_)
  368. where _adfc_ is the sampling frequency in Hz (int) either from the ana-file
  369. or from the PARAMS section, _epoch_ is the recording length in milliseconds,
  370. _ana_decimate_ is the decimation factor used to reduce data, effectively
  371. reducing the sampling frequency by the this factor.
  372. == v2.47 before June 1999 (exclusive) ==
  373. * "depvar"-lines are "mod"-specific
  374. * contains no analog data in the same files
  375. * MAY have an ana-file (.ana or .ana.gz) containing one analog trace channel
  376. * ana files start with b'W1' and contain chunks of data for every trial:
  377. - 512 bytes "header", repeated for every trial
  378. - 2 * samplelength bytes analog trace (binary, big endian int16)
  379. == v2.47 between June and July 1999 (both inclusive) ==
  380. * "depvar"-lines include five fields: 'abi', 'itd', 'iid', 'bc', 'stim'
  381. * contains analog data as HEX values, but not surrounded by TRACE/END_TRACE
  382. * MAY have an ana-file (.ana or .ana.gz) containing one analog trace channel
  383. * ana files start with b'2W' and contain a global header followed by chunks
  384. of data for each trial:
  385. - first 4 bytes indicate version (?) b'2W', 3rd and 4th byte unknown
  386. - global header includes the following fields [bytelength] as
  387. null-terminated strings, each written as fieldname=value:
  388. + adFc [15]
  389. + epoch [15]
  390. + ana-decimate [15]
  391. + ana-tomv [30]
  392. + ana-offset [30]
  393. + nrasters [15]
  394. Total header length, including first 4 bytes is 124 bytes
  395. - For every trial, there is a chunk:
  396. + depvar [30], similar to the header fields
  397. + 2 * samplelength bytes analog trace (binary, little endian int16)
  398. A specialty of these files is that the traces are saved in two places. It
  399. seemed (in some examples) that these traces are identical. The HEX values
  400. will be used to fill the XDdata_legacy.trace values for these files.
  401. However, the _parse_anafile(), if called explicitly, will add another
  402. attribute XDdata_legacy.ana_trace which contains traces from the ana-file.
  403. It will also attempt to create an index list such that the following is
  404. true:
  405. # needed to make ana_trace and ana_ktrial available:
  406. xfile._parse_anafile()
  407. xfile.ana_trace[xfile.ana_ktrial, :, :] == xfile.trace
  408. == v2.47 after July 1999 (exclusive) and all higher versions ==
  409. Files are organized as in the most recent version (which is 2.8.1-1). The
  410. XDdata_legacy class is not needed for any of these versions as they can be
  411. handled with (slightly) higher performance by the XDdata base class.
  412. """
  413. # TODO: Check if there is a .ana file and implement reading it, see legacy_xdphys for how that works!
  414. # We have to copy the _lazy_dict to make changes of our own:
  415. _lazy_dict = XDdata._lazy_dict.copy()
  416. _XDPHYS_MIN_VER = '2.47'
  417. def __init__(self, filepath):
  418. super().__init__(filepath)
  419. # Eventually, we have a separate file with analog data:
  420. if self.ana_path is not None:
  421. self.has_anafile = True
  422. self._anafile = FileHandler(self.ana_path, mode='rb')
  423. else:
  424. self.has_anafile = False
  425. if self.fileinfo.ver >= super()._XDPHYS_MIN_VER:
  426. import warnings
  427. print(f'{self!s}, {self.fileinfo}')
  428. warnings.warn("For performance reasons, it is not advised to use"
  429. " XDdata_legacy for files also supported by XDdata.")
  430. def _find_anafile_path(self):
  431. fn = (lambda fn: fn[:-3] if fn.endswith('.gz') else fn)(self.path)
  432. if os.path.exists(fn + '.ana'):
  433. self.ana_path = fn + '.ana'
  434. elif os.path.exists(fn + '.ana.gz'):
  435. self.ana_path = fn + '.ana.gz'
  436. else:
  437. self.ana_path = None
  438. _lazy_dict['ana_path'] = '_find_anafile_path'
  439. def _legacy_version(self):
  440. if self.params['timestamp'] <= 928195200:
  441. # Versions before June 1999,
  442. # 928195200 is 1999-06-01T00:00:00+00:00
  443. self.v247_subversion = '1999-05'
  444. elif (self.params['timestamp'] > 928195200 and
  445. self.params['timestamp'] <= 933465600):
  446. # Versions in June and July 1999,
  447. # 928195200 is 1999-06-01T00:00:00+00:00
  448. # 933465600 is 1999-08-01T00:00:00+00:00
  449. self.v247_subversion = '1999-06'
  450. elif self.params['timestamp'] > 933465600:
  451. # This pattern was encountered after July 1999,
  452. # 933465600 is 1999-08-01T00:00:00+00:00
  453. self.v247_subversion = '1999-08'
  454. else:
  455. raise ValueError(f"Sorry, can't figure out how to read your xdphys file {self.path}")
  456. _lazy_dict['v247_subversion'] = '_legacy_version'
  457. def _depvar_mapping(self):
  458. """"""
  459. # Support for various patterns in old files (version 2.47)
  460. if self.fileinfo.ver > '2.47' or self.v247_subversion >= '1999-08':
  461. super()._depvar_mapping()
  462. return
  463. elif self.v247_subversion <= '1999-05':
  464. # These patterns were encountered in versions before June 1999,
  465. # At this time, patterns were specific to the "mod" used:
  466. if self.fileinfo.mod == 'itd':
  467. self._depvar_re = re.compile("depvar=([-0-9]+) <\\1 us>")
  468. self._depvar_re_groups = (('itd', 1),)
  469. elif self.fileinfo.mod == 'iid':
  470. self._depvar_re = re.compile("depvar=([-0-9]+) <\\1 db, L=[-0-9]+, R=[-0-9]+>")
  471. self._depvar_re_groups = (('iid', 1),)
  472. elif self.fileinfo.mod == 'bf':
  473. self._depvar_re = re.compile("depvar=([-0-9]+) <\\1 Hz>")
  474. self._depvar_re_groups = (('stim', 1),)
  475. elif self.fileinfo.mod == 'abi':
  476. self._depvar_re = re.compile("depvar=([-0-9]+) <\\1 dbspl>")
  477. self._depvar_re_groups = (('abi', 1),)
  478. elif self.fileinfo.mod == 'nop':
  479. self._depvar_re = re.compile("depvar=([-0-9]+) ;rep number")
  480. # It does contain the index (group 1), which we ignore:
  481. self._depvar_re_groups = () # (('index', 1),)
  482. elif self.fileinfo.mod == 'rov':
  483. self._depvar_re = re.compile("depvar=([-0-9]+) <([-0-9]+) us, ([-0-9]+) db> ;itd, iid")
  484. # It does contain some weird index number (group 1), which we ignore:
  485. self._depvar_re_groups = (('itd', 2), ('iid', 3)) # (('indexnum', 1), ('itd', 2), ('iid', 3))
  486. else:
  487. raise NotImplementedError(f"Unknown depvar mapping for xdphys file with version={self.fileinfo.ver} mod={self.fileinfo.mod}")
  488. elif self.v247_subversion == '1999-06':
  489. # This pattern was only encountered in June and July 1999
  490. # There were no 'mono' or 'two_snd' columns:
  491. gen_vars = ['abi', 'itd', 'iid', 'bc', 'stim']
  492. partial_depvar = "; ".join(["([^; ]+)"] * len(gen_vars))
  493. self._depvar_re = re.compile("depvar= ?([-0-9]+) <{}>".format(partial_depvar)) # for .gen files: "depvar=([0-9]+) <{}>" would be sufficient
  494. self._depvar_re_groups = tuple((gen_vars[k], k+2) for k in range(len(gen_vars)))
  495. else:
  496. raise NotImplementedError(f"Unknown depvar mapping for xdphys file with version={self.fileinfo.ver} mod={self.fileinfo.mod}")
  497. def _parse_trials(self):
  498. if self.fileinfo.ver >= super()._XDPHYS_MIN_VER:
  499. # Higher version's files are parsed with the XDdata function:
  500. super()._parse_trials()
  501. return
  502. #
  503. with self._file as f:
  504. # seek RASTERDATA line
  505. f.seek(self._seek_rasterdata)
  506. # Next line must be "RASTERDATA":
  507. if not f.nextline() == "RASTERDATA":
  508. raise ValueError(f"RASTERDATA expected at position {self._seek_rasterdata}")
  509. # "nrasters=" line:
  510. l = f.nextline()
  511. if not l.startswith("nrasters="):
  512. raise ValueError(f"nrasters= expected in RASTERDATA at position {self._seek_rasterdata}")
  513. self.nrasters = int(l[9:]) # 9 == len('nrasters=')
  514. self.trials = []
  515. self.nevents = []
  516. self.events = []
  517. self._seek_ad = {}
  518. ### first "depvar=" line:
  519. l = f.nextline()
  520. for ktrial in range(self.nrasters):
  521. ### "depvar=" line (read before loop or at the end of last iteration):
  522. if l == "END_RASTERDATA" or l == '':
  523. # No more trials, go back:
  524. break
  525. t = OrderedDict(index = ktrial)
  526. m = self._depvar_re.fullmatch(l)
  527. if m:
  528. t.update((k, self._convert_value(m.group(g))) for k, g in self._depvar_re_groups)
  529. elif l == 'depvar=-6666 <SPONT>':
  530. # "spontaneous" trial have no params, fill with NaN:
  531. # changed to leave them empty (pandas can handle this)
  532. #t.update((k, np.nan) for k, g in self._depvar_re_groups)
  533. t['stim'] = 'spont'
  534. else:
  535. raise ValueError(f"Unable to parse depvar line: {l}")
  536. self.trials.append(t)
  537. ### "nevents=" line:
  538. l = f.nextline()
  539. if not l.startswith("nevents="):
  540. raise ValueError(f"nevents= expected at position {f.tell() - len(l) -1}")
  541. self.nevents.append(int(l[8:])) # 8 == len('nevents=')
  542. events = []
  543. for kevent in range(self.nevents[-1]):
  544. l = f.nextline()
  545. events.append(tuple(int(e) for e in l.split("\t")))
  546. self.events.append(events)
  547. ### Analog data
  548. if self.fileinfo.ver > '2.47':
  549. # Read next line, can be one of 'STIMULUS', 'TRACE' or a "depvar=" line
  550. l = f.nextline()
  551. while l in ('STIMULUS', 'TRACE'):
  552. ADKEY = l
  553. adkey = ADKEY.lower()
  554. adreadsize = getattr(self, f"_readsize_{adkey}")
  555. l = f.nextline()
  556. if not l.startswith("channel="):
  557. raise ValueError(f"channel= expected at position {f.tell() - len(l) -1}")
  558. adchannel = int(l[8:]) # 8 == len('channel=')
  559. # Now we are exactly at the start of the ad data:
  560. pos = f.tell()
  561. self._seek_ad[(ktrial, adkey, adchannel)] = pos
  562. # we skip reading it because we usually don't need to:
  563. f.seek(pos + adreadsize + 1)
  564. l = f.nextline()
  565. if not l == f"END_{ADKEY}":
  566. raise ValueError(f"END_{ADKEY} expected at position {f.tell() - len(l) -1}")
  567. # Read the next 'STIMULUS', 'TRACE' or a "depvar=" line:
  568. l = f.nextline()
  569. else:
  570. # For ver 2.47 file, there only is one trace saved in the
  571. # same file - if there is one at all.
  572. if not hasattr(self, '_v247_ad_in_file'):
  573. # We have to check if trace data is saved by probing the
  574. # first trial...
  575. # Remember where it started:
  576. pos = f.tell()
  577. # read one line to see if there is analog data at all:
  578. self._v247_ad_in_file = not f.nextline().startswith('depvar=')
  579. # Go back to where we started probing:
  580. f.seek(pos)
  581. if self._v247_ad_in_file:
  582. # Use default info, i.e. TRACE, channel=1
  583. ADKEY = 'TRACE' # default value
  584. adkey = ADKEY.lower()
  585. adreadsize = getattr(self, f"_readsize_{adkey}")
  586. adchannel = 1 # default value
  587. # Now we are exactly at the start of the ad data:
  588. pos = f.tell()
  589. self._seek_ad[(ktrial, adkey, adchannel)] = pos
  590. # we skip reading it because we usually don't need to:
  591. f.seek(pos + adreadsize + 1)
  592. # Read the next "depvar=" line:
  593. l = f.nextline()
  594. self.ntrials = len(self.trials)
  595. def _parse_anafile(self):
  596. assert self.has_anafile, f"There is no corresponding ana-file for {self!s}"
  597. if self.v247_subversion <= '1999-05':
  598. with self._anafile as f:
  599. # Probe version:
  600. head_chunk = f.read(16)
  601. assert head_chunk[0:2] == b'W1', f"This ana-file version cannot currently be read {head_chunk}"
  602. chunksize = (512 + self._readsize_trace)
  603. # go back to start to read trial by trial:
  604. self.trace = np.full(shape = (self.ntrials, self.trace_len, len(self.trace_channels)),
  605. fill_value = np.nan,
  606. dtype = self._RASTER_DTYPE.newbyteorder('='))
  607. self.ana_trial_heads = []
  608. for ktrial in range(self.ntrials):
  609. f.seek(ktrial * chunksize)
  610. chunk = f.read(chunksize)
  611. if not chunk[4:16] == head_chunk[4:16]:
  612. raise ValueError("Error reading ana-file '{fn}' at trial #{kt}")
  613. self.trace[ktrial, :, 0] = np.frombuffer(chunk[512:chunksize], dtype=self._RASTER_DTYPE)
  614. self.ana_trial_heads.append(chunk[:512])
  615. elif self.v247_subversion == '1999-06':
  616. # This code will never run as part of a lazy-evaluation routine,
  617. # because files in question already have the analog data saved
  618. # internally as hex-values.
  619. # TODO: this is NOT yet finished!!!
  620. with self._anafile as f:
  621. # Header of four bytes:
  622. head_chunk = f.read(4)
  623. assert head_chunk[0:2] == b'2W', f"This ana-file version cannot currently be read {head_chunk}"
  624. # Global header with fields:
  625. fields = [('adFc', 15), ('epoch', 15), ('ana-decimate', 15), ('ana-tomv', 30), ('ana-offset', 30), ('nrasters', 15)]
  626. d = OrderedDict()
  627. for fieldname, fieldlength in fields:
  628. fieldcontent = f.read(fieldlength).rstrip(b'\n\x00').decode('ascii')
  629. assert fieldcontent.startswith(fieldname), f"Field {fieldname} expected, but got {fieldcontent}"
  630. d[self._clean_key(fieldname)] = self._convert_value(fieldcontent[len(fieldname)+1:])
  631. # We already know all this, let's make sure it's correct:
  632. assert d['adfc'] == self.params['adfc'], 'Mismatch "adfc" between ana-file "{self.ana_path!s}" and params in {self!s}'
  633. assert d['epoch'] == self.params['epoch'], 'Mismatch "epoch" between ana-file "{self.ana_path!s}" and params in {self!s}'
  634. assert d['ana_decimate'] == self.params['ana']['decimate'], 'Mismatch "ana_decimate" between ana-file "{self.ana_path!s}" and params in {self!s}'
  635. assert d['ana_tomv'] == self.params['ana']['tomv'], 'Mismatch "ana_tomv" between ana-file "{self.ana_path!s}" and params in {self!s}'
  636. assert d['ana_offset'] == self.params['ana']['offset'], 'Mismatch "ana_offset" between ana-file "{self.ana_path!s}" and params in {self!s}'
  637. assert d['nrasters'] == self.ntrials, 'Mismatch between "nrasters" in ana-file "{self.ana_path!s}" and ntrials in {self!s}'
  638. # In order to align traces from ana-file and from HEX data,
  639. # figure out what depvar was used, and get the trials' values:
  640. depvar_key = self.params['depvar'].split(" ")[0]
  641. if not depvar_key == 'gen':
  642. if depvar_key == 'bf':
  643. depvar_key = 'stim'
  644. tdepvars = [t[depvar_key] for t in self.trials]
  645. self.ana_ktrial = [None] * self.ntrials
  646. self.ana_trace = np.full(shape = (self.ntrials, self.trace_len, 1),
  647. fill_value = np.nan,
  648. dtype = self._RASTER_DTYPE.newbyteorder('='))
  649. for kana in range(self.ntrials):
  650. depvarcontent = f.read(30).rstrip(b'\n\x00').decode('ascii')
  651. depvar = self._convert_value(depvarcontent[7:]) # 7 == len('depvar')+1
  652. self.ana_trace[kana,:,0] = np.fromfile(f, dtype=self._RASTER_DTYPE.newbyteorder('<'), count=self.trace_len)
  653. # See if we can find the same in self.trace
  654. if not depvar_key == 'gen':
  655. try:
  656. ktrial = tdepvars.index(depvar)
  657. while (not np.all(self.trace[ktrial,:,0] == self.ana_trace[kana,:,0])):
  658. ktrial = tdepvars.index(depvar, ktrial+1)
  659. tdepvars[ktrial] = None
  660. self.ana_ktrial[ktrial] = kana
  661. except ValueError:
  662. # This happens if no matching trace in self.trace was found:
  663. pass
  664. else:
  665. raise NotImplementedError("xdphys versions after July 1999 are not know to have ana files.")
  666. def _ad_sizes(self):
  667. """"""
  668. if self.fileinfo.ver >= super()._XDPHYS_MIN_VER:
  669. # Higher version's files are parsed with the XDdata function:
  670. super()._ad_sizes()
  671. return
  672. # version 2.7x support:
  673. if self.fileinfo.ver >= '2.7':
  674. hexlen = 2 * self._RASTER_DTYPE.itemsize
  675. lam = lambda m: m + (m-1) // 80
  676. # TRACE:
  677. self.trace_len = int(1e-3 * self.params['epoch'] * self.params['adfc'] / self.params['ana']['decimate'])
  678. self._readsize_trace = lam(hexlen * self.trace_len)
  679. self.trace_channels = [k for k in (1, 2) if self.params['ana'][f'channel{k}'] == 'on']
  680. # No stimulus in v2.7x files:
  681. self.stimulus_len = int(1e-3 * self.params['epoch'] * self.params['dafc'])
  682. self.stimulus_channels = []
  683. self._readsize_stimulus = 0
  684. return
  685. # version 2.47 support:
  686. hexlen = 2 * self._RASTER_DTYPE.itemsize
  687. lam = lambda m: m + (m-1) // 80
  688. if self.v247_subversion <= '1999-05':
  689. self.params['ana'] = OrderedDict()
  690. if self.has_anafile:
  691. with self._anafile as f:
  692. # Get information from ana-file that is usually in PARAMS
  693. head_chunk = f.read(16)
  694. assert head_chunk[0:2] == b'W1', f"Unexpected ana-file format."
  695. samplingrate, samplelength, chan = np.frombuffer(head_chunk[4:16], dtype=np.dtype('int32').newbyteorder('>'))
  696. self.trace_len = int(samplelength)
  697. self.trace_channels = [int(chan)]
  698. # For reading binary data, _not_ HEX values:
  699. self._readsize_trace = self._RASTER_DTYPE.itemsize * self.trace_len
  700. self.params['ana']['save'] = 1
  701. self.params['ana']['decimate'] = int(self.params['adfc']/samplingrate)
  702. # TODO: do we have to guess the following?
  703. self.params['ana']['every'] = 1
  704. self.params['ana']['tomv'] = 0.306523
  705. self.params['ana']['offset'] = 0.000000
  706. else:
  707. # These values are "made up", but should somehow work as they are
  708. self.trace_len = int(1e-3 * self.params['epoch'] * self.params['adfc'])
  709. self.trace_channels = []
  710. self._readsize_trace = 0
  711. self.params['ana']['save'] = 0
  712. elif self.v247_subversion >= '1999-06':
  713. # This works for the short lived mid-1999 and later versions
  714. self.trace_len = int(1e-3 * self.params['epoch'] * self.params['adfc'] / self.params['ana']['decimate'])
  715. self.trace_channels = [1] if self.params['ana']['save'] == 1 else []
  716. self._readsize_trace = lam(hexlen * self.trace_len)
  717. # All of the 2.47 file have no stimulus:
  718. self.stimulus_len = int(1e-3 * self.params['epoch'] * self.params['dafc'])
  719. self.stimulus_channels = []
  720. self._readsize_stimulus = 0
  721. def _parse_params(self):
  722. super()._parse_params()
  723. if self.v247_subversion <= '1999-05':
  724. # In these files, we only get all the information from probing
  725. # the ana-file (if one exists). This will extend self.params:
  726. self._ad_sizes()
  727. def _parse_ad(self, adkey = 'trace'):
  728. if self.v247_subversion >= '1999-06':
  729. super()._parse_ad(adkey)
  730. elif adkey == 'trace' and self.has_anafile:
  731. self._parse_anafile()
  732. else:
  733. if adkey == 'trace':
  734. self.trace = np.full(shape = (self.ntrials, self.trace_len, len(self.trace_channels)),
  735. fill_value = np.nan,
  736. dtype = self._RASTER_DTYPE.newbyteorder('='))
  737. elif adkey == 'stimulus':
  738. self.stimulus = np.full(shape = (self.ntrials, self.stimulus_len, len(self.stimulus_channels)),
  739. fill_value = np.nan,
  740. dtype = self._RASTER_DTYPE.newbyteorder('='))
  741. class XDcal(XDbase):
  742. """XDcal object represents a single xdphys calibration file
  743. """
  744. # Minimum supported xdphys version, use XDdata_legacy for older versions
  745. _XDPHYS_MIN_VER = '2.8.0'
  746. _XDPHYS_PROG_NAME = 'xcalibur'
  747. # @see: XDbase._lazy_dict
  748. _lazy_dict = XDbase._lazy_dict.copy()
  749. def _parse_caldata(self):
  750. with self._file as f:
  751. # seek RASTERDATA line
  752. f.seek(self._seek_rasterdata)
  753. # Next line must be "RASTERDATA":
  754. if not f.nextline() == "RASTERDATA":
  755. raise ValueError(f"RASTERDATA expected at position {self._seek_rasterdata}")
  756. self.raster_comments = []
  757. while True:
  758. # "nrasters=" line:
  759. l = f.nextline()
  760. if l.startswith("; "):
  761. self.raster_comments.append(l.strip("; "))
  762. continue
  763. if not l.startswith("nrasters="):
  764. raise ValueError(f"nrasters= expected in RASTERDATA at position {self._seek_rasterdata}")
  765. self.nrasters = int(l[9:]) # 9 == len('nrasters=')
  766. break
  767. self.calcomments = {}
  768. for cmt in self.raster_comments:
  769. m = re.fullmatch("([0-9]+): ([a-z_]+)(?: ?(.*))?", cmt)
  770. if m is None:
  771. raise ValueError(f"Unknown comment: '{cmt}'")
  772. colnum, varname, description = m.groups()
  773. self.calcomments[int(colnum)] = (varname, description)
  774. self.caldata_wrapped = pd.DataFrame(
  775. np.loadtxt([f.nextline() for k in range(self.nrasters)]),
  776. columns=[self.calcomments[kc][0] for kc in sorted(self.calcomments.keys())],
  777. ).set_index("freq")
  778. f_seek_endpos = f.tell()
  779. l = f.nextline()
  780. if not l == "END_RASTERDATA":
  781. raise ValueError(f"END_RASTERDATA expected at position {f_seek_endpos}")
  782. # Finally, unwrap '_phase' columns:
  783. self.caldata = self.caldata_wrapped.copy()
  784. for c in self.caldata.columns[self.caldata.columns.str.endswith('_phase')]:
  785. self.caldata[c] = np.unwrap(self.caldata[c])
  786. _lazy_dict['nrasters'] = "_parse_caldata"
  787. _lazy_dict['raster_comments'] = "_parse_caldata"
  788. _lazy_dict['caldata_wrapped'] = "_parse_caldata"
  789. _lazy_dict['caldata'] = "_parse_caldata"
  790. def interpolate_caldata(self, freqs, interpolate_phase_as_delay = False, caldata = None):
  791. """Return linearly interpolated caldata for use in calibration/decalibration.
  792. To be used for transforming a stimulus (calibration or decalibration), the given
  793. caldata must be interpolated to provide values for every (dFFT) frequency that shall
  794. be affected by the transformation. In xdphys (??) and BEDS, this interpolation is
  795. done linearly between frequencies present in the original caldata.
  796. """
  797. if caldata is None:
  798. caldata = self.caldata
  799. caldata_inter = pd.DataFrame([],
  800. columns=caldata.columns,
  801. index=pd.Series(freqs, name=caldata.index.name)
  802. )
  803. for c in caldata_inter.columns:
  804. if interpolate_phase_as_delay and c.endswith('_phase'):
  805. # This was done in BEDS (don't know about xdphys yet) but the effect is minimal
  806. # Converts phases to delays (in seconds), then interpolates, then converts back
  807. caldata_inter[c] = (2 * np.pi * freqs) * np.interp(freqs, caldata.index, caldata[c] / (2 * np.pi * caldata.index), left = np.nan, right = np.nan)
  808. else:
  809. caldata_inter[c] = np.interp(freqs, caldata.index, caldata[c], left = np.nan, right = np.nan)
  810. return caldata_inter
  811. class FileHandler():
  812. """A context handler class that keeps track of how often it is used
  813. """
  814. def __init__(self, path, mode='r'):
  815. self.path = path
  816. self.mode = mode
  817. self.handle_count = 0
  818. def __enter__(self):
  819. if self.handle_count == 0:
  820. if self.path.endswith('.gz'):
  821. self.file_obj = gzip.open(self.path, self.mode)
  822. else:
  823. self.file_obj = open(self.path, self.mode)
  824. # Read a small chunk to see if we'll get bytes or str, then
  825. # bind appropriate nextline function:
  826. chunk = self.file_obj.read(1)
  827. # Set pointer back to first byte
  828. self.file_obj.seek(0)
  829. if isinstance(chunk, str):
  830. self.file_obj.nextline = MethodType(self._nextline_str, self.file_obj)
  831. else:
  832. self.file_obj.nextline = MethodType(self._nextline_bytes, self.file_obj)
  833. self.handle_count += 1
  834. return self.file_obj
  835. def __exit__(self, errtype, value, traceback):
  836. self.handle_count -= 1
  837. if self.handle_count == 0:
  838. self.file_obj.close()
  839. del self.file_obj
  840. @staticmethod
  841. def _nextline_str(file_obj):
  842. """Read the nextline of file_obj. Removes trailing newlines."""
  843. return file_obj.readline().rstrip('\r\n')
  844. @staticmethod
  845. def _nextline_bytes(file_obj):
  846. """Read the nextline of file_obj. Removes trailing newlines."""
  847. return file_obj.readline().decode('ascii').rstrip('\r\n')