# stimuli_parser.py
import pandas as pd

import view.python_core.p1_class.metadata_related as metadata_related
  3. class StimuliParams(pd.DataFrame):
  4. def __init__(self, **kwargs):
  5. super().__init__(**kwargs)
  6. def iter_stimuli(self):
  7. return self.iterrows()
  8. class StimuliParamsParser(object):
  9. def __init__(self):
  10. self.param_def_df = pd.DataFrame()
  11. self.param_def_df["column_name"] = ["StimON", "StimOFF", "StimONms", "StimLen", "Odour", "OConc"]
  12. self.param_def_df["expected_type"] = [int, int, float, int, str, float]
  13. self.param_def_df["second_stim"] = ["Stim2ON", "Stim2OFF", "Stim2ONms", "Stim2Len", "Odour", "OConc"]
  14. self.param_def_df.set_index("column_name", inplace=True)
  15. self.metadata_def = metadata_related.MetadataDefinition()
  16. def get_specified_unspecified_params(self, row: pd.Series):
  17. specified_params = []
  18. unspecified_params = []
  19. for column_name in self.param_def_df.index.values:
  20. if column_name in row:
  21. if self.metadata_def.is_value_default(column_name, row[column_name]):
  22. unspecified_params.append(column_name)
  23. else:
  24. specified_params.append(column_name)
  25. return specified_params, unspecified_params
  26. def parse_row(self, row: pd.Series):
  27. specified_params, unspecified_params = self.get_specified_unspecified_params(row)
  28. if len(specified_params) == 0:
  29. return StimuliParams()
  30. else:
  31. # count the number of commas in each entry
  32. commas_per_specified_param = {x: row[x].count(",") if type(row[x]) is str else 0
  33. for x in specified_params}
  34. # infer the number of stimuli
  35. n_stim = max(commas_per_specified_param.values()) + 1
  36. # ensure that each specified parameter either has one entry or <n_stim> number of entries
  37. assert all(x in (n_stim - 1, 0) for x in commas_per_specified_param.values()), \
  38. "Some stimulus parameters have more than one entry and these parameters are not equal in number"
  39. # initialize a data frame with param names as columns and stimuli number as indices
  40. param_values = StimuliParams(columns=self.param_def_df.index,
  41. index=pd.RangeIndex(0, n_stim, 1), dtype=object)
  42. # fill all unspecified parameters with None
  43. param_values.loc[:, unspecified_params] = None
  44. # fill specified parameters, duplicating where necessary
  45. for specified_param, n_commas in commas_per_specified_param.items():
  46. # for all entries with atleast one comma
  47. if (n_commas == n_stim - 1) and (n_stim > 1):
  48. param_values.loc[:, specified_param] \
  49. = [self._parse_comma_separated_entry(specified_param, x.replace(" ", ""))
  50. for x in row[specified_param].split(',')]
  51. else:
  52. param_values.loc[:, specified_param] \
  53. = self._parse_comma_separated_entry(specified_param, row[specified_param])
  54. # when stimuli parameter entries have no commas, add parameters for second stimulus from
  55. # entries of STIM2ON etc, if they have been specified
  56. if n_stim == 1:
  57. second_param_values = {}
  58. for param_name, second_param_name in self.param_def_df["second_stim"].items():
  59. if not self.metadata_def.is_value_default(second_param_name, row[second_param_name]):
  60. second_param_values[param_name] \
  61. = self._parse_comma_separated_entry(param_name, row[second_param_name])
  62. else:
  63. second_param_values[param_name] = None
  64. # only when at least one of the four parameters starting with "Stim" are specified
  65. if any(v is not None for k, v in second_param_values.items() if k.startswith("Stim")):
  66. param_values_new = StimuliParams(columns=self.param_def_df.index,
  67. index=pd.RangeIndex(0, 2, 1), dtype=object)
  68. param_values_new.loc[0, :] = param_values.loc[0, :]
  69. for param_name, second_param_value in second_param_values.items():
  70. param_values_new.loc[1, param_name] = second_param_value
  71. param_values = param_values_new
  72. return param_values
  73. def _parse_comma_separated_entry(self, param_name, param_value):
  74. expected_type = self.param_def_df.loc[param_name, "expected_type"]
  75. if param_value == '' or pd.isnull(param_value):
  76. return None
  77. else:
  78. try:
  79. return expected_type(param_value)
  80. except ValueError as ve:
  81. raise ValueError(
  82. f"Could not interpret '{param_value}' in the column '{param_name}' as an {expected_type}."
  83. )