#!/usr/bin/env python
# coding=utf-8
"""
@author: yannansu
@created at: 23.09.21 16:07

This module loads and reads data from `raw_data/subject_idx.yaml`

Example usage:
LD = LoadData('test', data_path='pilot_data')
activ_df = LD.read_activity()
df = LD.read_data()
"""
import numpy as np
import pandas as pd
import datetime
import os
import yaml
from data_analysis.yml2dict import yml2dict


class LoadData:
    """Select sessions from a subject's activity log and load their trial data."""

    def __init__(self, sub, data_path=None, sel_cfg=None, sel_par=None, sel_ses=None, rm_ses=None,
                 start_date=None, sel_ses_idx=None):
        """
        Select and load data.

        :param sub:         subject name
        :param data_path:   data repository, default: 'data'
        :param sel_cfg:     selected config keywords: a single string or an iterable of strings
        :param sel_par:     selected param keywords: a single string or an iterable of strings
        :param sel_ses:     selected session keyword (substring of the session label)
        :param rm_ses:      removed session keywords (session label(s) to drop)
        :param start_date:  selected starting date, e.g. '20210000'
        :param sel_ses_idx: selected session index (position within each 'par_file' group)
        """
        self.sub = sub
        self.data_path = 'data' if data_path is None else data_path
        self.sel_cfg = sel_cfg
        self.sel_par = sel_par
        self.sel_ses = sel_ses
        self.rm_ses = rm_ses
        self.start_date = start_date
        self.sel_ses_idx = sel_ses_idx

    @staticmethod
    def _keyword_pattern(keywords):
        """
        Build an alternation pattern ('a|b|c') from one keyword or several.

        A bare string is treated as ONE keyword; joining it directly would
        split it into single characters and match nearly everything.

        :param keywords: a string or an iterable of strings
        :return: pattern string usable with `Series.str.contains`
        """
        if isinstance(keywords, str):
            keywords = [keywords]
        return '|'.join(keywords)

    def read_activity(self):
        """
        Read subject activity log with given selectors.

        The log is read from '<data_path>/<sub>/<sub>.yaml'. Only entries whose
        'status' equals 'completed' are kept; the remaining selectors are then
        applied in the order cfg -> par -> sel_ses -> rm_ses -> start_date ->
        sel_ses_idx.

        :return: a dataframe listing a summary of the selected session
        :raises KeyError: if a label in `rm_ses` is not present in the log
        """
        # Load the subject's YAML activity log; after transposing, each
        # top-level key of the log is one row (presumably one session/block).
        activ_log = yml2dict(os.path.join(self.data_path, self.sub, self.sub + '.yaml'))
        activ_df = pd.DataFrame(activ_log).T

        # Select finished blocks
        activ_df = activ_df[activ_df.status == 'completed']

        # Filter data by input selector. Keywords are interpreted as regular
        # expressions by `str.contains`; na=False drops rows with a missing
        # file name instead of failing on a non-boolean mask.
        if self.sel_cfg is not None:
            cfg_pattern = self._keyword_pattern(self.sel_cfg)
            activ_df = activ_df[activ_df.cfg_file.str.contains(cfg_pattern, na=False)]
        if self.sel_par is not None:
            par_pattern = self._keyword_pattern(self.sel_par)
            activ_df = activ_df[activ_df.par_file.str.contains(par_pattern, na=False)]

        # Restrict specific sessions by date or time (matched on the row labels)
        if self.sel_ses is not None:
            activ_df = activ_df.filter(like=self.sel_ses, axis=0)
        if self.rm_ses is not None:
            activ_df = activ_df.drop(self.rm_ses, axis=0)
        if self.start_date is not None:
            activ_df = activ_df[activ_df.index >= self.start_date]
        if self.sel_ses_idx is not None:
            activ_df = activ_df.groupby('par_file').nth(self.sel_ses_idx)

        # Flatten the index; if that produced a generic 'index' column
        # (the former row labels), discard it.
        activ_df = activ_df.reset_index()
        if 'index' in activ_df.columns:
            activ_df = activ_df.drop(columns=['index'])

        return activ_df

    def read_data(self, save_csv=None):
        """
        Read data from selected session.

        :param save_csv: '*.csv', save data in csv 'data/subject_xx/*.csv' if not None
        :return: a dataframe of trial-based data of all selected sessions;
                 an empty dataframe if no session matches the selectors
        """
        # Read activity log
        activ_df = self.read_activity()

        # Separate trials of every data file and collect them as dataframes
        df_list = []
        for block_idx, yml in zip(activ_df.block_idx, activ_df.data_file):
            # Keep only the per-trial entries: keys that start with 'trial'
            # and contain at least one digit.
            trials = {k: v for k, v in yml2dict(yml).items()
                      if k.startswith('trial') and any(c.isdigit() for c in k)}
            yml_data = pd.DataFrame(trials).T
            yml_data['block_index'] = block_idx
            yml_data['sub'] = self.sub
            df_list.append(yml_data.reset_index(drop=True))

        # pd.concat raises ValueError on an empty list, so fall back to an
        # empty frame when nothing was selected.
        df = pd.concat(df_list, ignore_index=True) if df_list else pd.DataFrame()

        if save_csv is not None:
            df.to_csv(os.path.join(self.data_path, self.sub, save_csv))
        return df


"""
# Test
LD = LoadData('test', data_path='pilot_data')
activ_df = LD.read_activity()
df = LD.read_data()
"""

# LoadData('s1', data_path='pilot2_data', sel_par='config/pilot2_param.yaml', start_date='20211207').read_data()