# -*- coding: utf-8 -*-
"""
Created on Mon Jun 24 08:37:33 2024

@author: arefk

Pre-check of the behaviour overview CSV.

For every StudyID the script counts the time points listed in
``TimePointsBehavior`` and the elements listed in each behaviour column,
stores those counts in new columns, and flags whether every behaviour
column has exactly as many elements as there are time points.  The result
is written next to the input file as ``*_precheck.csv``.
"""

import os

import pandas as pd
import numpy as np

# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))

# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)

# Input and output file paths, relative to the code file
input_file_path = os.path.join(parent_dir, 'input', 'Ninox_data_overview_all.csv')
final_csv = os.path.join(parent_dir, 'input', 'Ninox_data_overview_all_precheck.csv')

# Column names that need to be processed
columns_to_process = [
    'RightHindlimbDrop_RB',
    'Speed_RB',
    'Distance_RB',
    'PercentRightPawDragPerTouch_CT',
    'PercentLeftTouch_CT',
    'TotalNumberOfTouches_CT',
    'NumberOfFootFaultsDividedByTotalNumber_GW',
]

# Names of the count columns written for each entry of columns_to_process
# (same order).
# NOTE(review): 'PawDrang_CT_count' looks like a typo of "PawDrag"; it is kept
# unchanged because the name ends up in the output CSV header.
columns_short_name = [
    'HLD_RB_count',
    'Speed_RB_count',
    'Distance_RB_count',
    'PawDrang_CT_count',
    'PercentLeftTouch_CT_count',
    'TotalNumberOfTouches_CT_count',
    'FootFaults_GW_count',
]

# Elements inside one cell are separated by comma + space.  The separator is
# deliberately not relaxed to a bare comma: that would also split values that
# themselves contain a comma.
ELEMENT_SEPARATOR = ", "


def count_elements(value, separator=ELEMENT_SEPARATOR):
    """Return the number of elements stored in a single table cell.

    Parameters
    ----------
    value : object
        Cell content: a string of ``separator``-joined elements, a missing
        value (NaN/None), or any other scalar.
    separator : str
        Text that separates two elements inside a string cell.

    Returns
    -------
    int
        ``len(value.split(separator))`` for strings, 0 for missing values and
        1 for any other scalar (a column parsed as numeric holds exactly one
        value per cell).
    """
    if isinstance(value, str):
        return len(value.split(separator))
    if pd.isna(value):
        return 0
    return 1


def add_timepoint_checks(df):
    """Return a copy of *df* with time point / element count columns added.

    Added columns: ``NumberOfTimepoints``, one count column per entry of
    ``columns_short_name`` and ``TimepointEqualsElements``.  As in the
    original script, the values are taken from the first row of each StudyID
    and written to all rows of that StudyID.

    Rows whose StudyID is missing are not assigned to any study and keep the
    defaults (0 and False).  A study without time points and without any
    elements is reported as matching, because all counts are equal (0).

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``StudyID``, ``TimePointsBehavior`` and every column
        named in ``columns_to_process``.

    Returns
    -------
    pandas.DataFrame
        New frame; the input frame is left untouched.

    Raises
    ------
    KeyError
        If a required column is missing from *df*.
    """
    required = ['StudyID', 'TimePointsBehavior', *columns_to_process]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Group on a separate selection of the input columns so that writing the
    # result columns below cannot interfere with the iteration.
    inputs = df[required]

    result = df.copy()
    result['NumberOfTimepoints'] = 0
    for short_col in columns_short_name:
        result[short_col] = 0
    result['TimepointEqualsElements'] = False

    # groupby drops missing StudyIDs by default, so such rows keep the
    # defaults instead of raising an IndexError on an empty selection.
    for _, group in inputs.groupby('StudyID', sort=False):
        idx = group.index

        num_timepoints = count_elements(group['TimePointsBehavior'].iloc[0])
        result.loc[idx, 'NumberOfTimepoints'] = num_timepoints

        all_counts_match = True
        for col, short_col in zip(columns_to_process, columns_short_name):
            element_count = count_elements(group[col].iloc[0])
            result.loc[idx, short_col] = element_count
            if element_count != num_timepoints:
                all_counts_match = False

        result.loc[idx, 'TimepointEqualsElements'] = all_counts_match

    return result


def main():
    """Read the overview CSV, add the check columns and save the result."""
    df = pd.read_csv(input_file_path)
    df = add_timepoint_checks(df)
    df.to_csv(final_csv, index=False)


if __name__ == "__main__":
    main()