# -*- coding: utf-8 -*-
# Repository: doi/2024_Kalantari_PRR (forked from Aswendt_Lab/2024_Kalantari_PRR)
"""
Created on Mon Jun 24 08:37:33 2024

@author: arefk
"""

import os
import pandas as pd
import numpy as np

# Anchor every path to this script's own location rather than to the current
# working directory: the folder holding this file, then that folder's parent.
code_dir, _ = os.path.split(os.path.abspath(__file__))
parent_dir, _ = os.path.split(code_dir)

# Both the raw overview table and the pre-check result live in the 'input'
# folder that sits next to the code folder.
input_dir = os.path.join(parent_dir, 'input')
input_file_path = os.path.join(input_dir, 'Ninox_data_overview_all.csv')
final_csv = os.path.join(input_dir, 'Ninox_data_overview_all_precheck.csv')

# Load the overview table; all later steps annotate this DataFrame in place.
df = pd.read_csv(input_file_path)

# Maps each source column to the name of the column that receives its element
# count. Insertion order matters: the two lists derived below are consumed
# pairwise, so they must stay aligned.
# NOTE(review): 'PawDrang_CT_count' looks like a misspelling of "PawDrag"; it
# is left untouched because it is a runtime column name.
_count_column_by_source = {
    'RightHindlimbDrop_RB': 'HLD_RB_count',
    'Speed_RB': 'Speed_RB_count',
    'Distance_RB': 'Distance_RB_count',
    'PercentRightPawDragPerTouch_CT': 'PawDrang_CT_count',
    'PercentLeftTouch_CT': 'PercentLeftTouch_CT_count',
    'TotalNumberOfTouches_CT': 'TotalNumberOfTouches_CT_count',
    'NumberOfFootFaultsDividedByTotalNumber_GW': 'FootFaults_GW_count',
}

# Column names that need to be processed, and the matching count-column names.
columns_to_process = list(_count_column_by_source)
columns_short_name = list(_count_column_by_source.values())

# Create the result columns up front so every row has a defined value:
# integer counters start at 0, the overall consistency flag starts at False.
for count_col in ['NumberOfTimepoints', *columns_short_name]:
    df[count_col] = 0
df['TimepointEqualsElements'] = False

def _count_elements(value):
    """Return the number of ", "-separated elements held by one cell value.

    Parameters
    ----------
    value : object
        A single cell taken from the DataFrame.

    Returns
    -------
    int
        ``len(value.split(", "))`` for strings, 0 for missing values
        (NaN/None/NaT), and 1 for any other scalar (for example a lone
        number in a column that was parsed as numeric).
    """
    if isinstance(value, str):
        return len(value.split(", "))
    if pd.isna(value):
        return 0
    # A non-missing, non-string scalar is a single element.
    return 1


# Iterate through each unique StudyID. Missing StudyIDs are skipped: NaN never
# compares equal to itself, so the selection below would be empty for them.
# Rows without a StudyID therefore keep their initial values.
for ss in df["StudyID"].dropna().unique():
    temp_df = df[df["StudyID"] == ss]
    idx = temp_df.index

    # Only the first row of a study is inspected; the resulting counts are
    # written to every row that shares the StudyID.
    num_timepoints = _count_elements(temp_df["TimePointsBehavior"].values[0])

    # Update the NumberOfTimepoints column
    df.loc[idx, 'NumberOfTimepoints'] = num_timepoints

    # Count the elements in each column to process and compare every count
    # with the number of time points.
    all_counts_match = True
    for col, short_col in zip(columns_to_process, columns_short_name):
        element_count = _count_elements(temp_df[col].values[0])

        # Update the corresponding count column
        df.loc[idx, short_col] = element_count

        # A single mismatching column is enough to flag the whole study
        if element_count != num_timepoints:
            all_counts_match = False

    # Update the TimepointEqualsElements column
    df.loc[idx, 'TimepointEqualsElements'] = all_counts_match

# Write the annotated table next to the input file; the DataFrame index is
# not written to the CSV.
df.to_csv(path_or_buf=final_csv, index=False)