1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 24 08:37:33 2024
@author: arefk

Pre-check of the Ninox behaviour overview table.

For every StudyID the script counts how many ", "-separated entries are
stored in ``TimePointsBehavior`` and in each behavioural measurement column,
writes those counts into new columns, and flags whether every measurement
column holds exactly as many entries as there are time points.
"""
import os
import pandas as pd
import numpy as np

# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)
# Input and output files live in the sibling "input" directory
input_file_path = os.path.join(parent_dir, 'input', 'Ninox_data_overview_all.csv')
final_csv = os.path.join(parent_dir, 'input', 'Ninox_data_overview_all_precheck.csv')

# Column names that need to be processed
columns_to_process = [
    'RightHindlimbDrop_RB', 'Speed_RB', 'Distance_RB',
    'PercentRightPawDragPerTouch_CT', 'PercentLeftTouch_CT',
    'TotalNumberOfTouches_CT', 'NumberOfFootFaultsDividedByTotalNumber_GW',
]
# Names of the count columns, paired positionally with columns_to_process.
# NOTE(review): 'PawDrang_CT_count' looks like a typo for "PawDrag"; it is
# kept unchanged because consumers of the output CSV may depend on it.
columns_short_name = [
    'HLD_RB_count', 'Speed_RB_count', 'Distance_RB_count',
    'PawDrang_CT_count', 'PercentLeftTouch_CT_count',
    'TotalNumberOfTouches_CT_count', 'FootFaults_GW_count',
]


def _count_elements(value):
    """Return how many ", "-separated elements a single cell holds.

    A string is split on ", ". A missing cell (NaN/None) holds 0 elements.
    Any other scalar (e.g. a number, when pandas parsed the whole column as
    numeric) is a single element.
    """
    if isinstance(value, str):
        return len(value.split(", "))
    if pd.isna(value):
        return 0
    return 1


def add_timepoint_counts(df):
    """Add time-point and per-column element counts to *df*.

    New columns, in this order: ``NumberOfTimepoints``, one column per entry
    of ``columns_short_name``, and ``TimepointEqualsElements``.

    For each StudyID the counts are taken from the FIRST row of that study
    and written to all of its rows. Rows without a StudyID are skipped and
    keep the defaults (0 counts, ``False`` flag).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``StudyID``, ``TimePointsBehavior`` and every column in
        ``columns_to_process``. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If a required column is missing.
    """
    # Initialize new columns in the DataFrame
    df['NumberOfTimepoints'] = 0
    for short_col in columns_short_name:
        df[short_col] = 0
    df['TimepointEqualsElements'] = False

    # dropna(): a NaN StudyID never compares equal to itself, so it would
    # select no rows and there would be no first row to read.
    for study_id in df["StudyID"].dropna().unique():
        mask = df["StudyID"] == study_id
        group = df.loc[mask]

        # The first row of the study is taken as representative.
        num_timepoints = _count_elements(group["TimePointsBehavior"].iloc[0])
        df.loc[mask, 'NumberOfTimepoints'] = num_timepoints

        counts = {
            short_col: _count_elements(group[col].iloc[0])
            for col, short_col in zip(columns_to_process, columns_short_name)
        }
        for short_col, element_count in counts.items():
            df.loc[mask, short_col] = element_count

        # True only when every measurement column has one entry per time point
        df.loc[mask, 'TimepointEqualsElements'] = all(
            element_count == num_timepoints for element_count in counts.values()
        )

    return df


def main():
    """Read the overview CSV, add the count columns and save the result."""
    df = pd.read_csv(input_file_path)
    add_timepoint_counts(df)
    # Save the processed DataFrame to a new CSV file
    df.to_csv(final_csv, index=False)


if __name__ == "__main__":
    main()
|