123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
- # -*- coding: utf-8 -*-
- """
- Created on Fri Nov 17 11:34:11 2023
- @author: arefks
- """
- import os
- import glob
- import pandas as pd
- from PIL import Image
- import numpy as np
- import concurrent.futures
- # Step 1: Define the starting path and file pattern
- start_path = r"C:\Users\aswen\Desktop\Code\Validation3"
- file_pattern_all_images = os.path.join(start_path, "*", "manual_slice_inspection", "*.png")
- manual_slice_inspection_image_files = glob.glob(file_pattern_all_images, recursive=True)
- file_pattern_all_raters = os.path.join(start_path, "*", "validation*", "*.png")
- validators_image_files = glob.glob(file_pattern_all_raters, recursive=True)
- # Step 2: Create DataFrames
- column_names = ["Path", "dataset_name", "validator_name"]
- manual_slice_inspection_df = pd.DataFrame(
- [
- [file_path, path_elements[-3] if len(path_elements) >= 3 else None, path_elements[-2] if len(path_elements) >= 2 else None]
- for file_path in manual_slice_inspection_image_files
- for path_elements in [file_path.split(os.sep)]
- ],
- columns=column_names,
- )
- validators_df = pd.DataFrame(
- [
- [file_path, path_elements[-3] if len(path_elements) >= 3 else None, path_elements[-2] if len(path_elements) >= 2 else None]
- for file_path in validators_image_files
- for path_elements in [file_path.split(os.sep)]
- ],
- columns=column_names,
- )
- # Get unique dataset names and validators
- unique_datasets = manual_slice_inspection_df["dataset_name"].unique()
- def process_dataset(dataset_name):
- ma_subset = manual_slice_inspection_df[manual_slice_inspection_df["dataset_name"] == dataset_name]
- va_subset = validators_df[validators_df["dataset_name"] == dataset_name]
- results = []
- for index, ma_row in ma_subset.iterrows():
- ma_path = ma_row["Path"]
- ma_image = Image.open(ma_path).convert('RGB')
- ma_image_array = np.array(ma_image)
- result_row = {"Path": ma_path}
- is_same_all = []
- for _, va_row in va_subset.iterrows():
- va_path = va_row["Path"]
- va_image = Image.open(va_path).convert('RGB')
- va_image_array = np.array(va_image)
- diff_image = abs(ma_image_array - va_image_array)
- threshold = 1e-6 # You can adjust this threshold based on your needs
- is_same = np.sum(diff_image) < threshold
- is_same_all.append(is_same)
- result_row[va_row["validator_name"]] = is_same
- result_row["dataset_name"] = dataset_name
- result_row["SequenceType"] = ma_path.split(os.sep)[-1].split("_")[0]
- validator_names = va_subset[is_same_all].validator_name
- for v in validator_names:
- result_row[v] = True
- results.append(result_row)
- #print(C)
-
- print("finished: " + dataset_name)
- return results
- # Get the number of available CPUs
- num_cpus = os.cpu_count()
- # Create a ThreadPoolExecutor
- with concurrent.futures.ThreadPoolExecutor(max_workers=num_cpus) as executor:
- # Process datasets in parallel
- all_results = list(executor.map(process_dataset, unique_datasets))
- # Concatenate the results into the final DataFrame
- result_df = pd.concat([pd.DataFrame(results) for results in all_results], ignore_index=True)
- # Fill NaN values with False
- result_df = result_df.fillna(False)
- # Optionally, you can save the combined DataFrame to a CSV file
- output_path = r"C:\Users\aswen\Desktop\Code\AIDAqc_Figures\input"
- result_df.to_csv(os.path.join(output_path, "combined_Human_Voters_from_votings3_Final.csv"), index=False)
|