NDCLab · computingcharlie · Oct 27, 2025 · Oct 22, 2025 · Oct 22, 2025 · Oct 22, 2025
diff --git a/code/behavior/behavior_analysis.py b/code/behavior/behavior_analysis.py
@@ -0,0 +1,301 @@
+import sys
+import os
+import glob
+import pandas as pd
+import numpy as np
+import re
+import time
+import datetime
+
+pd.options.mode.chained_assignment = None
+
+def convert_to_list_rt(series):
+    float_list = []
+    for value in series:
+        if isinstance(value, str):
+            if "," in value.strip("[]"):
+                float_list.append([float(v) for v in value.strip("[]").split(",")][0]) # Check if the value is a string
+            else:
+                float_list.append(float(value.strip("[]"))) # Convert string to float and remove brackets
+        elif isinstance(value, list): # Check if the value is a list
+            float_list.extend([float(v) for v in value]) # Convert each element of the list to float
+        else: # Handle NaN values
+            float_list.append(np.nan) # Append NaN if value is NaN
+    return float_list
+
+def convert_to_list_resp(series):
+    resp_list = []
+    for value in series:
+        if isinstance(value, str):
+            converted_row = list(map(int, re.findall(r'\d+', value)))
+            resp_list.append(converted_row)
+        else: # Handle NaN values
+            resp_list.append(np.nan) # Append NaN if value is NaN
+    return resp_list
+
+start = time.time()
+session = sys.argv[1]
+
+#session = "s1_r1"
+input_dataset_path = "/home/data/NDClab/datasets/read-study2-dataset/"
+output_dataset_path = "/home/data/NDClab/analyses/read-study2-alpha/"
+data_path = "sourcedata/raw/"
+sub_path = f"{session}/psychopy/"
+output_path = f"derivatives/behavior/{session}/"
+
+date_time = datetime.datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
+sys.stdout = open(f"{output_dataset_path}{output_path}{date_time}_log.txt","wt")
+
+n_blocks = 20
+n_trials = 40
+valid_rt_thresh = 0.150
+
+#sub_folders = [i for i in os.listdir(input_dataset_path + data_path) if i.startswith("sub-")]
+sub_folders = glob.glob(f"{input_dataset_path}/{data_path}/sub-*/{sub_path}/*")
+subjects = sorted(set([re.findall(r'\d+', item.split("/")[-4])[0] for item in sub_folders]))
+print(subjects)
+processing_log = dict()
+summary_columns = [
+            "n_trials", "invalid_rt_percent", "skipped_percent",
+            "acc", "acc_con", "acc_incon", "rt_con", "rt_incon", "rt_corr", "rt_err",
+            "rt_con_log", "rt_incon_log", "rt_corr_log", "rt_err_log",
+            "pes", "pea", "peri_acc", "peri_rt", "6_or_more_err",
+            ]
+processing_log["sub"] = []
+processing_log["success"] = []
+processing_log["n_trials"] = []
+
+for condition in [0, 1]:
+    if condition == 0:
+        prefix = "_nonsoc"
+    elif condition == 1:
+        prefix = "_soc"
+    for colname in summary_columns:
+        processing_log[colname + prefix] = []
+
+for sub in subjects:
+    processing_log["sub"].append(sub)    
+    subject_folder = (input_dataset_path + data_path + "sub-" + sub + os.sep + sub_path)
+    num_files = len(os.listdir(subject_folder))
+    if ((num_files != 3) and (sub not in ["3000124", "3000008", "3000014"]) and session == "s1_r1") or (np.any(["deviation" in i for i in os.listdir(subject_folder)])):
+        processing_log["success"].append(0)
+        print("sub-{} has a deviation in psychopy data ({} files), skipping ...".format(sub, num_files))
+        [processing_log[i].append(np.nan) for i in list(processing_log.keys())[2:]]
+        pass
+#    elif:
+#       os.path.exists(f"{output_dataset_path}{output_path}sub-{sub}_trial_data.csv"):
+#        print(f"sub-{sub} is already processed! Skipping...")
+    else:
+        print("Processing sub-{}...".format(sub))
+        processing_log["success"].append(1)
+
+        pattern = f"{subject_folder}/sub-{sub}_arrow-alert-v1-*_psychopy_{session}_e1.csv"
+        filename = glob.glob(pattern)
+        data = pd.read_csv(filename[0])
+        start_index = data["task_blockText.started"].first_valid_index()
+        data = data.iloc[start_index:, :].dropna(subset = "middleStim")
+        data = data[data["conditionText"].isin(["Observed", "Alone"])].reset_index(drop=True)
+        assert (len(data) == n_blocks * n_trials), "Check your data length!"
+        processing_log["n_trials"].append(len(data))
+
+        trial_data = data[[
+            "target",
+            "congruent",
+            "accuracy",
+            "task_stim_keyResp.rt",
+            "task_stim_keyResp.stopped",
+            "task_stim_keyResp.keys",
+            "conditionText",
+        ]]
+        trial_data["rt"] = convert_to_list_rt(trial_data["task_stim_keyResp.rt"])
+        trial_data.drop("task_stim_keyResp.rt", axis = 1, inplace = True)
+        assert (np.sum([type(i) != float for i in trial_data["rt"]]) == 0), "Check your RT!"
+
+        trial_data["resp_direction_R"] = convert_to_list_resp(trial_data["task_stim_keyResp.keys"])
+        trial_data.drop("task_stim_keyResp.keys", axis = 1, inplace = True)
+
+        trial_data.columns = [
+            "target",
+            "congruent",
+            "accuracy",
+            "task_stim_keyResp.stopped",
+            "condition",
+            "rt",
+            "resp_direction_R",
+        ]
+        trial_data["condition_soc"] = [1 if i == "Observed" else 0 for i in trial_data["condition"]]
+        trial_data.drop("condition", axis = 1, inplace = True)
+
+        trial_data["target_R"] = [0 if i == "left" else 1 for i in trial_data["target"]]
+        trial_data.drop("target", axis = 1, inplace = True)
+
+        trial_data["fl_direction_R"] = [
+                                        0 if 
+                                        (
+                                            (trial_data.loc[i, 'target_R'] == 0 and trial_data.loc[i, 'congruent'] == 1) or
+                                            (trial_data.loc[i, 'target_R'] == 1 and trial_data.loc[i, 'congruent'] == 0)
+                                        )
+                                        else 1 if 
+                                        (
+                                            (trial_data.loc[i, 'target_R'] == 0 and trial_data.loc[i, 'congruent'] == 0) or
+                                            (trial_data.loc[i, 'target_R'] == 1 and trial_data.loc[i, 'congruent'] == 1)
+                                        )
+                                        else None
+                                        for i in range(len(trial_data))
+                                    ]
+        trial_data["valid_rt"] = [0 if i < valid_rt_thresh else 1 for i in trial_data["rt"]]
+        trial_data["no_resp"] = [1 if np.isnan(i) else 0 for i in trial_data["rt"]]
+
+        trial_data["block_num"] = sum([[i] * n_trials for i in range(1, n_blocks+1)], [])
+        trial_data["trial_num"] = [i for i in range(1, len(trial_data)+1)]
+        trial_data["first_trial"] = [1 if i == 0 else 0 for i in range(len(trial_data))]
+        trial_data["last_trial"] = [1 if i == (len(trial_data)-1) else 0 for i in range(len(trial_data))]
+
+        extra_resp = []
+        resp_direction = []
+        for i in range(len(trial_data)):
+            row = trial_data.loc[i, "resp_direction_R"]
+            if type(row) == list:
+                if row[0] == 1:
+                    resp_direction.append(0)
+                elif row[0] == 8:
+                    resp_direction.append(1)
+                if len(row) > 1:
+                    extra_resp.append(1)
+                else:
+                    extra_resp.append(0)
+            elif np.isnan(row):
+                extra_resp.append(np.nan)
+                resp_direction.append(np.nan)
+
+        trial_data["resp_direction_R"] = resp_direction
+        trial_data["extra_resp"] = extra_resp
+
+        assert (len(trial_data) == n_blocks * n_trials), "Check your data length!"
+
+        current_cols = trial_data.columns
+        for col_name in current_cols:
+            trial_data["pre_" + col_name] = "None"
+            trial_data["next_" + col_name] = "None"
+
+        # Iterate through each row of the dataframe
+        for i in range(len(trial_data)):
+            # Check for previous trial (n-1) if it exists and is in the same block
+            if i > 0 and (trial_data.loc[i, 'task_stim_keyResp.stopped'] - trial_data.loc[i-1, 'task_stim_keyResp.stopped']) <= 3\
+            and trial_data.loc[i, 'valid_rt'] == 1 and trial_data.loc[i, 'no_resp'] == 0:
+                for col_name in current_cols:
+                    trial_data.loc[i, 'pre_' + col_name] = trial_data.loc[i-1, col_name]
+            else:
+                for col_name in current_cols:
+                    trial_data.loc[i, 'pre_' + col_name] = np.nan
+        for i in range(len(trial_data)):
+            # Check for next trial (n+1) if it exists and is in the same block
+            if i < len(trial_data)-1 and (trial_data.loc[i+1, 'task_stim_keyResp.stopped'] - trial_data.loc[i, 'task_stim_keyResp.stopped']) <= 3\
+            and trial_data.loc[i, 'valid_rt'] == 1 and trial_data.loc[i, 'no_resp'] == 0:
+                for col_name in current_cols:
+                    trial_data.loc[i, 'next_' + col_name] = trial_data.loc[i+1, col_name]
+            else:
+                for col_name in current_cols:
+                    trial_data.loc[i, 'next_' + col_name] = np.nan
+
+        # Check if the string "None" exists anywhere in the DataFrame to make sure all cells were properly populated in the above step
+        assert not ((trial_data == "None").any().any()), "Check your data!"
+
+        trial_data.drop(['pre_task_stim_keyResp.stopped', 'next_task_stim_keyResp.stopped'], axis = 1, inplace = True)
+
+        trial_data["sub"] = sub
+        all_cols = list(trial_data.columns)[:-1]
+        all_cols.insert(0, "sub")
+        trial_data = trial_data[all_cols]
+
+        trial_data.to_csv(f"{output_dataset_path}{output_path}sub-{sub}_trial_data.csv", index=False)
+        condition = []
+        for condition in [0, 1]:
+            if condition == 0:
+                prefix = "_nonsoc"
+            elif condition == 1:
+                prefix = "_soc"
+            condition_data = trial_data[trial_data["condition_soc"] == condition]
+            processing_log["n_trials"+prefix].append(len(condition_data))
+            processing_log["skipped_percent"+prefix].append(np.round(condition_data["no_resp"].sum() / len(condition_data) * 100, 3))
+            processing_log["invalid_rt_percent"+prefix].append(np.round((1 - (sum(condition_data["valid_rt"]) / len(condition_data))) * 100, 3))
+            condition_data = condition_data[(condition_data["valid_rt"] == 1)]
+            processing_log["6_or_more_err"+prefix].append(1 if len(condition_data[(condition_data["no_resp"] == 0) & (condition_data["accuracy"] == 0)]) >= 6 else 0)
+            processing_log["acc"+prefix].append(np.round(condition_data.accuracy.mean(), 3))
+            processing_log["acc_con"+prefix].append(np.round(condition_data[condition_data["congruent"] == 1].accuracy.mean(), 3))
+            processing_log["acc_incon"+prefix].append(np.round(condition_data[condition_data["congruent"] == 0].accuracy.mean(), 3))
+            processing_log["rt_con"+prefix].append(np.round(condition_data[(condition_data["congruent"] == 1) & (condition_data["accuracy"] == 1)]["rt"].mean() * 1000, 3))
+            processing_log["rt_con_log"+prefix].append(np.round(np.log(condition_data[(condition_data["congruent"] == 1) & (condition_data["accuracy"] == 1)]["rt"]).mean() * 1000, 3))            
+            processing_log["rt_incon"+prefix].append(np.round(condition_data[(condition_data["congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"].mean() * 1000, 3))
+            processing_log["rt_incon_log"+prefix].append(np.round(np.log(condition_data[(condition_data["congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"]).mean() * 1000, 3))
+            processing_log["rt_corr"+prefix].append(np.round(condition_data[(condition_data["congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"].mean() * 1000, 3))
+            processing_log["rt_corr_log"+prefix].append(np.round(np.log(condition_data[(condition_data["congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"]).mean() * 1000, 3))
+            processing_log["rt_err"+prefix].append(np.round(condition_data[(condition_data["congruent"] == 0) & (condition_data["accuracy"] == 0)]["rt"].mean() * 1000, 3))
+            processing_log["rt_err_log"+prefix].append(np.round(np.log(condition_data[(condition_data["congruent"] == 0) & (condition_data["accuracy"] == 0)]["rt"]).mean() * 1000, 3))
+            condition_data = condition_data[(condition_data["pre_valid_rt"] == 1) & (condition_data["pre_extra_resp"] == 0) & (condition_data["pre_no_resp"] == 0)]
+            processing_log["pes"+prefix].append(np.round(
+                np.log(
+                    condition_data[(condition_data["accuracy"] == 1) & (condition_data["pre_accuracy"] == 0) &\
+                    (condition_data["pre_congruent"] == 0)]["rt"]
+                ).mean()\
+                - np.log(
+                    condition_data[(condition_data["accuracy"] == 1) & (condition_data["pre_accuracy"] == 1) &\
+                    (condition_data["pre_congruent"] == 0)]["rt"]
+                ).mean(), 5
+            ))
+            processing_log["pea"+prefix].append(np.round(
+                condition_data[(condition_data["pre_accuracy"] == 0) & (condition_data["pre_congruent"] == 0)]["accuracy"].mean()\
+                - condition_data[(condition_data["pre_accuracy"] == 1) & (condition_data["pre_congruent"] == 0)]["accuracy"].mean(), 5
+            ))
+
+            processing_log["peri_acc"+prefix].append(np.round(
+                (
+                    condition_data[(condition_data["pre_accuracy"] == 0) & (condition_data["congruent"] == 0) &\
+                    (condition_data["pre_congruent"] == 0)]["accuracy"].mean()\
+                 - condition_data[(condition_data["pre_accuracy"] == 0) & (condition_data["congruent"] == 1) &\
+                    (condition_data["pre_congruent"] == 0)]["accuracy"].mean()
+                )\
+                - (
+                    condition_data[(condition_data["pre_accuracy"] == 1) & (condition_data["congruent"] == 0) &\
+                    (condition_data["pre_congruent"] == 0)]["accuracy"].mean()\
+                 - condition_data[(condition_data["pre_accuracy"] == 1) & (condition_data["congruent"] == 1) &\
+                    (condition_data["pre_congruent"] == 0)]["accuracy"].mean()
+                ), 5
+            ))
+
+            processing_log["peri_rt"+prefix].append(np.round(
+                (
+                    np.log(
+                    condition_data[(condition_data["pre_accuracy"] == 0) & (condition_data["congruent"] == 0) &\
+                        (condition_data["pre_congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"]
+                    ).mean()\
+                 - np.log(
+                     condition_data[(condition_data["pre_accuracy"] == 0) & (condition_data["congruent"] == 1) &\
+                     (condition_data["pre_congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"]
+                 ).mean()
+                )\
+                - (
+                    np.log(
+                    condition_data[(condition_data["pre_accuracy"] == 1) & (condition_data["congruent"] == 0) &\
+                    (condition_data["pre_congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"]
+                    ).mean()\
+                 - np.log(
+                     condition_data[(condition_data["pre_accuracy"] == 1) & (condition_data["congruent"] == 1) &\
+                     (condition_data["pre_congruent"] == 0) & (condition_data["accuracy"] == 1)]["rt"]
+                 ).mean()
+                  ), 5
+            ))
+
+        print(f"sub-{sub} has been processed")
+
+pd.DataFrame(processing_log).to_csv(f"{output_dataset_path}{output_path}summary_{date_time}.csv", index=False)
+
+list_of_ind_csv = []
+for df in sorted([i for i in os.listdir(f"{output_dataset_path}{output_path}") if "sub-" in i]):
+    list_of_ind_csv.append(pd.read_csv(f"{output_dataset_path}{output_path}{df}"))
+full_df = pd.concat(list_of_ind_csv)
+# full_df = full_df[(full_df["pre_accuracy"] == 1) | (full_df["pre_accuracy"] == 0)]
+full_df.to_csv(f"{output_dataset_path}{output_path}full_df_{date_time}.csv", index = False)
+
+end = time.time()
+print(f"Executed time {np.round(end - start, 2)} s")
diff --git a/code/behavior/behavior_processing_batch.sub b/code/behavior/behavior_processing_batch.sub
@@ -0,0 +1,29 @@
+#!/bin/bash
+#SBATCH --job-name=read-behavior         # create a short name for your job
+#SBATCH --nodes=1                # node count
+#SBATCH --ntasks=1               # total number of tasks across all nodes
+#SBATCH --cpus-per-task=6
+#SBATCH --time=24:00:00          # total run time limit (HH:MM:SS)
+#SBATCH --mem=20G
+#SBATCH --partition=highmem1
+#SBATCH --qos=highmem1
+#SBATCH --account=iacc_gbuzzell
+#SBATCH --output=%x-%j.out
+#SBATCH --mail-type=end          # send email when job ends
+#SBATCH --mail-user=cknowlto@fiu.edu
+
+export PYTHONUNBUFFERED=TRUE
+
+pwd; hostname; date
+echo "flurm cpus per task: $SLURM_CPUS_PER_TASK"
+
+source activate base
+conda activate /home/data/NDClab/analyses/read-study2-alpha/containers/READenvironment
+conda run -n READenvironment python -u behavior_analysis.py $"s1_r1"
+
+errors=$(cat ${SLURM_JOB_NAME}-${SLURM_JOB_ID}.out | grep "Error")
+if [[ -z ${errors} ]]; then
+    echo "Behavior processing complete."
+else
+    echo "Behavior processing exited with errors: ${errors}"
+fi
diff --git a/code/behavior/check_subject_csv.py b/code/behavior/check_subject_csv.py
@@ -0,0 +1,25 @@
+import os
+import re
+import glob
+
+input_dataset_path = "/home/data/NDClab/datasets/read-study2-dataset/"
+data_path = "sourcedata/raw/s1_r1/psychopy/"
+skip_subjects = ["3300118", "3300138"] #subs with _1 after s1_r1_e1 in reading-ranger filenames
+
+sub_folders = [i for i in os.listdir(input_dataset_path + data_path) if i.startswith("sub-")]
+subjects = sorted([re.findall(r'\d+', item)[0] for item in sub_folders])
+for sub in subjects:
+    if sub in skip_subjects:
+        print(f"sub-{sub} skipped (in skip list)")
+        continue
+    subject_folder = (input_dataset_path + data_path + "sub-" + sub + os.sep)
+    num_files = len(os.listdir(subject_folder))
+    if (num_files != 6):
+        print("sub-{} has unresolved deviation in psychopy data ({} files), skipping ...".format(sub, num_files))
+        pass
+    else:
+        print("sub-{} checked".format(sub))    
+        pattern_arrow = "{}sub-{}_arrow-alert-nf-v1-2_psychopy_s1_r1_e1.csv".format(subject_folder, sub)
+        assert len(glob.glob(pattern_arrow)) != 0, f"sub-{sub} arrow-alert .csv has deviation in filename"
+        pattern_reading = "{}sub-{}_reading-ranger-v2-*_psychopy_s1_r1_e1.csv".format(subject_folder, sub)
+        assert len(glob.glob(pattern_reading)) != 0, f"sub-{sub} reading-ranger .csv has deviation in filename"
diff --git a/code/MADE_pipeline.m → code/preprocessEEG/MADE_pipeline.m b/code/MADE_pipeline.m → code/preprocessEEG/MADE_pipeline.m
diff --git a/code/create_mat_s1.m → code/preprocessEEG/create_mat_s1.m b/code/create_mat_s1.m → code/preprocessEEG/create_mat_s1.m
diff --git a/code/edit_event_markers_read.m → code/preprocessEEG/edit_event_markers_read.m b/code/edit_event_markers_read.m → code/preprocessEEG/edit_event_markers_read.m
diff --git a/code/not_processed_checked.py → code/preprocessEEG/not_processed_checked.py b/code/not_processed_checked.py → code/preprocessEEG/not_processed_checked.py