-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess.py
More file actions
143 lines (108 loc) · 5.63 KB
/
process.py
File metadata and controls
143 lines (108 loc) · 5.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import ast
from scipy.signal import resample
# This version primarily focussed on creating sets with unprocessed A-scans
# It does this for singular A-scans
# Number of samples each A-scan is resampled to before assembling the dataset.
noSamples = 300
# Extracting key parameters and assigning variables -----------------
# Plain-text "key = value" file describing how the dataset should be built.
Dataset_definitions_file_path = "processing_dataset_definitions.txt"
def get_value(key, file_path):
    """Look up *key* in a plain-text ``key = value`` definitions file.

    Scans the file line by line and returns the value of the first line
    that starts with *key* and contains an ``=``, as a string with
    surrounding whitespace stripped.  Returns ``None`` when no matching
    line is found.

    Note: matching is by prefix, so a short key would also match a longer
    one (e.g. "PCA" matches "PCA_scaling"); callers pass full key names.

    Parameters:
        key (str): key name the line must start with.
        file_path (str): path to the definitions file.

    Returns:
        str or None: the stripped value, or None if the key is absent.
    """
    with open(file_path, 'r') as file:
        for line in file:
            # Require an '=' so a malformed matching line cannot raise.
            if line.startswith(key) and "=" in line:
                # Split on the FIRST '=' only, so values that themselves
                # contain '=' are preserved intact.
                return line.split("=", 1)[1].strip()
    return None
# Convert these values to floats as they represent numerical data
# Counts of training examples per class (target / mixed / false alarm).
Training_examples_TGT = int(get_value("Training examples Target", Dataset_definitions_file_path))
Training_examples_MIX = int(get_value("Training examples Mix", Dataset_definitions_file_path))
Training_examples_FA = int(get_value("Training examples False Alarm", Dataset_definitions_file_path))
# Number of A-scans recorded for each training example.
scans_per_example = int(get_value("Scans per training example", Dataset_definitions_file_path))
# PCA settings are read here but not used anywhere in this script.
PCA_factor = float(get_value("PCA_scaling", Dataset_definitions_file_path))
PCA_variance = float(get_value("PCA_variance", Dataset_definitions_file_path))
# Batch label; embedded in the output filename at the end of the script.
Batch_name = get_value("Batch name", Dataset_definitions_file_path)
# Calculating key parameters
# Total scan counts per class and overall.
Training_scans_TGT = scans_per_example * Training_examples_TGT
Training_scans_MIX = scans_per_example * Training_examples_MIX
Training_scans_FA = scans_per_example * Training_examples_FA
total_scans = Training_scans_TGT + Training_scans_MIX + Training_scans_FA
# path = f'Current_code/Intermediate_datasets/NEW_TGT_FA.csv'
# Source CSV of raw A-scans: one scan per row, no header row.
path = 'Current_code/Intermediate_datasets/SIMPLES_500S_5_13_V7.csv'
# path = 'Current_code/Intermediate_datasets/NBR_TGT_MIX_FA.csv'
data = pd.read_csv(path , header=None)
input_array = data.values
# Normalize input A-scans
maxVal = 8.08228 # Max amplitude in free space
# Scale all amplitudes into roughly [-1, 1] relative to the free-space maximum.
input_array_normalized = input_array / maxVal
print("input array shape",input_array_normalized.shape)
# Apply time zero correction: clip each trace at its first sample whose
# normalized amplitude exceeds 0.05, then truncate every clipped trace to
# the length of the shortest one so they form a rectangular array again.
# (np.argmax on a boolean array yields the index of the first True.)
corrected_rows = [trace[np.argmax(trace > 0.05):] for trace in input_array_normalized]
min_length = min(len(trace) for trace in corrected_rows)
truncated_rows = np.array([trace[:min_length] for trace in corrected_rows])
print("truncated array shape",truncated_rows.shape)
# np.savetxt(f'Geom/Repeats/First_Batch/Data_processing/Time_0_A_scans_{Batch_name}.csv', truncated_rows, delimiter=',')
def remove_background_and_reduce(a_scans, scans_per_group=None, subtract_background=False):
    """Drop the per-group background row, optionally subtracting it first.

    The input is organised in groups of ``scans_per_group + 1`` rows: the
    first ``scans_per_group`` rows of a group are measurement A-scans and
    the last row is that group's background scan.  The background row is
    removed from the output.  When ``subtract_background`` is True it is
    first subtracted from each measurement row; the default is False,
    which reproduces this script's historical behaviour (the subtraction
    was deliberately disabled, leaving raw scans).

    Any incomplete trailing group (fewer than ``scans_per_group + 1``
    rows) is ignored, as in the original implementation.

    Parameters:
        a_scans (array-like): 2-D array of A-scans, grouped as described.
        scans_per_group (int, optional): measurement rows per group.
            Defaults to the module-level ``scans_per_example``.
        subtract_background (bool): subtract each group's background row
            from its measurement rows before discarding it.

    Returns:
        numpy.ndarray: the A-scans with every background row removed.
    """
    a_scans = np.asarray(a_scans)
    if scans_per_group is None:
        # Fall back to the module-level configuration value.
        scans_per_group = scans_per_example
    group_size = scans_per_group + 1
    num_groups = a_scans.shape[0] // group_size
    modified_a_scans = []
    for g in range(num_groups):
        start_idx = g * group_size
        # Last row of the group is its background scan.
        background = a_scans[start_idx + scans_per_group]
        for j in range(start_idx, start_idx + scans_per_group):
            if subtract_background:
                modified_a_scans.append(a_scans[j] - background)
            else:
                # Background subtraction intentionally disabled: keep the
                # raw scan (copied, matching the original `a_scans[j] - 0`).
                modified_a_scans.append(a_scans[j].copy())
    return np.array(modified_a_scans)
# Remove the background and reduce the array
a_scans_after_removal = remove_background_and_reduce(truncated_rows)
# Resample every scan to a fixed number of samples so all rows have equal width.
a_scans_after_removal2= resample(a_scans_after_removal , noSamples, axis=1)
print('resampled array shape:', a_scans_after_removal2.shape)
# Creating the classification values: 1 = target present (TGT or MIX scans),
# 0 = false alarm.  Rows are ordered TGT, MIX, FA to match the dataset.
ones = np.ones(((Training_scans_TGT + Training_scans_MIX), 1))
zeros = np.zeros((Training_scans_FA, 1))
# Single concatenation (the original repeated this statement twice;
# the duplicate was dead re-computation and has been removed).
classification_values = np.concatenate((ones, zeros), axis=0)
print('classification', classification_values.shape)
# Creating the categorisation values: 2 = target, 3 = mix, 4 = false alarm.
twos = np.full((Training_scans_TGT, 1), 2)
threes = np.full((Training_scans_MIX, 1), 3)
fours = np.full((Training_scans_FA, 1), 4)
categorisation_values = np.concatenate((twos, threes, fours), axis=0)
print('categorisation_value shape:', categorisation_values.shape)
# Creating the training dataset
# Each row: the resampled A-scan followed by its binary classification
# value and its 2/3/4 categorisation value as the last two columns.
Training_dataset = np.hstack((a_scans_after_removal2, classification_values, categorisation_values))
# Total column count (samples + 2 label columns); embedded in the filename.
units = Training_dataset.shape[1]
# Training_dataset = np.hstack((a_scans_after_removal, classification_values))
print("Training_dataset_shape:",Training_dataset.shape)
np.savetxt(f'ML_stuff/Training_Sets/Train_{units}_1Sc_{Batch_name}.csv', Training_dataset, delimiter=',')
print("Training dataset saved to:",f'ML_stuff/Training_Sets/Train_{units}_1Sc_{Batch_name}.csv')