From c29c202ff8ff8d1dc5d4c3dcce0bd89ca171ae46 Mon Sep 17 00:00:00 2001
From: LuigiAltamura <luigi.altamura@mail.polimi.it>
Date: Wed, 15 Nov 2023 21:44:53 +0100
Subject: [PATCH 1/6] Added MAESTRO-like mapping as a result

---
 src/GAMMA/gamma.py | 12 +++++++++---
 src/GAMMA/train.py |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/GAMMA/gamma.py b/src/GAMMA/gamma.py
index e9182d0..566a3df 100755
--- a/src/GAMMA/gamma.py
+++ b/src/GAMMA/gamma.py
@@ -842,10 +842,16 @@ def get_CONVtypeShape(self, dimensions, CONVtype=1):
             print("Not supported layer.")
         return dimensions
 
-    def write_maestro(self, indv, layer_id=0, m_file=None):
+    def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None):
         dimensions = [self.dimension]
-        with open("{}.m".format(m_file), "w") as fo:
-            fo.write("Network {} {{\n".format(layer_id))
+        if layer_id != 0:
+            m_file_with_layer = "{}_{}".format(m_file, layer_id)
+        else:
+            m_file_with_layer = m_file
+
+        file_path = os.path.join(folder_path or ".", "{}.m".format(m_file_with_layer))
+        with open(file_path, "w") as fo:
+            fo.write("Network {} {{\n".format(m_file))
             for i in range(len(dimensions)):
                 dimension = dimensions[i]
                 m_type = m_type_dicts[int(dimension[-1])]
diff --git a/src/GAMMA/train.py b/src/GAMMA/train.py
index 284496a..83cb70d 100644
--- a/src/GAMMA/train.py
+++ b/src/GAMMA/train.py
@@ -91,6 +91,7 @@ def train_model(model_defs, input_arg, map_cstr=None, chkpt_file='./chkpt'):
         np_array = np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)
         df = pd.DataFrame(np_array, columns=columns)
         df.to_csv(chkpt_file[:-4]+".csv")
+        env.write_maestro(best_sol, m_file = opt.model, layer_id = opt.singlelayer, folder_path=os.path.dirname(chkpt_file))
         with open(chkpt_file, "wb") as fd:
             pickle.dump(chkpt, fd)
 

From ddd2d2a4f634fb9f43671da0b8de66b84c3e4413 Mon Sep 17 00:00:00 2001
From: LuigiAltamura <luigi.altamura@mail.polimi.it>
Date: Wed, 22 Nov 2023 17:57:45 +0100
Subject: [PATCH 2/6] Added scripts

---
 run_digamma.sh                        | 12 ++++++++++++
 run_gamma_map_cstr.sh                 | 10 ++++++++++
 script/avg_exec_time.sh               | 24 ++++++++++++++++++++++++
 script/avg_exec_time_digamma.sh       | 24 ++++++++++++++++++++++++
 script/avg_exec_time_map_cstr.sh      | 24 ++++++++++++++++++++++++
 script/avg_exec_time_with_hwconfig.sh | 24 ++++++++++++++++++++++++
 script/delete_file.sh                 | 12 ++++++++++++
 script/run_all.sh                     |  3 +++
 script/run_digamma.sh                 | 12 ++++++++++++
 script/run_gamma_map_cstr.sh          | 10 ++++++++++
 10 files changed, 155 insertions(+)
 create mode 100755 run_digamma.sh
 create mode 100755 run_gamma_map_cstr.sh
 create mode 100755 script/avg_exec_time.sh
 create mode 100755 script/avg_exec_time_digamma.sh
 create mode 100755 script/avg_exec_time_map_cstr.sh
 create mode 100755 script/avg_exec_time_with_hwconfig.sh
 create mode 100755 script/delete_file.sh
 create mode 100755 script/run_all.sh
 create mode 100755 script/run_digamma.sh
 create mode 100755 script/run_gamma_map_cstr.sh

diff --git a/run_digamma.sh b/run_digamma.sh
new file mode 100755
index 0000000..d2c5438
--- /dev/null
+++ b/run_digamma.sh
@@ -0,0 +1,12 @@
+cd ./src/GAMMA
+
+
+python main.py --num_pe -1 --area_budget 0.2 --pe_limit 200 --model vgg16 --outdir outdir_digamma
+
+cd ../../
+
+
+
+
+
+
diff --git a/run_gamma_map_cstr.sh b/run_gamma_map_cstr.sh
new file mode 100755
index 0000000..f0278ad
--- /dev/null
+++ b/run_gamma_map_cstr.sh
@@ -0,0 +1,10 @@
+cd ./src/GAMMA
+python main.py --mapping_cstr dla_map --fitness1 latency --fitness2 power --num_pe 168 --l1_size 512 --l2_size 108000 --NocBW 81920000 --epochs 10 \
+              --model vgg16 --outdir outdir_map
+cd ../../
+
+
+
+
+
+
diff --git a/script/avg_exec_time.sh b/script/avg_exec_time.sh
new file mode 100755
index 0000000..fb02bf5
--- /dev/null
+++ b/script/avg_exec_time.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+cd ..
+# Number of executions
+num_executions=10
+total_execution_time_ns=0
+
+for ((i=1; i<=$num_executions; i++)); do
+
+    start_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+    
+    ./run_gamma.sh
+
+    end_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+   
+    elapsed_time_ms=$((end_time - start_time))
+    
+    total_execution_time_ms=$((total_execution_time_ms + elapsed_time_ms))
+done
+
+
+average_execution_time_ms=$((total_execution_time_ms / num_executions))
+
+# Append the average execution time in milliseconds to output.txt
+echo "Gamma -> Avg Execution Time: $average_execution_time_ms ms"  >> "output.txt"
diff --git a/script/avg_exec_time_digamma.sh b/script/avg_exec_time_digamma.sh
new file mode 100755
index 0000000..631d391
--- /dev/null
+++ b/script/avg_exec_time_digamma.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+cd ..
+# Number of executions
+num_executions=10
+total_execution_time_ns=0
+
+for ((i=1; i<=$num_executions; i++)); do
+
+    start_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+    
+    ./run_digamma.sh
+
+    end_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+   
+    elapsed_time_ms=$((end_time - start_time))
+    
+    total_execution_time_ms=$((total_execution_time_ms + elapsed_time_ms))
+done
+
+
+average_execution_time_ms=$((total_execution_time_ms / num_executions))
+
+# Append the average execution time in milliseconds to output.txt
+echo "DiGamma -> Avg Execution Time: $average_execution_time_ms ms" >> "output.txt"
diff --git a/script/avg_exec_time_map_cstr.sh b/script/avg_exec_time_map_cstr.sh
new file mode 100755
index 0000000..88f03f5
--- /dev/null
+++ b/script/avg_exec_time_map_cstr.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+cd ..
+# Number of executions
+num_executions=10
+total_execution_time_ns=0
+
+for ((i=1; i<=$num_executions; i++)); do
+
+    start_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+    
+    ./run_gamma_map_cstr.sh
+
+    end_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+   
+    elapsed_time_ms=$((end_time - start_time))
+    
+    total_execution_time_ms=$((total_execution_time_ms + elapsed_time_ms))
+done
+
+
+average_execution_time_ms=$((total_execution_time_ms / num_executions))
+
+# Append the average execution time in milliseconds to output.txt
+echo "Map Cstr -> Avg Execution Time: $average_execution_time_ms ms" >> "output.txt"
diff --git a/script/avg_exec_time_with_hwconfig.sh b/script/avg_exec_time_with_hwconfig.sh
new file mode 100755
index 0000000..9f4b8c8
--- /dev/null
+++ b/script/avg_exec_time_with_hwconfig.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+cd ..
+# Number of executions
+num_executions=10
+total_execution_time_ns=0
+
+for ((i=1; i<=$num_executions; i++)); do
+
+    start_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+    
+     ./run_gamma_with_hwconfig.sh
+
+    end_time=$(perl -MTime::HiRes -e 'printf("%.0f\n",Time::HiRes::time()*1000)')
+   
+    elapsed_time_ms=$((end_time - start_time))
+    
+    total_execution_time_ms=$((total_execution_time_ms + elapsed_time_ms))
+done
+
+
+average_execution_time_ms=$((total_execution_time_ms / num_executions))
+
+# Display the average execution time in milliseconds
+echo "Av Execution Time: $average_execution_time_ms ms"
diff --git a/script/delete_file.sh b/script/delete_file.sh
new file mode 100755
index 0000000..5014a05
--- /dev/null
+++ b/script/delete_file.sh
@@ -0,0 +1,12 @@
+folder_path="$HOME/Desktop/Tesi/gamma/src/GAMMA"
+
+# Navigate to the folder
+cd "$folder_path" || exit
+
+# Delete .csv files
+find . -type f -name "*.csv" -delete
+
+# Delete .m files
+find . -type f -name "*.m" -delete
+
+echo "Deletion complete for .csv and .m files in $folder_path"
\ No newline at end of file
diff --git a/script/run_all.sh b/script/run_all.sh
new file mode 100755
index 0000000..176256f
--- /dev/null
+++ b/script/run_all.sh
@@ -0,0 +1,3 @@
+./avg_exec_time.sh 
+./avg_exec_time_map_cstr.sh
+./avg_exec_time_digamma.sh 
\ No newline at end of file
diff --git a/script/run_digamma.sh b/script/run_digamma.sh
new file mode 100755
index 0000000..d2c5438
--- /dev/null
+++ b/script/run_digamma.sh
@@ -0,0 +1,12 @@
+cd ./src/GAMMA
+
+
+python main.py --num_pe -1 --area_budget 0.2 --pe_limit 200 --model vgg16 --outdir outdir_digamma
+
+cd ../../
+
+
+
+
+
+
diff --git a/script/run_gamma_map_cstr.sh b/script/run_gamma_map_cstr.sh
new file mode 100755
index 0000000..f0278ad
--- /dev/null
+++ b/script/run_gamma_map_cstr.sh
@@ -0,0 +1,10 @@
+cd ./src/GAMMA
+python main.py --mapping_cstr dla_map --fitness1 latency --fitness2 power --num_pe 168 --l1_size 512 --l2_size 108000 --NocBW 81920000 --epochs 10 \
+              --model vgg16 --outdir outdir_map
+cd ../../
+
+
+
+
+
+

From 5fadd616c84834fc59b094246e03835b5594224e Mon Sep 17 00:00:00 2001
From: LuigiAltamura <luigi.altamura@mail.polimi.it>
Date: Tue, 16 Jan 2024 16:45:42 +0100
Subject: [PATCH 3/6] Add multilayer support

GAMMA now supports multilayer analysis. Fixed a bug where each layer overwrote the .csv result; the .csv now contains one row per layer, and a separate .m mapping file is written for each layer.
---
 requirements.txt   |  4 ----
 run_gamma.sh       |  2 +-
 src/GAMMA/main.py  |  2 +-
 src/GAMMA/train.py | 29 ++++++++++++++++++++++-------
 4 files changed, 24 insertions(+), 13 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 73d4c0a..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-numpy==1.18.5
-matplotlib==3.2.1
-pandas==1.0.4
-nevergrad==0.4.2
\ No newline at end of file
diff --git a/run_gamma.sh b/run_gamma.sh
index 23dc76f..fbc9e48 100755
--- a/run_gamma.sh
+++ b/run_gamma.sh
@@ -1,6 +1,6 @@
 cd ./src/GAMMA
 python main.py --fitness1 latency --fitness2 power --num_pe 168 --l1_size 512 --l2_size 108000 --NocBW 81920000 --epochs 10 \
-              --model vgg16 --singlelayer 1
+              --model vgg16 --num_layer 13 #--singlelayer 1 
 cd ../../
 
 
diff --git a/src/GAMMA/main.py b/src/GAMMA/main.py
index 9600ede..bd7f0ea 100644
--- a/src/GAMMA/main.py
+++ b/src/GAMMA/main.py
@@ -15,7 +15,7 @@
     parser.add_argument('--offchipBW', type=int, default=-1, help='Off-chip BW')
     parser.add_argument('--hwconfig', type=str, default=None, help='HW configuration file')
     parser.add_argument('--model', type=str, default="resnet18", help='Model to run')
-    parser.add_argument('--num_layer', type=int, default=2, help='Number of layers to optimize')
+    parser.add_argument('--num_layer', type=int, default=0, help='Number of layers to optimize')
     parser.add_argument('--singlelayer', type=int, default=0, help='The layer index to optimize')
     parser.add_argument('--slevel_min', type=int, default=2, help='Minimum number of parallelization level')
     parser.add_argument('--slevel_max', type=int, default=2, help='Maximum number of parallelization level')
diff --git a/src/GAMMA/train.py b/src/GAMMA/train.py
index 83cb70d..d89bf19 100644
--- a/src/GAMMA/train.py
+++ b/src/GAMMA/train.py
@@ -64,6 +64,8 @@ def train_model(model_defs, input_arg, map_cstr=None, chkpt_file='./chkpt'):
                       l2_size=opt.l2_size, NocBW=opt.NocBW, offchipBW=opt.offchipBW, slevel_min=opt.slevel_min, slevel_max=opt.slevel_max,
                       fixedCluster=opt.fixedCluster, log_level=opt.log_level, map_cstr=map_cstr)
     constraints = {"area":opt.area_budget* 1e6}
+    chkpt_list = []
+    num_layer = 1
     for dimension in model_defs:
         env.reset_dimension(fitness=fitness, constraints=constraints, dimension=dimension)
         env.reset_hw_parm(num_pe=opt.num_pe, l1_size=opt.l1_size, l2_size=opt.l2_size, pe_limit=opt.pe_limit,area_pebuf_only=False, external_area_model=True)
@@ -87,13 +89,26 @@ def train_model(model_defs, input_arg, map_cstr=None, chkpt_file='./chkpt'):
             "L1_size": best_l1_size,
             "L2_size": best_l2_size
         }
-        columns = ["runtime", "area", "pe_area_ratio", "PE", "L1_size", "L2_size", "PE_area", "L1_area", "L2_area","best_sol"]
-        np_array = np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)
-        df = pd.DataFrame(np_array, columns=columns)
-        df.to_csv(chkpt_file[:-4]+".csv")
-        env.write_maestro(best_sol, m_file = opt.model, layer_id = opt.singlelayer, folder_path=os.path.dirname(chkpt_file))
-        with open(chkpt_file, "wb") as fd:
-            pickle.dump(chkpt, fd)
+        chkpt_list.append(chkpt)
+        if opt.num_layer != 0:
+            env.write_maestro(best_sol, m_file = opt.model, layer_id = num_layer, folder_path=os.path.dirname(chkpt_file))
+        else:
+            env.write_maestro(best_sol, m_file = opt.model, layer_id = opt.singlelayer, folder_path=os.path.dirname(chkpt_file))
+
+        num_layer += 1
+
+    columns = ["runtime", "area", "pe_area_ratio", "PE", "L1_size", "L2_size", "PE_area", "L1_area", "L2_area","best_sol"]
+    np_array = None
+    for chkpt in chkpt_list:
+        if np_array is None:
+            np_array = np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)
+        else:
+            np_array = np.vstack([np_array, np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)])
+    df = pd.DataFrame(np_array, columns=columns)
+    df.to_csv(chkpt_file[:-4]+".csv")
+
+    with open(chkpt_file, "wb") as fd:
+        pickle.dump(chkpt_list, fd)
 
 def get_cstr_name(mapping_cstr):
     if mapping_cstr:

From 9d9d1ee46a47b32c10ba2ff7299c459016f82402 Mon Sep 17 00:00:00 2001
From: LuigiAltamura <luigi.altamura@mail.polimi.it>
Date: Mon, 22 Apr 2024 10:49:31 +0200
Subject: [PATCH 4/6] Add extract_dataflow.py

---
 extract_dataflow.py | 52 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 extract_dataflow.py

diff --git a/extract_dataflow.py b/extract_dataflow.py
new file mode 100644
index 0000000..399ef2a
--- /dev/null
+++ b/extract_dataflow.py
@@ -0,0 +1,52 @@
+import os
+import argparse
+
+def extract_dataflow(file_name, output_folder):
+    # Read the content of the original file
+    with open(file_name, 'r') as f:
+        content = f.read()
+
+    # Find the starting and ending indices of the Dataflow section
+    start_index = content.find("Dataflow {")
+    end_index = content.find("}", start_index) + 1
+
+    # Extract the Dataflow section
+    dataflow_section = content[start_index:end_index]
+
+    # Add a new line after the closing brace
+    dataflow_section += '\n'
+
+    # Indent each line inside the Dataflow section with a tab; the header line and the closing-brace line stay unindented
+    lines = dataflow_section.split('\n')
+    indented_lines = '\n'.join(lines[0:1] + ['\t' + line for line in lines[1:-2]] + lines[-2:])
+
+    # Create the output folder if it doesn't exist
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
+    # Construct the output file path without the "_dataflow" suffix
+    output_file_name = os.path.splitext(os.path.basename(file_name))[0].replace('_dataflow', '')
+    output_file_path = os.path.join(output_folder, output_file_name + '.m')
+
+    # Write the modified Dataflow content to the new file
+    with open(output_file_path, 'w') as f:
+        # Write the modified Dataflow section
+        f.write(indented_lines)
+
+if __name__ == "__main__":
+    # Create an argument parser
+    parser = argparse.ArgumentParser(description='Extract Dataflow from a file and save to a new file.')
+
+    # Add the file and output folder arguments
+    parser.add_argument('--file', type=str, help='Path of the input file')
+    parser.add_argument('--out', type=str, help='Path of the output folder')
+
+    # Parse the command-line arguments
+    args = parser.parse_args()
+
+    # Check if both --file and --out arguments are provided
+    if args.file and args.out:
+        # Extract the Dataflow part and save it to the output folder
+        extract_dataflow(args.file, args.out)
+    else:
+        print("Both --file and --out arguments are required.")

From 0277b628bfc32a714fe24bfedae12b62e6a68723 Mon Sep 17 00:00:00 2001
From: LuigiAltamura <luigi.altamura@mail.polimi.it>
Date: Wed, 8 May 2024 16:10:57 +0200
Subject: [PATCH 5/6] Add mixed precision to GAMMA

---
 build.py                                      |   2 +-
 .../costmodel_cstr/maestro_cstr.py            |   3 +
 data/model/vgg16.csv                          |  28 ++--
 src/GAMMA/gamma.py                            | 101 +++++++++---
 src/GAMMA/main.py                             |  36 ++---
 src/GAMMA/train.py                            | 151 ++++++++++++++----
 6 files changed, 230 insertions(+), 91 deletions(-)

diff --git a/build.py b/build.py
index bb69cd7..efe6146 100644
--- a/build.py
+++ b/build.py
@@ -1,7 +1,7 @@
 import os, sys
 commit_id = 'e1d8efd8e5469cf865a9db60007a70e3f0cb8778'
 dst_path = "cost_model/maestro"
-maestro_dir = "../maestro"
+maestro_dir = "../qmaestro"
 working_path = os.getcwd()
 dst_path = os.path.join(working_path, dst_path)
 maestro = os.path.join(maestro_dir, "maestro")
diff --git a/data/mapping_cstr/advanced_cstr/costmodel_cstr/maestro_cstr.py b/data/mapping_cstr/advanced_cstr/costmodel_cstr/maestro_cstr.py
index 14a0e21..ac3c67f 100644
--- a/data/mapping_cstr/advanced_cstr/costmodel_cstr/maestro_cstr.py
+++ b/data/mapping_cstr/advanced_cstr/costmodel_cstr/maestro_cstr.py
@@ -1,5 +1,8 @@
 import numpy as np
 mapping_cstr = {}
+mapping_cstr["L3"] = {"R":"R",
+                      "S":"S",
+                      }
 mapping_cstr["L2"] = {"R":"R",
                       "S":"S",
                       }
diff --git a/data/model/vgg16.csv b/data/model/vgg16.csv
index bd43515..c935806 100755
--- a/data/model/vgg16.csv
+++ b/data/model/vgg16.csv
@@ -1,14 +1,14 @@
-K,C,Y,X,R,S,T
-64,3,224,224,3,3,1
-64,64,224,224,3,3,1
-128,64,112,112,3,3,1
-128,128,112,112,3,3,1
-256,128,56,56,3,3,1
-256,256,56,56,3,3,1
-256,256,56,56,3,3,1
-512,256,28,28,3,3,1
-512,512,28,28,3,3,1
-512,512,28,28,3,3,1
-512,512,14,14,3,3,1
-512,512,14,14,3,3,1
-512,512,14,14,3,3,1
+K,C,Y,X,R,S,T,Precision
+64,3,224,224,3,3,1,FP16
+64,64,224,224,3,3,1,FP16
+128,64,112,112,3,3,1,FP16
+128,128,112,112,3,3,1,FP16
+256,128,56,56,3,3,1,FP16
+256,256,56,56,3,3,1,FP16
+256,256,56,56,3,3,1,FP16
+512,256,28,28,3,3,1,FP16
+512,512,28,28,3,3,1,FP16
+512,512,28,28,3,3,1,FP16
+512,512,14,14,3,3,1,FP16
+512,512,14,14,3,3,1,FP16
+512,512,14,14,3,3,1,FP16
diff --git a/src/GAMMA/gamma.py b/src/GAMMA/gamma.py
index 566a3df..42d169b 100755
--- a/src/GAMMA/gamma.py
+++ b/src/GAMMA/gamma.py
@@ -1,3 +1,5 @@
+import functools
+
 import numpy as np
 import copy, random
 import os
@@ -60,7 +62,7 @@ def __init__(self,dimension, map_cstr=None, num_pe=64, pe_limit=1024, fitness="l
         self.area_pebuf_only=False
         self.external_area_model = False
 
-    def reset_hw_parm(self, l1_size=None, l2_size=None, num_pe=None, NocBW=None, map_cstr=None, pe_limit=None,area_pebuf_only=None, external_area_model=None, offchipBW=None):
+    def reset_hw_parm(self, l1_size=None, l2_size=None, num_pe=None, NocBW=None, map_cstr=None, pe_limit=None,area_pebuf_only=None, external_area_model=None, offchipBW=None, slevel_max=None, slevel_min=None):
         if l1_size:
             self.l1_size=l1_size if l1_size > 0 else 2**30
         if l2_size:
@@ -79,6 +81,12 @@ def reset_hw_parm(self, l1_size=None, l2_size=None, num_pe=None, NocBW=None, map
             self.area_pebuf_only = area_pebuf_only
         if external_area_model:
             self.external_area_model = external_area_model
+        if map_cstr:
+            self.map_cstr = map_cstr
+        if slevel_min:
+            self.slevel_min = slevel_min
+        if slevel_max:
+            self.slevel_max = slevel_max
 
     def get_dimension_factors(self, dimension_dict):
         dimension_factors = dict()
@@ -548,7 +556,7 @@ def create_unit_base_pops(self, population, num_all_unit=None):
                 for i in range(1, 7):
                     population[idx][i + level * 7][1] = 1
 
-    def reinit_pop(self,pool, num_population,  stage_idx, best_sol_1st, init_pop, cur_gen=-1, bias= None, num_all_unit=2):
+    def reinit_pop(self,pool, num_population,  stage_idx, best_sol_1st, init_pop, cur_gen=-1, bias= None, num_all_unit=2, precision=None):
         population = [self.create_genome_fixedSL(bias=bias) for _ in range(num_population)]
         #====always create a base unit pop=======
         self.create_unit_base_pops(population, num_all_unit=num_all_unit)
@@ -565,7 +573,8 @@ def reinit_pop(self,pool, num_population,  stage_idx, best_sol_1st, init_pop, cu
         self.num_parents = num_population
         self.comform_to_cstr(population)
         self.fitness = np.ones((max(num_population, len(population)), len(self.fitness_objective)), float)
-        self.evaluate(pool=pool, population=population,cur_gen=cur_gen)
+
+        self.evaluate(pool=pool, population=population,cur_gen=cur_gen, precision=precision)
         return population
 
 
@@ -640,14 +649,15 @@ def adjust_fitness(self, fitness):
         gen_best_idx = np.argmax(fitness[:,0])
         return fitness, gen_best_idx
 
-    def evaluate(self, pool, population, cur_gen=-1):
+    def evaluate(self, pool, population, cur_gen=-1, precision=None):
         gen_best = -float("Inf")
         gen_best_activity = None
         gen_best_idx = 0
         count_non_valid = 0
         # populations = pool.map(self.thread_fun_correctify_tile_dependency, population)
         # population[:] = populations
-        reward_activ_list = pool.map(self.thread_fun, population)
+        partial_thread_fun = functools.partial(self.thread_fun, precision=precision)
+        reward_activ_list = pool.map(partial_thread_fun, population)
 
         for i in range(len(population)):
             reward, activity_count = reward_activ_list[i]
@@ -721,17 +731,17 @@ def injection(self, inject_ratio=1.0):
         return pop_inj, inj_fitness
 
     def run(self, dimension, stage_idx=0, prev_stage_value=0, num_population=100, num_generations=100, elite_ratio=0.05,
-                       parents_ratio=0.4, ratio_decay=1, num_finetune=1, best_sol_1st=None, init_pop=None, bias=None, uni_base=True, use_factor=False, use_pleteau=False, L1_bias_template=None):
+                       parents_ratio=0.4, ratio_decay=1, num_finetune=1, best_sol_1st=None, init_pop=None, bias=None, uni_base=True, use_factor=False, use_pleteau=False, L1_bias_template=None, precision=None):
         self.init_arguement(dimension=dimension, stage_idx=stage_idx, prev_stage_value=prev_stage_value, num_population=num_population, num_generations=num_generations, elite_ratio=elite_ratio,
                        parents_ratio=parents_ratio, ratio_decay=ratio_decay, num_finetune=num_finetune, best_sol_1st=best_sol_1st, init_pop=init_pop,uni_base=uni_base, use_factor=use_factor, use_pleteau=use_pleteau,L1_bias_template=L1_bias_template)
         pool = Pool(min(self.num_population + self.num_elite, cpu_count()))
-        population = self.reinit_pop(pool,self.num_population,  self.stage_idx, self.best_sol_1st, self.init_pop, bias=bias)
+        population = self.reinit_pop(pool,self.num_population,  self.stage_idx, self.best_sol_1st, self.init_pop, bias=bias, precision=precision)
         if self.map_cstr:
             self.cstr_list, self.num_free_order, self.num_free_par = self.map_cstr.get_cstr_list(copy.deepcopy(population[0]), fixed_sp_sz=self.fixedCluster)
         for g in range(num_generations):
 
             while self.num_parents < 1:  # restart
-                population = self.reinit_pop(pool, self.num_population, self.stage_idx, self.best_sol_1st, self.init_pop, cur_gen=g)
+                population = self.reinit_pop(pool, self.num_population, self.stage_idx, self.best_sol_1st, self.init_pop, cur_gen=g, precision=precision)
                 print("Reinitialize population")
 
             population, self.fitness, self.parents = self.select_parents(population, self.fitness, self.num_parents, self.num_population,)
@@ -769,12 +779,12 @@ def run(self, dimension, stage_idx=0, prev_stage_value=0, num_population=100, nu
             # population = elite + population + pop_inj
             self.fitness = np.concatenate((self.elite_fitness, self.fitness))
             # self.fitness = np.concatenate((self.elite_fitness, self.fitness, inj_fitness))
-            chkpt = self.evaluate(pool=pool, population=population, cur_gen=g)
+            chkpt = self.evaluate(pool=pool, population=population, cur_gen=g, precision=precision)
             # self.check_tile_dependency(population)
 
             if self.log_level>1:
                 if chkpt["best_sol"] is not None and self.log_level>1:
-                    best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = self.get_indiv_info( chkpt["best_sol"])
+                    best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = self.get_indiv_info( chkpt["best_sol"], precision=precision)
                     # best_num_pe = chkpt["best_sol"][0][1] if self.num_pe<1 else self.num_pe
                     # print(f"Runtime: {best_runtime}, L1: {best_l1_size}, L2: {best_l2_size}, L1_usage:{best_l1_size/self.l1_size:}, L2_usage:{best_l2_size/self.l2_size:.4f}, PE: {best_num_pe}")
                     print(f"Gen {g+1}: Reward: {chkpt['best_reward'][0]:.3e}, Runtime: {best_runtime}, Area: {best_area/1e6:.3f}mm2,  PE Area_ratio: {best_num_pe*MAC_AREA_INT8/best_area*100:.1f}%, L1: {best_l1_size}, L2: {best_l2_size},  PE: {best_num_pe}")
@@ -820,12 +830,12 @@ def sort_population(self, population):
     def thread_fun_correctify_tile_dependency(self, indv):
         return self.correctify_tile_dependency_thread(indv)
 
-    def thread_fun(self, individual):
-        reward, activity_count = self.oberserve_maestro(individual)
+    def thread_fun(self, individual, precision=None):
+        reward, activity_count = self.oberserve_maestro(individual, precision=precision)
         return [reward, activity_count]
 
-    def get_indiv_info(self, individual, num_pe=None, l1_size=None, l2_size=None, NocBW=None):
-        self.oberserve_maestro(individual,num_pe=num_pe, l1_size=l1_size, l2_size=l2_size, NocBW=NocBW)
+    def get_indiv_info(self, individual, num_pe=None, l1_size=None, l2_size=None, NocBW=None, precision=None):
+        self.oberserve_maestro(individual,num_pe=num_pe, l1_size=l1_size, l2_size=l2_size, NocBW=NocBW, precision=precision)
         return self.observation
 
     def get_CONVtypeShape(self, dimensions, CONVtype=1):
@@ -842,7 +852,7 @@ def get_CONVtypeShape(self, dimensions, CONVtype=1):
             print("Not supported layer.")
         return dimensions
 
-    def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None):
+    def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None, precision=None):
         dimensions = [self.dimension]
         if layer_id != 0:
             m_file_with_layer = "{}_{}".format(m_file, layer_id)
@@ -861,6 +871,8 @@ def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None):
                 fo.write(
                     "Dimensions {{ K: {:.0f}, C: {:.0f}, Y: {:.0f}, X: {:.0f}, R: {:.0f}, S: {:.0f} }}\n".format(
                         *dimension))
+                if precision is not None:
+                    fo.write("Precision: {{ {} }}\n".format(precision))
                 fo.write("Dataflow {\n")
                 for k in range(0, len(indv), 7):
                     for i in range(k, k + 7):
@@ -888,16 +900,16 @@ def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None):
                 fo.write("}\n")
             fo.write("}")
 
-    def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW=None, offchipBW=None):
+    def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW=None, offchipBW=None, precision=None):
 
         m_file = "{}".format(random.randint(0, 2**32))
-        self.write_maestro(indv,m_file=m_file)
+        self.write_maestro(indv, m_file=m_file, precision=precision)
         if num_pe:
-            to_use_num_pe = num_pe
+            to_use_num_pe = self.num_pe_to_use(num_pe, precision)
         elif self.num_pe <1:
-            to_use_num_pe = indv[0][1]
+            to_use_num_pe = self.num_pe_to_use(indv[0][1], precision)
         else:
-            to_use_num_pe = self.num_pe
+            to_use_num_pe = self.num_pe_to_use(self.num_pe, precision)
         # print(num_pe, bw, l1_size)
         os.remove("./{}.csv".format(m_file)) if os.path.exists("./{}.csv".format(m_file)) else None
         command = [self._executable,
@@ -908,6 +920,8 @@ def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW
                    "--noc_mc_support=true", "--num_pes={}".format(int(to_use_num_pe)),
                    "--num_simd_lanes=1", "--l1_size_cstr={}".format(self.l1_size if not l1_size else l1_size),
                    "--l2_size_cstr={}".format(self.l2_size if not l2_size else l2_size), "--print_res=false", "--print_res_csv_file=true", "--print_log_file=false", "--print_design_space=false", "--msg_print_lv=0"]
+# "--num_simd_lanes=1", "--l1_size_cstr={}".format(int(self.l1_to_use(self.l1_size, precision)) if not l1_size else
+        # int(self.l1_to_use(l1_size, precision))),
 
         process = Popen(command, stdout=PIPE, stderr=PIPE)
         stdout, stderr = process.communicate()
@@ -960,7 +974,8 @@ def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW
             elif self.area_pebuf_only:
                 area = self.compute_area_maestro(to_use_num_pe, l1_size, l2_size)
 
-            self.observation = [np.mean(x) for x in [runtime, throughput, energy, area, l1_size, l2_size, mac, power, to_use_num_pe]]
+            self.observation = [np.mean(x) for x in [runtime, throughput, energy, area, l1_size, l2_size, mac, power,
+                                                     self.restore_num_pe(to_use_num_pe, precision)]]
             def catch_exception():
                 if l1_size>self.l1_size or l2_size>self.l2_size or any(runtime_series<1) or any(l1_size_series<1) or any(l2_size_series<1):
                     return True
@@ -977,6 +992,50 @@ def catch_exception():
         except:
             return None, None
 
+    def num_pe_to_use(self, num_pe, precision):
+
+        if precision is None or precision == "FP32":
+            return num_pe
+        if precision == "FP16":
+            return num_pe / 2
+        if precision == "FP8":
+            return num_pe / 4
+        if precision == "INT32":
+            return num_pe
+        if precision == "INT16":
+            return num_pe / 2
+        if precision == "INT8":
+            return num_pe / 4
+
+    def restore_num_pe(self, num_pe, precision):
+
+        if precision is None or precision == "FP32":
+            return num_pe
+        if precision == "FP16":
+            return num_pe * 2
+        if precision == "FP8":
+            return num_pe * 4
+        if precision == "INT32":
+            return num_pe
+        if precision == "INT16":
+            return num_pe * 2
+        if precision == "INT8":
+            return num_pe * 4
+
+    def l1_to_use(self, l1_size, precision):
+
+        if precision is None or precision == "FP32":
+            return l1_size
+        if precision == "FP16":
+            return l1_size * 2
+        if precision == "FP8":
+            return l1_size * 4
+        if precision == "INT32":
+            return l1_size
+        if precision == "INT16":
+            return l1_size * 2
+        if precision == "INT8":
+            return l1_size * 4
     def impose_halloffame(self, observe_value, target="latency_ave" ):
         is_violated = False
         if self.stat is not None:
diff --git a/src/GAMMA/main.py b/src/GAMMA/main.py
index bd7f0ea..2d180cb 100644
--- a/src/GAMMA/main.py
+++ b/src/GAMMA/main.py
@@ -17,8 +17,8 @@
     parser.add_argument('--model', type=str, default="resnet18", help='Model to run')
     parser.add_argument('--num_layer', type=int, default=0, help='Number of layers to optimize')
     parser.add_argument('--singlelayer', type=int, default=0, help='The layer index to optimize')
-    parser.add_argument('--slevel_min', type=int, default=2, help='Minimum number of parallelization level')
-    parser.add_argument('--slevel_max', type=int, default=2, help='Maximum number of parallelization level')
+    parser.add_argument('--slevel_min', type=int, default=3, help='Minimum number of parallelization level')
+    parser.add_argument('--slevel_max', type=int, default=3, help='Maximum number of parallelization level')
     parser.add_argument('--fixedCluster', type=int, default=0, help='Rigid cluster size')
     parser.add_argument('--log_level', type=int, default=1, help='Detail: 2, runtimeinfo: 1')
     parser.add_argument('--costmodel_cstr', type=str, default='maestro_cstr', help='Constraint from Cost model')
@@ -41,7 +41,17 @@
     m_file_path = "../../data/model/"
     m_file = os.path.join(m_file_path, opt.model + ".csv")
     df = pd.read_csv(m_file)
-    model_defs = df.to_numpy()
+    model = df.to_numpy()
+    model_defs = []
+    precision_array = []
+    for sublist in model:
+        string_indices = [i for i, elem in enumerate(sublist) if isinstance(elem, str)]
+        precision_array.extend(sublist[i] for i in string_indices)
+        model_defs.append(np.delete(sublist, string_indices).tolist())
+    model_defs = np.array(model_defs)
+    # Keep precisions aligned with the single-layer slice applied to model_defs below.
+    if opt.singlelayer:
+        precision_array = precision_array[opt.singlelayer - 1:opt.singlelayer]
     if opt.singlelayer:
         model_defs=model_defs[opt.singlelayer-1:opt.singlelayer]
     else:
@@ -63,30 +73,12 @@
     os.makedirs(outdir_exp, exist_ok=True)
     chkpt_file_t = "{}".format("result")
     chkpt_file = os.path.join(outdir_exp, chkpt_file_t + "_c.plt")
-    map_cstr = None
-    if opt.accel_cstr:
-        accel_file = importlib.import_module(f'data.mapping_cstr.advanced_cstr.accel_cstr.{opt.accel_cstr}')
-        accelator_cstr = accel_file.accel_cstr
-        map_cstr = Constraint(num_pe=opt.num_pe)
-        translate_to_actual_cstr(accelator_cstr, map_cstr)
-
-    if  opt.mapping_cstr:
-        mapping_file = importlib.import_module(f'data.mapping_cstr.{opt.mapping_cstr}')
-        mapping_cstr = mapping_file.mapping_cstr
-        map_cstr = Constraint(num_pe=opt.num_pe) if not map_cstr else map_cstr
-        put_into_actual_cstr(mapping_cstr, map_cstr)
-
-    if  opt.costmodel_cstr:
-        mapping_file = importlib.import_module(f'data.mapping_cstr.advanced_cstr.costmodel_cstr.{opt.costmodel_cstr}')
-        costmodel_cstr = mapping_file.mapping_cstr
-        map_cstr = Constraint(num_pe=opt.num_pe) if not map_cstr else map_cstr
-        put_into_actual_cstr(costmodel_cstr, map_cstr)
 
     if check_tpu(opt.accel_cstr, opt.mapping_cstr):
         model_defs = translate_to_gemm(model_defs)
 
     try:
-        train_model(model_defs, input_arg=opt, map_cstr=map_cstr, chkpt_file=chkpt_file)
+        train_model(model_defs, input_arg=opt, chkpt_file=chkpt_file, precisions=precision_array)
 
     finally:
         for f in glob.glob("*.m"):
diff --git a/src/GAMMA/train.py b/src/GAMMA/train.py
index d89bf19..bf901e2 100644
--- a/src/GAMMA/train.py
+++ b/src/GAMMA/train.py
@@ -1,29 +1,28 @@
-
 import copy
 import argparse
 from datetime import datetime
 
 import glob
 import os, sys
+
 script_dir = os.path.dirname(__file__)
 module_path = os.path.abspath(os.path.join(script_dir, '../'))
 project_path = os.path.abspath(os.path.join(script_dir, '../../'))
 if module_path not in sys.path:
-    sys.path.insert(0,module_path)
+    sys.path.insert(0, module_path)
 if project_path not in sys.path:
-    sys.path.insert(0,project_path)
+    sys.path.insert(0, project_path)
 from utils import *
 import gamma as gamma
-from math import ceil
 import importlib
-from shutil import copyfile
+
 fitness_list = None
 fitness = None
 stage_idx = 0
 prev_stage_value = []
 tune_iter = 1
 opt = None
-MAC_AREA_MAESTRO=4470
+MAC_AREA_MAESTRO = 4470
 MAC_AREA_INT8 = 282
 BUF_AREA_perbit = 0.086
 L2BUF_AREA_MAESTRO = 4161.536
@@ -31,58 +30,81 @@
 L2BUF_UNIT = 32768
 L1BUF_UNIT = 64
 
+# bias = {"par": {1: "K", 2:"C"}, "order":{1:["K", "C"]}, "tiles": {1:{"K":0.1, "C":0.2}, 2:{"K":0.3}}}
+bias = {"par": {1: "K", 2: "C"}, "order": {1: ["K", "C", "Y", "X"], 2: ["K", "C", "Y", "X"]}}
 
 
-# bias = {"par": {1: "K", 2:"C"}, "order":{1:["K", "C"]}, "tiles": {1:{"K":0.1, "C":0.2}, 2:{"K":0.3}}}
-bias = {"par": {1: "K", 2:"C"}, "order":{1:["K", "C","Y", "X"], 2:["K", "C","Y", "X"]}}
 # bias = {"par": {1: "K", 2:"C"}}
 # bias = {"par": {1: "Y"}}
 
 
-def get_pe_usage(env, sol, num_pe ):
+def get_pe_usage(env, sol, num_pe):
     util_num_pe = num_pe
-    baseline = env.get_indiv_info( sol, num_pe=num_pe)
+    baseline = env.get_indiv_info(sol, num_pe=num_pe)
     best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = baseline
     baseline = np.array(baseline)[:-2]
-    for i in range(num_pe-1):
+    for i in range(num_pe - 1):
         util_num_pe -= 1
         cur = env.get_indiv_info(sol, num_pe=util_num_pe)
         best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = cur
         cur = np.array(cur)[:-2]
-        if sum(baseline!=cur)>1:
+        if sum(baseline != cur) > 1:
             util_num_pe += 1
             break
     return util_num_pe
 
-def train_model(model_defs, input_arg, map_cstr=None, chkpt_file='./chkpt'):
+
+def train_model(model_defs, input_arg, chkpt_file='./chkpt', precisions=None):
     global opt
     opt = input_arg
-    fitness = [ opt.fitness1, opt.fitness2]
+    fitness = [opt.fitness1, opt.fitness2]
     dimension = model_defs[0]
+    map_cstr = None
     env = gamma.GAMMA(dimension=dimension, num_pe=opt.num_pe, fitness=fitness, par_RS=opt.parRS,
                       l1_size=opt.l1_size,
-                      l2_size=opt.l2_size, NocBW=opt.NocBW, offchipBW=opt.offchipBW, slevel_min=opt.slevel_min, slevel_max=opt.slevel_max,
+                      l2_size=opt.l2_size, NocBW=opt.NocBW, offchipBW=opt.offchipBW, slevel_min=opt.slevel_min,
+                      slevel_max=opt.slevel_max,
                       fixedCluster=opt.fixedCluster, log_level=opt.log_level, map_cstr=map_cstr)
-    constraints = {"area":opt.area_budget* 1e6}
+    constraints = {"area": opt.area_budget * 1e6}
     chkpt_list = []
     num_layer = 1
+
     for dimension in model_defs:
+
+        # Pick this layer's precision; it rescales the PE count and parallelism levels below.
+        if precisions is not None and len(precisions):
+            precision = precisions[num_layer - 1]
+        else:
+            precision = None
+
+        map_cstr = map_constraints(map_cstr, opt, precision)
+
         env.reset_dimension(fitness=fitness, constraints=constraints, dimension=dimension)
-        env.reset_hw_parm(num_pe=opt.num_pe, l1_size=opt.l1_size, l2_size=opt.l2_size, pe_limit=opt.pe_limit,area_pebuf_only=False, external_area_model=True)
+        env.reset_hw_parm(num_pe=get_value_for_pe(precision, opt.num_pe),
+                          l1_size=opt.l1_size,
+                          l2_size=opt.l2_size, pe_limit=opt.pe_limit,
+                          area_pebuf_only=False, external_area_model=True, map_cstr=map_cstr,
+                          slevel_max=get_value_for_precision(precision), slevel_min=get_value_for_precision(precision))
+        # NOTE: the L1-size rescaling was removed here; verify whether it should be restored.
         chkpt, pops = env.run(dimension, stage_idx=0, num_population=opt.num_pop, prev_stage_value=None,
-                                  num_generations=opt.epochs,
-                                  best_sol_1st=None, init_pop=None, bias=None, uni_base=True, use_factor=opt.use_factor, use_pleteau=False)
+                              num_generations=opt.epochs,
+                              best_sol_1st=None, init_pop=None, bias=None, uni_base=True, use_factor=opt.use_factor,
+                              use_pleteau=False, precision=precision)
         best_sol = chkpt["best_sol"]
-        best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = env.get_indiv_info(best_sol, num_pe=None)
+        best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = env.get_indiv_info(
+            best_sol, num_pe=None, precision=precision)
         print("Mapping:", chkpt["best_sol"])
-        print(f"Reward: {chkpt['best_reward'][0]:.3e}, Runtime: {best_runtime:.0f}(cycles), Area: {best_area/1e6:.3f}(mm2), PE Area_ratio: {best_num_pe*MAC_AREA_INT8/best_area*100:.1f}%, Num_PE: {best_num_pe:.0f}, L1 Buffer: {best_l1_size:.0f}(elements), L2 Buffer: {best_l2_size:.0f}(elements)")
+        print(
+            f"Reward: {chkpt['best_reward'][0]:.3e}, Runtime: {best_runtime:.0f}(cycles), Area: {best_area / 1e6:.3f}(mm2), PE Area_ratio: {best_num_pe * MAC_AREA_INT8 / best_area * 100:.1f}%, Num_PE: {best_num_pe:.0f}, L1 Buffer: {best_l1_size:.0f}(elements), L2 Buffer: {best_l2_size:.0f}(elements)")
         chkpt = {
-            "reward":chkpt['best_reward'][0],
-            "best_sol":best_sol,
-            "runtime":best_runtime,
-            "area":best_area,
-            "pe_area_ratio":best_num_pe*MAC_AREA_INT8/best_area,
-            "PE":best_num_pe,
+            "reward": chkpt['best_reward'][0],
+            "Best_solution": best_sol,
+            "Runtime": best_runtime,
+            "Throughput (MACs/Cycle)": best_throughput,
+            "Activity count-based Energy (nJ)": best_energy,
+            "Area": best_area,
+            "PE_Area_Ratio": best_num_pe * MAC_AREA_INT8 / best_area,
+            "PE": best_num_pe,
             "PE_area": best_num_pe * MAC_AREA_INT8,
             "L1_area": best_l1_size * best_num_pe * BUF_AREA_perbit * 8,
             "L2_area": best_l2_size * BUF_AREA_perbit * 8,
@@ -91,25 +113,30 @@ def train_model(model_defs, input_arg, map_cstr=None, chkpt_file='./chkpt'):
         }
         chkpt_list.append(chkpt)
         if opt.num_layer != 0:
-            env.write_maestro(best_sol, m_file = opt.model, layer_id = num_layer, folder_path=os.path.dirname(chkpt_file))
+            env.write_maestro(best_sol, m_file=opt.model, layer_id=num_layer, folder_path=os.path.dirname(chkpt_file),
+                              precision=precision)
         else:
-            env.write_maestro(best_sol, m_file = opt.model, layer_id = opt.singlelayer, folder_path=os.path.dirname(chkpt_file))
+            env.write_maestro(best_sol, m_file=opt.model, layer_id=opt.singlelayer,
+                              folder_path=os.path.dirname(chkpt_file), precision=precision)
 
         num_layer += 1
 
-    columns = ["runtime", "area", "pe_area_ratio", "PE", "L1_size", "L2_size", "PE_area", "L1_area", "L2_area","best_sol"]
+    columns = ["Runtime", "Throughput (MACs/Cycle)", "Activity count-based Energy (nJ)", "Area", "PE_Area_Ratio", "PE", "L1_size", "L2_size", "PE_area", "L1_area", "L2_area",
+               "Best_solution"]
     np_array = None
     for chkpt in chkpt_list:
         if np_array is None:
-            np_array = np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)
+            np_array = np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["Best_solution"]}']).reshape(1, -1)
         else:
-            np_array = np.vstack([np_array, np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["best_sol"]}']).reshape(1, -1)])
+            np_array = np.vstack(
+                [np_array, np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["Best_solution"]}']).reshape(1, -1)])
     df = pd.DataFrame(np_array, columns=columns)
-    df.to_csv(chkpt_file[:-4]+".csv")
+    df.to_csv(chkpt_file[:-4] + ".csv", index_label="Layer")
 
     with open(chkpt_file, "wb") as fd:
         pickle.dump(chkpt_list, fd)
 
+
 def get_cstr_name(mapping_cstr):
     if mapping_cstr:
         cstr_name = mapping_cstr
@@ -118,3 +145,61 @@ def get_cstr_name(mapping_cstr):
     return cstr_name
 
 
+def get_value_for_precision(precision):
+    """Return 2 for ``None``/"FP32" and 3 for every other precision."""
+    full_width = precision is None or precision == "FP32"
+    levels = 2 if full_width else 3
+    return levels
+
+
+def get_value_for_pe(precision, num_pe):
+    """Return ``num_pe`` scaled up for ``precision`` (32-bit: x1, 16-bit: x2, 8-bit: x4).
+
+    ``None``/"FP32" return ``num_pe`` untouched; every other supported precision returns an int.
+    An unsupported precision raises ValueError instead of silently returning None.
+    """
+    if precision is None or precision == "FP32":
+        return num_pe
+    multipliers = {"INT32": 1, "FP16": 2, "INT16": 2, "FP8": 4, "INT8": 4}
+    if precision not in multipliers:
+        raise ValueError(f"Unsupported precision: {precision}")
+    # int() is kept so the result type matches the previous implementation.
+    return int(num_pe * multipliers[precision])
+
+
+def get_value_for_l1(precision, l1_size):
+    """Return ``l1_size`` scaled down for ``precision`` (32-bit: /1, 16-bit: /2, 8-bit: /4).
+
+    32-bit precisions and ``None`` return ``l1_size`` untouched; the others return a truncated int.
+    An unsupported precision raises ValueError instead of silently returning None.
+    """
+    if precision is None or precision in ("FP32", "INT32"):
+        return l1_size
+    divisors = {"FP16": 2, "INT16": 2, "FP8": 4, "INT8": 4}
+    if precision not in divisors:
+        raise ValueError(f"Unsupported precision: {precision}")
+    # int() truncates toward zero, matching the previous int(l1_size / n) behaviour.
+    return int(l1_size / divisors[precision])
+
+
+def map_constraints(map_cstr, opt, precision):
+    """Return the mapping constraint populated from each constraint module configured in ``opt``."""
+    if opt.accel_cstr:
+        accel_rules = importlib.import_module(f'data.mapping_cstr.advanced_cstr.accel_cstr.{opt.accel_cstr}').accel_cstr
+        map_cstr = Constraint(num_pe=get_value_for_pe(precision, opt.num_pe))
+        translate_to_actual_cstr(accel_rules, map_cstr)
+
+    if opt.mapping_cstr:
+        mapping_rules = importlib.import_module(f'data.mapping_cstr.{opt.mapping_cstr}').mapping_cstr
+        if not map_cstr:
+            map_cstr = Constraint(num_pe=get_value_for_pe(precision, opt.num_pe))
+        put_into_actual_cstr(mapping_rules, map_cstr)
+
+    if opt.costmodel_cstr:
+        costmodel_rules = importlib.import_module(
+            f'data.mapping_cstr.advanced_cstr.costmodel_cstr.{opt.costmodel_cstr}').mapping_cstr
+        if not map_cstr:
+            map_cstr = Constraint(num_pe=get_value_for_pe(precision, opt.num_pe))
+        put_into_actual_cstr(costmodel_rules, map_cstr)
+
+    return map_cstr

From 3687a13646d24419b7b0c62d1c89d89a6e795885 Mon Sep 17 00:00:00 2001
From: LuigiAltamura <luigi.altamura@mail.polimi.it>
Date: Sun, 30 Jun 2024 17:52:41 +0200
Subject: [PATCH 6/6] Add mixed precision to GAMMA

---
 extract_dataflow.py                     |  67 +--
 from_maestro_to_gamma.py                |  49 ++
 src/GAMMA/cost_database.py              | 105 +++++
 src/GAMMA/gamma.py                      | 586 +++++++++++++++---------
 src/GAMMA/main.py                       |   4 +-
 src/GAMMA/train.py                      |  94 +++-
 src/Other_Blackbox_optimization/main.py |   4 +-
 src/utils/constraint.py                 |  65 +--
 8 files changed, 686 insertions(+), 288 deletions(-)
 create mode 100644 from_maestro_to_gamma.py
 create mode 100644 src/GAMMA/cost_database.py

diff --git a/extract_dataflow.py b/extract_dataflow.py
index 399ef2a..76338c1 100644
--- a/extract_dataflow.py
+++ b/extract_dataflow.py
@@ -1,52 +1,61 @@
 import os
 import argparse
 
-def extract_dataflow(file_name, output_folder):
+def extract_sections(file_name, output_folder, output_file_name=None):
     # Read the content of the original file
     with open(file_name, 'r') as f:
         content = f.read()
 
-    # Find the starting and ending indices of the Dataflow section
-    start_index = content.find("Dataflow {")
-    end_index = content.find("}", start_index) + 1
-
-    # Extract the Dataflow section
-    dataflow_section = content[start_index:end_index]
-
-    # Add a new line after the closing parenthesis
-    dataflow_section += '\n'
-
-    # Add a tab of shift to each line inside the Dataflow section, excluding the first and last lines
-    lines = dataflow_section.split('\n')
-    indented_lines = '\n'.join(lines[0:1] + ['\t' + line for line in lines[1:-2]] + lines[-2:])
+    # Initialize sections
+    precision_section = ""
+    dataflow_section = ""
+
+    # Find the Precision section, if it exists
+    precision_start_index = content.find("Precision:")
+    if precision_start_index != -1:
+        precision_end_index = content.find("}", precision_start_index) + 1
+        precision_section = content[precision_start_index:precision_end_index]
+        precision_section += '\n'
+
+    # Find the Dataflow section
+    dataflow_start_index = content.find("Dataflow {")
+    if dataflow_start_index != -1:
+        dataflow_end_index = content.find("}", dataflow_start_index) + 1
+        dataflow_section = content[dataflow_start_index:dataflow_end_index]
+        dataflow_section += '\n'
+
+        # Indent lines in Dataflow section (excluding the first and last lines)
+        dataflow_lines = dataflow_section.split('\n')
+        indented_dataflow_lines = '\n'.join(dataflow_lines[0:1] + ['\t' + line for line in dataflow_lines[1:-1]] + dataflow_lines[-1:])
+        dataflow_section = indented_dataflow_lines
 
     # Create the output folder if it doesn't exist
     if not os.path.exists(output_folder):
         os.makedirs(output_folder)
 
-    # Construct the output file path without the "_dataflow" suffix
-    output_file_name = os.path.splitext(os.path.basename(file_name))[0].replace('_dataflow', '')
-    output_file_path = os.path.join(output_folder, output_file_name + '.m')
+    # Determine the output file name
+    if output_file_name is None:
+        output_file_name = os.path.basename(file_name)
+    output_file_path = os.path.join(output_folder, output_file_name)
 
-    # Write the modified Dataflow content to the new file
+    # Write the modified Precision and Dataflow sections to the new file
     with open(output_file_path, 'w') as f:
-        # Write the modified Dataflow section
-        f.write(indented_lines)
+        if precision_section:
+            f.write(precision_section)
+        if dataflow_section:
+            f.write(dataflow_section)
 
 if __name__ == "__main__":
     # Create an argument parser
-    parser = argparse.ArgumentParser(description='Extract Dataflow from a file and save to a new file.')
+    parser = argparse.ArgumentParser(description='Extract Precision and Dataflow from a file and save to a new file.')
 
     # Add the file and output folder arguments
-    parser.add_argument('--file', type=str, help='Path of the input file')
-    parser.add_argument('--out', type=str, help='Path of the output folder')
+    parser.add_argument('--file', type=str, required=True, help='Path of the input file')
+    parser.add_argument('--out', type=str, required=True, help='Path of the output folder')
+    parser.add_argument('--outname', type=str, help='Optional name of the output file')
 
     # Parse the command-line arguments
     args = parser.parse_args()
 
-    # Check if both --file and --out arguments are provided
-    if args.file and args.out:
-        # Extract the Dataflow part and save it to the output folder
-        extract_dataflow(args.file, args.out)
-    else:
-        print("Both --file and --out arguments are required.")
+    # Extract the Precision and Dataflow parts and save them to the output folder
+    extract_sections(args.file, args.out, args.outname)
diff --git a/from_maestro_to_gamma.py b/from_maestro_to_gamma.py
new file mode 100644
index 0000000..f0d953e
--- /dev/null
+++ b/from_maestro_to_gamma.py
@@ -0,0 +1,49 @@
+import csv
+import re
+import argparse
+
+def parse_m_file(file_path):
+    """Parse ``../qmaestro/data/model/<file_path>`` into one dimension dict per layer.
+
+    Each dict maps single-character dimension names to ints, plus ``T`` (2 for DSCONV, else 1).
+    """
+    layers = []
+    # Open the file once (it used to be opened twice by two nested ``with`` blocks).
+    with open('../qmaestro/data/model/' + file_path, 'r') as file:
+        content = file.read()
+    layers_type = re.findall(r'Type: (\w+)', content, re.DOTALL)
+    dimension_matches = re.findall(r'Dimensions \{.*?\}', content, re.DOTALL)
+    for i, dimensions in enumerate(dimension_matches):
+        dim_values = re.findall(r'\b\w: \d+', dimensions)
+        dim_dict = {d.split(': ')[0]: int(d.split(': ')[1]) for d in dim_values}
+        dim_dict['T'] = 2 if layers_type[i] == 'DSCONV' else 1
+        layers.append(dim_dict)
+    return layers
+
+def write_csv(layers, precision, output_file):
+    """Write ``layers`` to ``./data/model/<output_file>`` as CSV.
+
+    Each row holds K, C, Y, X, R, S, T (0 for a missing key) followed by ``precision``,
+    which is repeated unchanged on every row.
+    """
+    dimension_keys = ["K", "C", "Y", "X", "R", "S", "T"]
+    with open('./data/model/'+ output_file, mode='w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(dimension_keys + ["Precision"])
+        for layer in layers:
+            # 'T' is taken from the layer dict; the unused ``t_value`` local, computed
+            # from a 'Type' key that parse_m_file never stores, has been removed.
+            row = [layer.get(key, 0) for key in dimension_keys]
+            row.append(precision)
+            writer.writerow(row)
+
+
+if __name__ == "__main__":
+    arg_parser = argparse.ArgumentParser(description='Parse a .m file and generate a CSV.')
+    arg_parser.add_argument('input_file', type=str, help='Path to the input .m file')
+    arg_parser.add_argument('precision', type=str, help='Precision value for the CSV')
+    arg_parser.add_argument('output_file', type=str, help='Path to the output CSV file')
+    cli = arg_parser.parse_args()
+
+    # Parse the .m model file, then write its layers out with the given precision.
+    write_csv(parse_m_file(cli.input_file), cli.precision, cli.output_file)
diff --git a/src/GAMMA/cost_database.py b/src/GAMMA/cost_database.py
new file mode 100644
index 0000000..d06a66f
--- /dev/null
+++ b/src/GAMMA/cost_database.py
@@ -0,0 +1,105 @@
+# Energy per operation in nJ/op, indexed as energy_data[datatype][operation][tech][precision].
+energy_data = {
+    'FLOAT': {
+        'MULT': {
+            '7nm': {'FP32': 1.31e-3, 'FP16': 0.34e-3},
+            '45nm': {'FP32': 3.7e-3, 'FP16': 1.1e-3, 'FP8': 0.327027027e-3, 'FP4': 0.097224251e-3},
+            '22nm': {'FP32': 2.368e-3, 'FP16': 0.704e-3, 'FP8': 0.209297297e-3, 'FP4': 0.062223521e-3}
+        },
+        'ADD': {
+            '7nm': {'FP32': 0.38e-3, 'FP16': 0.16e-3},
+            '45nm': {'FP32': 0.9e-3, 'FP16': 0.4e-3, 'FP8': 0.2e-3, 'FP4': 0.1e-3},
+            '22nm': {'FP32': 0.576e-3, 'FP16': 0.256e-3, 'FP8': 0.128e-3, 'FP4': 0.064e-3}
+        },
+        'MAC': {
+            '7nm': {},
+            '45nm': {'FP32': 16.8e-3, 'FP16': 7.85e-3, 'FP8': 3.802027027e-3, 'FP4': 2.183474251e-3},
+            '22nm': {'FP32': 10.752e-3, 'FP16': 5.024e-3, 'FP8': 2.433297297e-3, 'FP4': 1.397423521e-3}
+        }
+    },
+    'INT': {
+        'MULT': {
+            '7nm': {'INT32': 1.48e-3, 'INT8': 0.07e-3},
+            '45nm': {'INT32': 3.1e-3, 'INT16': 0.8e-3, 'INT8': 0.2e-3, 'INT4': 0.05e-3, 'INT2': 0.003225806e-3},
+            '22nm': {'INT32': 1.984e-3, 'INT8': 0.128e-3}
+        },
+        'ADD': {
+            '7nm': {'INT32': 0.03e-3, 'INT8': 0.007e-3},
+            '45nm': {'INT32': 0.1e-3, 'INT16': 0.06e-3, 'INT8': 0.03e-3, 'INT4': 0.015e-3, 'INT2': 0.0075e-3},
+            '22nm': {'INT32': 0.064e-3, 'INT16': 0.0384e-3, 'INT8': 0.0192e-3, 'INT4': 0.0096e-3, 'INT2': 0.0048e-3}  # key was '22nm (ext)'
+        },
+        'MAC': {
+            '7nm': {},
+            '45nm': {'INT32': 3.470506757e-3, 'INT16': 2.030506757e-3, 'INT8': 1.290506757e-3, 'INT4': 0.950506757e-3,
+                     'INT2': 0.66211966e-3},
+            '22nm': {'INT32': 2.221124324e-3, 'INT16': 1.299524324e-3, 'INT8': 0.825924324e-3, 'INT4': 0.608324324e-3,
+                     'INT2': 0.423756582e-3}
+        }
+    }
+}
+
+# Read and Write values expressed in nJ
+sram_data = {
+    512: {'Write': 2.73e-13, 'Read': 2.91e-13, 'Leakage Power (mW)': 5.81e-05},
+    1024: {'Write': 3.35e-13, 'Read': 4.18e-13, 'Leakage Power (mW)': 0.000130558},
+    2048: {'Write': 5.06e-13, 'Read': 5.89e-13, 'Leakage Power (mW)': 0.000300498},
+    4096: {'Write': 9.51e-13, 'Read': 9.73e-13, 'Leakage Power (mW)': 0.000518707},
+    8192: {'Write': 1.51e-12, 'Read': 1.53e-12, 'Leakage Power (mW)': 0.00101401},
+    16384: {'Write': 2.17e-12, 'Read': 2.24e-12, 'Leakage Power (mW)': 0.00210329},
+    32768: {'Write': 3.39e-12, 'Read': 3.45e-12, 'Leakage Power (mW)': 0.00400746},
+    65536: {'Write': 5.19e-12, 'Read': 5.30e-12, 'Leakage Power (mW)': 0.00865728},
+    131072: {'Write': 7.94e-12, 'Read': 8.06e-12, 'Leakage Power (mW)': 0.0169947},
+    262144: {'Write': 1.08e-11, 'Read': 1.09e-11, 'Leakage Power (mW)': 0.0326368},
+    524288: {'Write': 1.62e-11, 'Read': 1.63e-11, 'Leakage Power (mW)': 0.0642241},
+    1048576: {'Write': 2.22e-11, 'Read': 2.23e-11, 'Leakage Power (mW)': 0.125151},
+    2097152: {'Write': 3.33e-11, 'Read': 3.34e-11, 'Leakage Power (mW)': 0.247646},
+    4194304: {'Write': 4.59e-11, 'Read': 4.60e-11, 'Leakage Power (mW)': 0.487825},
+    8388608: {'Write': 9.18e-11, 'Read': 9.2e-11, 'Leakage Power (mW)': 0.97565},
+    16777216: {'Write': 1.836e-10, 'Read': 1.84e-10, 'Leakage Power (mW)': 1.9513},
+    33554432: {'Write': 3.672e-10, 'Read': 3.68e-10, 'Leakage Power (mW)': 3.9026},
+    67108864: {'Write': 7.344e-10, 'Read': 7.36e-10, 'Leakage Power (mW)': 7.8052},
+    134217728: {'Write': 1.4688e-9, 'Read': 1.472e-9, 'Leakage Power (mW)': 15.6104},
+    268435456: {'Write': 2.937e-9, 'Read': 2.944e-9, 'Leakage Power (mW)': 31.2208},
+    536870912: {'Write': 5.8752e-9, 'Read': 5.88e-9, 'Leakage Power (mW)': 62.4416},
+    1073741824: {'Write': 1.175e-8, 'Read': 1.176e-8, 'Leakage Power (mW)': 124.8832},
+    2147483648: {'Write': 2.35e-8, 'Read': 2.352e-8, 'Leakage Power (mW)': 249.7664}
+}
+
+noc_dyn_energy_per_bit = 0.1143e-12  # J/bit/hop
+
+precision_to_bits = {
+    'FP32': 32,
+    'FP16': 16,
+    'FP8': 8,
+    'FP4': 4,
+    'INT32': 32,
+    'INT16': 16,
+    'INT8': 8,
+    'INT4': 4,
+    'INT2': 2
+}
+
+
+def get_sram_data(size, parameter):
+    """Return ``sram_data[size][parameter]``, or None when either key is missing."""
+    entry = sram_data.get(size)
+    # An unknown size short-circuits to None; an unknown parameter yields None via .get().
+    return None if entry is None else entry.get(parameter)
+
+
+def get_energy(operation, tech='22nm', precision=None):
+    """Return the nJ/op entry for ``operation``/``tech``/``precision``, or None if not tabulated."""
+    if not isinstance(precision, str):
+        return None  # the default precision=None used to raise AttributeError on .startswith
+    datatype = 'FLOAT' if precision.startswith('FP') else 'INT'
+    return energy_data.get(datatype, {}).get(operation, {}).get(tech, {}).get(precision)
+
+
+def calculate_noc_dyn_energy(precision, bw, hops=2):
+    """Return noc_dyn_energy_per_bit * bits(precision) * bw * hops, converted from J to nJ."""
+    if precision not in precision_to_bits:
+        raise ValueError(f"Unknown precision: {precision}")
+    width_in_bits = precision_to_bits[precision]
+    # The 1e9 factor expresses the result in nJ, like the other cost tables.
+    return noc_dyn_energy_per_bit * width_in_bits * float(bw) * hops * 1e9
+
diff --git a/src/GAMMA/gamma.py b/src/GAMMA/gamma.py
index 42d169b..a8a908d 100755
--- a/src/GAMMA/gamma.py
+++ b/src/GAMMA/gamma.py
@@ -10,18 +10,25 @@
 from functools import reduce
 from collections import defaultdict
 from math import ceil
-m_type_dicts = {0:"CONV", 1:"CONV", 2:"DSCONV", 3:"CONV"}
-CONVtype_dicts = {0:"FC", 1:"CONV",2:"DSCONV", 3:"GEMM"}
-MAC_AREA_MAESTRO=4470
-MAC_AREA_INT8=282
-DEVELOP_MODE=False
+
+m_type_dicts = {0: "CONV", 1: "CONV", 2: "DSCONV", 3: "CONV"}
+CONVtype_dicts = {0: "FC", 1: "CONV", 2: "DSCONV", 3: "GEMM"}
+MAC_AREA_MAESTRO = 4470
+MAC_AREA_INT8 = 282
+DEVELOP_MODE = False
+
 
 class GAMMA(object):
-    def __init__(self,dimension, map_cstr=None, num_pe=64, pe_limit=1024, fitness="latency", constraints=dict(), par_RS=False, l1_size=512, l2_size=108000, NocBW=81920000, offchipBW=81920000, slevel_min=2,slevel_max=2, fixedCluster=0, log_level=2,constraint_class=None,external_mem_cstr=None, use_factor=False,uni_base=True):
-        super(GAMMA,self).__init__()
+    def __init__(self, dimension, map_cstr=None, num_pe=64, pe_limit=1024, fitness="latency", constraints=dict(),
+                 par_RS=False, l1_size=512, l2_size=108000, NocBW=81920000, offchipBW=81920000, slevel_min=2,
+                 slevel_max=2, fixedCluster=0, log_level=2, constraint_class=None, external_mem_cstr=None,
+                 use_factor=False, uni_base=True):
+        super(GAMMA, self).__init__()
         self.dimension = dimension
-        self.dimension_dict = {"K":dimension[0], "C":dimension[1], "Y":dimension[2], "X":dimension[3], "R":dimension[4],"S":dimension[5], "T":dimension[6]}
-        self.lastcluster_dict = {"K":dimension[0], "C":dimension[1], "Y":dimension[2], "X":dimension[3], "R":dimension[4],"S":dimension[5], "T":dimension[6]}
+        self.dimension_dict = {"K": dimension[0], "C": dimension[1], "Y": dimension[2], "X": dimension[3],
+                               "R": dimension[4], "S": dimension[5], "T": dimension[6]}
+        self.lastcluster_dict = {"K": dimension[0], "C": dimension[1], "Y": dimension[2], "X": dimension[3],
+                                 "R": dimension[4], "S": dimension[5], "T": dimension[6]}
         if DEVELOP_MODE:
             path = "/usr/scratch/felix/my_code/HW_optimizer_rnn_result_history/maestro_his/maestro21/"
             if os.path.exists(path) is False:
@@ -37,11 +44,11 @@ def __init__(self,dimension, map_cstr=None, num_pe=64, pe_limit=1024, fitness="l
         self.num_pe = num_pe
         self.pe_limit = pe_limit
         self.fitness_objective = fitness
-        self.cluster_space = ["K", "C", "Y","X","R","S"] if par_RS else ["K", "C", "Y","X"]
-        self.l1_size = l1_size if l1_size > 0 else 2**30
-        self.l2_size = l2_size if l2_size > 0 else 2**30
-        self.NocBW = NocBW if NocBW>0 else 2**30
-        self.offchipBW = offchipBW if offchipBW > 0 else 2**30
+        self.cluster_space = ["K", "C", "Y", "X", "R", "S"] if par_RS else ["K", "C", "Y", "X"]
+        self.l1_size = l1_size if l1_size > 0 else 2 ** 30
+        self.l2_size = l2_size if l2_size > 0 else 2 ** 30
+        self.NocBW = NocBW if NocBW > 0 else 2 ** 30
+        self.offchipBW = offchipBW if offchipBW > 0 else 2 ** 30
         self.slevel_min = slevel_min
         self.slevel_max = slevel_max
         self.fixedCluster = fixedCluster
@@ -53,26 +60,29 @@ def __init__(self,dimension, map_cstr=None, num_pe=64, pe_limit=1024, fitness="l
         self.stat = None
         self.dimension_factors = self.get_dimension_factors(self.dimension_dict)
         self.use_ranking = True if self.fitness_objective[0] == "ranking" else False
-        self.constraints=constraints
-        self.constraint_class=constraint_class
+        self.constraints = constraints
+        self.constraint_class = constraint_class
         self.external_mem_cstr = external_mem_cstr
         self.use_factor = use_factor
         self.uni_base = uni_base
         self.L1_bias_template = None
-        self.area_pebuf_only=False
+        self.area_pebuf_only = False
         self.external_area_model = False
+        self.precision = None
 
-    def reset_hw_parm(self, l1_size=None, l2_size=None, num_pe=None, NocBW=None, map_cstr=None, pe_limit=None,area_pebuf_only=None, external_area_model=None, offchipBW=None, slevel_max=None, slevel_min=None):
+    def reset_hw_parm(self, l1_size=None, l2_size=None, num_pe=None, NocBW=None, map_cstr=None, pe_limit=None,
+                      area_pebuf_only=None, external_area_model=None, offchipBW=None, slevel_max=None, slevel_min=None,
+                      precision=None):
         if l1_size:
-            self.l1_size=l1_size if l1_size > 0 else 2**30
+            self.l1_size = l1_size if l1_size > 0 else 2 ** 30
         if l2_size:
-            self.l2_size=l2_size if l2_size > 0 else 2**30
+            self.l2_size = l2_size if l2_size > 0 else 2 ** 30
         if num_pe:
-            self.num_pe=num_pe
+            self.num_pe = num_pe
         if NocBW:
-            self.NocBW=NocBW if NocBW > 0 else 2**30
+            self.NocBW = NocBW if NocBW > 0 else 2 ** 30
         if offchipBW:
-            self.offchipBW=offchipBW if offchipBW > 0 else 2**30
+            self.offchipBW = offchipBW if offchipBW > 0 else 2 ** 30
         if map_cstr:
             self.map_cstr = map_cstr
         if pe_limit:
@@ -87,20 +97,23 @@ def reset_hw_parm(self, l1_size=None, l2_size=None, num_pe=None, NocBW=None, map
             self.slevel_min = slevel_min
         if slevel_max:
             self.slevel_max = slevel_max
+        if precision:
+            self.precision = precision
 
     def get_dimension_factors(self, dimension_dict):
         dimension_factors = dict()
         for key, value in dimension_dict.items():
             if key != "T":
                 factors = self.get_factors(value)
-                dimension_factors[key] = {"set":factors, "array":np.array(list(factors))}
+                dimension_factors[key] = {"set": factors, "array": np.array(list(factors))}
         return dimension_factors
 
-    def reset_dimension(self, dimension=None, fitness=None, constraints=None, constraint_class=None, external_mem_cstr=None):
+    def reset_dimension(self, dimension=None, fitness=None, constraints=None, constraint_class=None,
+                        external_mem_cstr=None):
         if dimension is not None:
             self.dimension = dimension
         if fitness is not None:
-            self.fitness_objective =  fitness
+            self.fitness_objective = fitness
         if constraints is not None:
             self.constraints = constraints
         if constraint_class is not None:
@@ -108,7 +121,9 @@ def reset_dimension(self, dimension=None, fitness=None, constraints=None, constr
         if external_mem_cstr is not None:
             self.external_mem_cstr = external_mem_cstr
         self.use_ranking = True if self.fitness_objective[0] == "ranking" else False
-        self.dimension_dict = {"K": self.dimension[0], "C": self.dimension[1], "Y": self.dimension[2], "X": self.dimension[3], "R": self.dimension[4],"S": self.dimension[5], "T": self.dimension[6]}
+        self.dimension_dict = {"K": self.dimension[0], "C": self.dimension[1], "Y": self.dimension[2],
+                               "X": self.dimension[3], "R": self.dimension[4], "S": self.dimension[5],
+                               "T": self.dimension[6]}
         self.dimension_factors = self.get_dimension_factors(self.dimension_dict)
 
     def create_genome_with_cstr(self):
@@ -118,14 +133,24 @@ def create_genome_with_cstr(self):
         self.map_cstr.create_from_constraint(indv, self.fixedCluster, self.dimension_dict)
         return indv
 
-    def create_genome(self, uni_base=False,last_cluster_dict=None, l1_bias_template=None):
+    def create_genome_fixedSL(self, bias=None):
+        if self.map_cstr:
+            return self.create_genome_with_cstr()
+        ind = self.create_genome()
+        for _ in range(self.slevel_min - 1):
+            ind = self.born_cluster_ind(ind)
+        if bias:
+            ind = self.biased_init(ind, bias=bias)
+        return ind
+
+    def create_genome(self, uni_base=False, last_cluster_dict=None, l1_bias_template=None, ind_level=0):
         if uni_base:
             if l1_bias_template:
                 K, C, Y, X, R, S = l1_bias_template
             else:
-                K,C,Y,X,R,S,T = [1]*len(self.dimension)
+                K, C, Y, X, R, S, T = [1] * len(self.dimension)
         else:
-            K,C,Y,X,R,S,T = self.dimension
+            K, C, Y, X, R, S, T = self.dimension
         if uni_base is False and last_cluster_dict:
             K = last_cluster_dict["K"]
             C = last_cluster_dict["C"]
@@ -135,52 +160,95 @@ def create_genome(self, uni_base=False,last_cluster_dict=None, l1_bias_template=
             S = last_cluster_dict["S"]
         sp = random.choice(self.cluster_space)
         lastcluster_sz = last_cluster_dict[sp] if last_cluster_dict else self.dimension_dict[sp]
-        if uni_base == True:
-            if self.fixedCluster>0:
-                sp_sz = self.fixedCluster
+        if self.slevel_max == 3:
+            if uni_base:
+                sp_sz = self.sp_sz_precision_based_3_level()
+                limited_cluster_space = self.cluster_space[1:4]
+                sp = random.choice(limited_cluster_space)
             else:
-                if self.num_pe > 0:
-                    sp_sz = random.randint(1, min(lastcluster_sz, self.num_pe))
-                else:
-                    sp_sz = random.randint(1, lastcluster_sz)
+                if ind_level == 0:
+                    sp_sz = random.randint(1, self.num_pe if self.num_pe > 0 else self.pe_limit)
+                if ind_level == 1:
+                    if self.fixedCluster > 0:
+                        sp_sz = self.fixedCluster
+                    else:
+                        sp_sz = np.random.randint(2, self.num_pe // self.sp_sz_precision_based_3_level())
         else:
-            sp_sz = random.randint(1, self.num_pe if self.num_pe > 0 else self.pe_limit)
+            if uni_base == True:
+                if self.fixedCluster > 0:
+                    sp_sz = self.fixedCluster
+                else:
+                    if self.num_pe > 0:
+                        sp_sz = random.randint(1, min(lastcluster_sz, self.num_pe))
+                    else:
+                        sp_sz = random.randint(1, lastcluster_sz)
+            else:
+                sp_sz = random.randint(1, self.num_pe if self.num_pe > 0 else self.pe_limit)
         if self.use_factor and not uni_base:
-            df = [["K", np.random.choice(self.dimension_factors["K"]["array"])], ["C",np.random.choice(self.dimension_factors["C"]["array"])], ["Y", np.random.choice(self.dimension_factors["Y"]["array"])],
-                  ["X", np.random.choice(self.dimension_factors["X"]["array"])], ["R",np.random.choice(self.dimension_factors["R"]["array"])], ["S",np.random.choice(self.dimension_factors["S"]["array"])]]
+            df = [["K", np.random.choice(self.dimension_factors["K"]["array"])],
+                  ["C", np.random.choice(self.dimension_factors["C"]["array"])],
+                  ["Y", np.random.choice(self.dimension_factors["Y"]["array"])],
+                  ["X", np.random.choice(self.dimension_factors["X"]["array"])],
+                  ["R", np.random.choice(self.dimension_factors["R"]["array"])],
+                  ["S", np.random.choice(self.dimension_factors["S"]["array"])]]
         else:
             if uni_base:
-                df = [["K", K], ["C", C], ["Y", Y],["X", X], ["R", R], ["S", S]]
+                df = [["K", K], ["C", C], ["Y", Y], ["X", X], ["R", R], ["S", S]]
             else:
-                df = [["K", random.randint(1, K)], ["C", random.randint(1, C)], ["Y", random.randint(1, Y)],["X", random.randint(1, X)], ["R", random.randint(1, R)], ["S", random.randint(1, S)]]
+                df = [["K", random.randint(1, K)], ["C", random.randint(1, C)], ["Y", random.randint(1, Y)],
+                      ["X", random.randint(1, X)], ["R", random.randint(1, R)], ["S", random.randint(1, S)]]
         idx = np.random.permutation(len(df))
         indv = [[sp, sp_sz]] + [df[i] for i in idx]
         return indv
 
+    def sp_sz_precision_based_3_level(self):
+
+        if self.precision is None or self.precision == "FP32":
+            return 0
+        if self.precision == "FP16":
+            return 2
+        if self.precision == "FP8":
+            return 4
+        if self.precision == "FP4":
+            return 8
+        if self.precision == "FP2":
+            return 16
+        if self.precision == "INT32":
+            return 0
+        if self.precision == "INT16":
+            return 2
+        if self.precision == "INT8":
+            return 4
+        if self.precision == "INT4":
+            return 8
+        if self.precision == "INT2":
+            return 16
+
     def search_loc(self, segment_of_indv, dim):
         for i in range(len(segment_of_indv)):
-            if segment_of_indv[i][0]==dim:
+            if segment_of_indv[i][0] == dim:
                 return i
 
-    def validTo_external_mem_cstr(self, indv,num_pe=1024):
+    def validTo_external_mem_cstr(self, indv, num_pe=1024):
         if not self.external_mem_cstr:
             return True
-        mem_used = self.compute_l1_l2_mem_size(indv,num_pe=num_pe)
+        mem_used = self.compute_l1_l2_mem_size(indv, num_pe=num_pe)
         for key, value in self.external_mem_cstr.items():
-            if mem_used[key]> value:
+            if mem_used[key] > value:
                 return False
         return True
 
     def compute_l1_l2_mem_size(self, indv, num_pe=1024):
         mem = {}
+
         def get_w_i_o_size(picks, level=1, num_pe=1024):
-            if level==2:
+            if level == 2:
                 sp_dim_L2 = indv[0][0]
                 sp_dim_size_L2 = picks[sp_dim_L2]
                 dim = self.dimension_dict[sp_dim_L2]
                 sp_sz = indv[7][1]
-                num_cluster = num_pe//sp_sz
-                needed_iters = ceil(dim/sp_dim_size_L2)
+                num_cluster = num_pe // sp_sz
+                needed_iters = ceil(dim / sp_dim_size_L2)
                 actual_sp_tile_size = min(dim, sp_dim_size_L2 * min(needed_iters, num_cluster))
                 picks[sp_dim_L2] = actual_sp_tile_size
 
@@ -188,7 +256,8 @@ def get_w_i_o_size(picks, level=1, num_pe=1024):
             input = picks["C"] * picks["Y"] * picks["X"]
             output = picks["K"] * picks["Y"] * picks["X"]
             return weight, input, output
-        weight, input, output = get_w_i_o_size(picks=self.scan_indv(indv[0:7]), level=2,num_pe=num_pe)
+
+        weight, input, output = get_w_i_o_size(picks=self.scan_indv(indv[0:7]), level=2, num_pe=num_pe)
         mem[f"L2-W"] = weight
         mem[f"L2-I"] = input
         mem[f"L2-O"] = output
@@ -200,16 +269,16 @@ def get_w_i_o_size(picks, level=1, num_pe=1024):
         mem[f"L1-soft"] = output + input + weight
         return mem
 
-    def biased_init(self, indv, bias = None):
+    def biased_init(self, indv, bias=None):
         if bias is None:
             return indv
         if "par" in bias:
             for key, value in bias["par"].items():
-                pointer = (key-1) * 7
+                pointer = (key - 1) * 7
                 indv[pointer][0] = value
         if "order" in bias:
             for key, value in bias["order"].items():
-                st, end = (key-1)*7+1, (key)*7
+                st, end = (key - 1) * 7 + 1, (key) * 7
                 temp_indv = copy.deepcopy(indv[st: end])
                 for di in value[::-1]:
                     idx = self.search_loc(temp_indv, di)
@@ -218,7 +287,7 @@ def biased_init(self, indv, bias = None):
                 indv[st: end] = temp_indv
         if "tiles" in bias:
             for key, value in bias["tiles"].items():
-                st, end = (key-1)*7+1, (key)*7
+                st, end = (key - 1) * 7 + 1, (key) * 7
                 temp_indv = copy.deepcopy(indv[st: end])
                 if key == 1:
                     last_cluster_dict = self.dimension_dict
@@ -227,27 +296,17 @@ def biased_init(self, indv, bias = None):
                 for i in range(len(temp_indv)):
                     dim = temp_indv[i][0]
                     if dim in value:
-                        new_tile = max(1, int(last_cluster_dict[dim]* value[dim]))
+                        new_tile = max(1, int(last_cluster_dict[dim] * value[dim]))
                         temp_indv[i][1] = new_tile
                 indv[st: end] = temp_indv
         return indv
 
-    def create_genome_fixedSL(self,  bias = None):
-        if self.map_cstr:
-            return self.create_genome_with_cstr()
-        ind = self.create_genome()
-        for _ in range(self.slevel_min-1):
-            ind = self.born_cluster_ind(ind)
-        if bias:
-            ind = self.biased_init(ind, bias=bias)
-        return ind
-
     def select_parents(self, pop, fitness, num_parents, num_population):
         #=====sel unique======================
         pop_set = set()
         to_saved_idx = []
         for i in range(len(pop)):
-            cur_cand = tuple([tt for i, t in enumerate(pop[i]) for j, tt in enumerate(t)  if (i, j) != (0, 1)])
+            cur_cand = tuple([tt for i, t in enumerate(pop[i]) for j, tt in enumerate(t) if (i, j) != (0, 1)])
             if cur_cand not in pop_set:
                 pop_set.add(cur_cand)
                 to_saved_idx.append(i)
@@ -257,10 +316,10 @@ def select_parents(self, pop, fitness, num_parents, num_population):
         #=====================================
 
         if self.normalize:
-            norm_fitness = fitness/np.abs(np.nanmean(np.ma.masked_equal(fitness, value=float("-Inf")), axis=0))
+            norm_fitness = fitness / np.abs(np.nanmean(np.ma.masked_equal(fitness, value=float("-Inf")), axis=0))
             fitness_list = [tuple([-np.prod(ar[1:]), -i]) for i, ar in enumerate(norm_fitness)]
         else:
-            fitness_list = [tuple(list(ar)+[-i]) for i, ar in enumerate(fitness)]
+            fitness_list = [tuple(list(ar) + [-i]) for i, ar in enumerate(fitness)]
         fitness_list = sorted(fitness_list, reverse=True)
         idx = [int(-ar[-1]) for ar in fitness_list]
         new_pop = [pop[i] for i in idx][:num_population]
@@ -269,20 +328,21 @@ def select_parents(self, pop, fitness, num_parents, num_population):
         if self.use_pleteau:
             num_pletau = self.build_pleteau(fitness, pop)
             # print(f"Num pleteau: {num_pletau}")
-            fitness_list = [tuple([*ar[:len(self.fitness_objective)], *ar]) for i, ar in enumerate(self.pleteau_sol.keys())]
+            fitness_list = [tuple([*ar[:len(self.fitness_objective)], *ar]) for i, ar in
+                            enumerate(self.pleteau_sol.keys())]
             fitness_list = sorted(fitness_list, reverse=True)
             idx = [tuple(ar[-len(self.fitness_objective):]) for ar in fitness_list]
 
             new_pop[num_pletau:] = new_pop[:-num_pletau]
-            new_pop[:num_pletau] =[self.pleteau_sol[i] for i in idx]
+            new_pop[:num_pletau] = [self.pleteau_sol[i] for i in idx]
             new_fitness[num_pletau:] = new_fitness[:-num_pletau]
-            new_fitness[:num_pletau] =[i for i in idx]
-            parents = copy.deepcopy(new_pop[:num_parents+num_pletau])
+            new_fitness[:num_pletau] = [i for i in idx]
+            parents = copy.deepcopy(new_pop[:num_parents + num_pletau])
             self.best_reward_pleteau = copy.deepcopy(new_fitness[:num_pletau])
             self.best_sol_pleteau = copy.deepcopy(new_pop[:num_pletau])
         return new_pop, new_fitness, parents
 
-    def mutate_par(self, pop,alpha=0.5):
+    def mutate_par(self, pop, alpha=0.5):
         if self.map_cstr is not None:
             return
         for idx in range(len(pop)):
@@ -291,19 +351,19 @@ def mutate_par(self, pop,alpha=0.5):
                 #     avail_val = self.num_free_par + self.num_free_order - 1
                 # else:
                 #     avail_val = len(indv) - 1
-                    # ##===ad hoc trial=========
+                # ##===ad hoc trial=========
                 pop[idx][7][0], pop[idx][0][0] = pop[idx][0][0], pop[idx][7][0]
                 continue
-                    # #=========================
+                # #=========================
                 pick = random.randint(0, avail_val)
-                pick_level = pick//7
-                pick = int(pick_level *7)
-                if  self.map_cstr  and "sp" in self.cstr_list[pick_level]:
+                pick_level = pick // 7
+                pick = int(pick_level * 7)
+                if self.map_cstr and "sp" in self.cstr_list[pick_level]:
                     choices = self.cstr_list[pick_level]["sp"]
                 else:
                     choices = self.cluster_space
                 sp = random.choice(choices)
-                if self.map_cstr  and "sp_sz" in self.cstr_list[pick_level]:
+                if self.map_cstr and "sp_sz" in self.cstr_list[pick_level]:
                     sp_sz = self.self.cstr_list[pick_level]["sp_sz"]
                 else:
                     if self.fixedCluster < 1:
@@ -325,7 +385,7 @@ def mutate_tile(self, pop, is_finetune=False, num_mu_loc=1, alpha=0.5, range_alp
                 if random.random() < alpha:
                     if self.map_cstr:
                         num_free_tile = self.cstr_list[1]["num_free_tile"]
-                        if num_free_tile==0:
+                        if num_free_tile == 0:
                             pick = random.randint(0, len(indv) - 6 - 1)
                         else:
                             pick = random.randint(0, len(indv) - 1)
@@ -334,33 +394,52 @@ def mutate_tile(self, pop, is_finetune=False, num_mu_loc=1, alpha=0.5, range_alp
                     if cluster_only:
                         pick = 7
                     if pick % 7 == 0:
-                        if  self.map_cstr  and "sp" in self.cstr_list[pick // 7]:
+                        if self.map_cstr and "sp" in self.cstr_list[pick // 7]:
                             choices = self.cstr_list[pick // 7]["sp"]
                         else:
                             choices = self.cluster_space
-                        sp = random.choice(choices)
-                        if pick>0:
-                            if self.map_cstr  and "sp_sz" in self.cstr_list[pick // 7]:
-                                sp_sz = self.cstr_list[pick // 7]["sp_sz"]
-                            else:
+                        if self.slevel_max == 3 and pick >= 7:
+                            if pick // 7 == 1:
                                 if self.fixedCluster < 1:
-                                    last_cluster_dict = self.scan_indv(indv[:-7]) if pick != 0 else None
-                                    lastcluster_sz = last_cluster_dict[sp] if last_cluster_dict else self.dimension_dict[sp]
-                                    if self.num_pe > 0:
-                                        # sp_sz = max(1, random.randint(0, min(lastcluster_sz, self.num_pe)))
-                                        sp_sz = max(1, random.choice(list(self.get_factors(min(lastcluster_sz, self.num_pe)))))
-                                    else:
-                                        # sp_sz = max(1, random.randint(0, min(lastcluster_sz, indv[0][1])))
-                                        sp_sz = max(1, random.choice(list(self.get_factors(min(lastcluster_sz, indv[0][1])))))
+                                    sp_sz = np.random.randint(2, self.num_pe // self.sp_sz_precision_based_3_level())
+                                    sp = random.choice(choices)
                                 else:
                                     sp_sz = self.fixedCluster
+                                    sp = random.choice(choices)
+                            if pick // 7 == 2:
+                                sp_sz = self.sp_sz_precision_based_3_level()
+                                sp = random.choice(choices[1:4])
                         else:
-                            sp_sz =pop[idx][pick][1]
+                            sp = random.choice(choices)
+                            if pick > 0:
+                                if self.map_cstr and "sp_sz" in self.cstr_list[pick // 7]:
+                                    sp_sz = self.cstr_list[pick // 7]["sp_sz"]
+                                else:
+                                    if self.fixedCluster < 1:
+                                        last_cluster_dict = self.scan_indv(indv[:-7]) if pick != 0 else None
+                                        lastcluster_sz = last_cluster_dict[sp] if last_cluster_dict else \
+                                            self.dimension_dict[sp]
+                                        if self.num_pe > 0:
+                                            # sp_sz = max(1, random.randint(0, min(lastcluster_sz, self.num_pe)))
+                                            sp_sz = max(1, random.choice(
+                                                list(self.get_factors(min(lastcluster_sz, self.num_pe)))))
+                                        else:
+                                            # sp_sz = max(1, random.randint(0, min(lastcluster_sz, indv[0][1])))
+                                            sp_sz = max(1, random.choice(
+                                                list(self.get_factors(min(lastcluster_sz, indv[0][1])))))
+                                    else:
+                                        sp_sz = self.fixedCluster
+                            else:
+                                sp_sz = pop[idx][pick][1]
+
                         pop[idx][pick] = [sp, sp_sz]
                     else:
                         d, d_sz = indv[pick]
                         if pick > 7:
-                            last_cluster_dict = self.scan_indv(indv[:-7])
+                            if 7 <= pick < 14:
+                                last_cluster_dict = self.scan_indv(indv[7:14])
+                            elif pick >= 14:
+                                last_cluster_dict = self.scan_indv(indv[:-7])
                             thr = last_cluster_dict[d]
                             if self.use_factor is False:
                                 new_d_sz = random.randint(1, thr)
@@ -381,33 +460,35 @@ def mutate_tile(self, pop, is_finetune=False, num_mu_loc=1, alpha=0.5, range_alp
                             new_d_sz = max(1, min(new_d_sz, self.dimension_dict[d]))
                         pop[idx][pick][1] = new_d_sz
 
+
     def mutate_pe(self, pop, alpha=0.5, mutate_range_ratio=0.5):
         for idx in range(len(pop)):
-            if len(pop[idx])<=7:
+            if len(pop[idx]) <= 7:
                 if random.random() < alpha:
                     pop[idx][0][1] = random.randint(1, self.pe_limit)
             else:
-                sp , sp_sz, *a = pop[idx][7]
-                cur_multiplier = pop[idx][0][1]//sp_sz
-                if random.random()< alpha:
+                sp, sp_sz, *a = pop[idx][7]
+                cur_multiplier = pop[idx][0][1] // sp_sz
+                if random.random() < alpha:
                     if self.use_factor is False:
                         #==method 1
                         last_cluster_dict = self.scan_indv(pop[idx][:7])
                         last_cluster_dict_sz = last_cluster_dict[sp]
                         max_multiplier = max(1, self.pe_limit // sp_sz)
-                        cur_multiplier = random.randint(1, min(max_multiplier, ceil(self.dimension_dict[sp]/last_cluster_dict_sz)))
+                        cur_multiplier = random.randint(1, min(max_multiplier,
+                                                               ceil(self.dimension_dict[sp] / last_cluster_dict_sz)))
                         # ====constrained to smaller search space====
-                        max_value = min(max_multiplier, ceil(self.dimension_dict[sp]/last_cluster_dict_sz))
-                        cur_multiplier = random.randint(max(1, int(max_value*mutate_range_ratio)), max_value)
+                        max_value = min(max_multiplier, ceil(self.dimension_dict[sp] / last_cluster_dict_sz))
+                        cur_multiplier = random.randint(max(1, int(max_value * mutate_range_ratio)), max_value)
                         #============================================
                     else:
                         #method 2
                         factors = self.dimension_factors[sp]["array"]
                         max_multiplier = max(1, self.pe_limit // sp_sz)
-                        factors = factors[(factors<= max_multiplier)]
+                        factors = factors[(factors <= max_multiplier)]
                         cur_multiplier = random.choice(factors)
                         # ====constrained to smaller search space====
-                        cur_multiplier =  random.choice(factors[int(len(factors)*mutate_range_ratio):])
+                        cur_multiplier = random.choice(factors[int(len(factors) * mutate_range_ratio):])
                         #============================================
                 cur_pe = min(self.pe_limit, cur_multiplier * sp_sz)
                 pop[idx][0][1] = cur_pe
@@ -419,23 +500,23 @@ def swap_order(self, pop, alpha=0.5):
             return
         while max_count > 0:
             max_count -= 1
-            if random.random()< alpha:
+            if random.random() < alpha:
                 idx = random.randint(0, len(pop) - 1)
                 if self.map_cstr is None:
-                    sel_cluster = random.randint(0, (len(pop[idx])-1)//7)
-                    swap_id = np.random.randint(1, 6+1, (2,)) + sel_cluster * 7
+                    sel_cluster = random.randint(0, (len(pop[idx]) - 1) // 7)
+                    swap_id = np.random.randint(1, 6 + 1, (2,)) + sel_cluster * 7
                 else:
-                    sel_cluster = random.randint(0, (self.num_free_order-1)//6)
-                    num_free_order = (self.num_free_order - sel_cluster*6 -1)%6
-                    swap_id = np.random.randint(1, 1+num_free_order+1, (2,)) + sel_cluster * 7
+                    sel_cluster = random.randint(0, (self.num_free_order - 1) // 6)
+                    num_free_order = (self.num_free_order - sel_cluster * 6 - 1) % 6
+                    swap_id = np.random.randint(1, 1 + num_free_order + 1, (2,)) + sel_cluster * 7
                 pop[idx][swap_id[0]], pop[idx][swap_id[1]] = pop[idx][swap_id[1]], pop[idx][swap_id[0]]
 
     def crossover_tile(self, parents, pop, alpha=0.5):
-        if len(parents) ==1:
+        if len(parents) == 1:
             for idx in range(len(pop)):
                 pop[idx] = copy.deepcopy(parents[0])
         else:
-            for idx in range(0,len(pop),2):
+            for idx in range(0, len(pop), 2):
                 pick_range = np.random.permutation(np.arange(0, len(parents)))
                 dad, mom = parents[pick_range[0]], parents[pick_range[1]]
                 # dad, mom = parents[random.randint(0, len(parents)-1)], parents[random.randint(0, len(parents)-1)]
@@ -445,9 +526,9 @@ def crossover_tile(self, parents, pop, alpha=0.5):
                 if random.random() < alpha:
                     cross_point = random.choice(["K", "C", "Y", "X", "R", "S"])
                     for k in range(0, length, 7):
-                        for i in range(k+1, k+7):
+                        for i in range(k + 1, k + 7):
                             d, d_sz = dad[i]
-                            if d== cross_point:
+                            if d == cross_point:
                                 dad_sz = d_sz
                                 dad_idx = i
                             d, d_sz = mom[i]
@@ -458,12 +539,12 @@ def crossover_tile(self, parents, pop, alpha=0.5):
                         mom[mom_idx][1] = dad_sz
                 pop[idx] = dad
                 if idx + 1 < len(pop):
-                    pop[idx+1] = mom
+                    pop[idx + 1] = mom
 
-    def check_tile_dependency(self,  pop):
+    def check_tile_dependency(self, pop):
         for idx in range(0, len(pop)):
             cur_pop = pop[idx]
-            last_cluster =self.scan_indv(cur_pop)
+            last_cluster = self.scan_indv(cur_pop)
             first_cluster = self.scan_indv(cur_pop[:7])
             for key in ["K", "C", "Y", "X", "R", "S"]:
                 if last_cluster[key] > first_cluster[key]:
@@ -473,14 +554,14 @@ def correctify_tile_dependency(self, pop):
         for i in range(0, len(pop)):
             ind = pop[i]
             cur_cluster = None
-            levels = len(ind)//7
+            levels = len(ind) // 7
             for i in range(levels):
                 last_cluster = copy.deepcopy(cur_cluster)
-                cur_cluster = self.scan_indv(ind[7*i:7*(i+1)])
+                cur_cluster = self.scan_indv(ind[7 * i:7 * (i + 1)])
                 if i == 0:
                     continue
                 else:
-                    for idx in range(7*i+1, 7*(i+1)):
+                    for idx in range(7 * i + 1, 7 * (i + 1)):
                         d, d_sz = ind[idx]
                         d_sz = min(last_cluster[d], d_sz)
                         ind[idx][1] = d_sz
@@ -488,26 +569,38 @@ def correctify_tile_dependency(self, pop):
     def correctify_tile_dependency_thread(self, indv):
         ind = copy.deepcopy(indv)
         cur_cluster = None
-        levels = len(ind)//7
+        levels = len(ind) // 7
         for i in range(levels):
             last_cluster = copy.deepcopy(cur_cluster)
-            cur_cluster = self.scan_indv(ind[7*i:7*(i+1)])
+            cur_cluster = self.scan_indv(ind[7 * i:7 * (i + 1)])
             if i == 0:
                 continue
             else:
-                for idx in range(7*i+1, 7*(i+1)):
+                for idx in range(7 * i + 1, 7 * (i + 1)):
                     d, d_sz = ind[idx]
                     d_sz = min(last_cluster[d], d_sz)
                     ind[idx][1] = d_sz
         return ind
 
     def born_cluster_ind(self, ind):
-        if (len(ind)) // 7 < self.slevel_max:
+        ind_level = len(ind) // 7
+        if ind_level < self.slevel_max:
             last_cluster_dict = self.scan_indv(ind)
-            new_ind = ind + self.create_genome(uni_base=self.uni_base, l1_bias_template=self.L1_bias_template, last_cluster_dict=last_cluster_dict)
+            if self.slevel_max == 3:
+                if ind_level == 1:
+                    new_ind = ind + self.create_genome(l1_bias_template=self.L1_bias_template,
+                                                       last_cluster_dict=last_cluster_dict, ind_level=1)
+                else:
+                    new_ind = ind + self.create_genome(uni_base=self.uni_base, l1_bias_template=self.L1_bias_template,
+                                                       last_cluster_dict=last_cluster_dict)
+            else:
+                new_ind = ind + self.create_genome(uni_base=self.uni_base, l1_bias_template=self.L1_bias_template,
+                                                   last_cluster_dict=last_cluster_dict)
+
             ind = new_ind
         return ind
 
+
     def born_cluster(self, pop, alpha=0.1):
         max_count = len(pop)
         while max_count > 0:
@@ -523,15 +616,15 @@ def kill_cluster(self, pop, alpha=0.5):
             max_count -= 1
             if random.random() < alpha:
                 idx = random.randint(0, len(pop) - 1)
-                if (len(pop[idx]))//7>self.slevel_min:
+                if (len(pop[idx])) // 7 > self.slevel_min:
                     pop[idx] = pop[idx][:-7]
 
-    def scan_indv(self,indv):
-        last_cluster_dict=defaultdict(str)
-        for i in range(len(indv)-6,len(indv), 1):
+    def scan_indv(self, indv):
+        last_cluster_dict = defaultdict(str)
+        for i in range(len(indv) - 6, len(indv), 1):
             d, d_sz = indv[i]
             last_cluster_dict[d] = d_sz
-        return  last_cluster_dict
+        return last_cluster_dict
 
     def get_out_repr(self, x):
         if x in self.out_repr:
@@ -556,12 +649,13 @@ def create_unit_base_pops(self, population, num_all_unit=None):
                 for i in range(1, 7):
                     population[idx][i + level * 7][1] = 1
 
-    def reinit_pop(self,pool, num_population,  stage_idx, best_sol_1st, init_pop, cur_gen=-1, bias= None, num_all_unit=2, precision=None):
+    def reinit_pop(self, pool, num_population, stage_idx, best_sol_1st, init_pop, cur_gen=-1, bias=None, num_all_unit=2,
+                   precision=None):
         population = [self.create_genome_fixedSL(bias=bias) for _ in range(num_population)]
-        #====always create a base unit pop=======
+        # ====always create a base unit pop=======
         self.create_unit_base_pops(population, num_all_unit=num_all_unit)
 
-        #========================================
+        # ========================================
         if init_pop is not None:
             # population = [self.create_genome_fixedSL() for _ in range(num_population)] if best_sol_1st is None else [best_sol_1st for _ in range(num_population)]
             population[:10] = init_pop[:10]
@@ -574,17 +668,15 @@ def reinit_pop(self,pool, num_population,  stage_idx, best_sol_1st, init_pop, cu
         self.comform_to_cstr(population)
         self.fitness = np.ones((max(num_population, len(population)), len(self.fitness_objective)), float)
 
-        self.evaluate(pool=pool, population=population,cur_gen=cur_gen, precision=precision)
+        self.evaluate(pool=pool, population=population, cur_gen=cur_gen, precision=precision)
         return population
 
-
-
     def cal_statstics(self):
         fitness = np.array(self.fitness)
-        reward = fitness[:,0]
-        sel_valid = reward>float("-Inf")
-        latency_ave =  np.mean(-fitness[sel_valid, 0])
-        area_ave =  np.mean(-fitness[sel_valid, 1])
+        reward = fitness[:, 0]
+        sel_valid = reward > float("-Inf")
+        latency_ave = np.mean(-fitness[sel_valid, 0])
+        area_ave = np.mean(-fitness[sel_valid, 1])
         l1_size_ave = np.mean(-fitness[sel_valid, 2])
         l2_size_ave = np.mean(-fitness[sel_valid, 3])
         # l1_size_pops = -np.array(self.l1_size_pop)
@@ -592,10 +684,10 @@ def cal_statstics(self):
         # l1_size_pops = -l1_size_pops[l1_size_pops>float("-Inf")]
         # l2_size_pops = -l2_size_pops[l2_size_pops>float("-Inf")]
         statstics = {
-            "latency_ave":latency_ave,
-            "area_ave":area_ave,
-            "l1_size_ave":l1_size_ave,
-            "l2_size_ave":l2_size_ave
+            "latency_ave": latency_ave,
+            "area_ave": area_ave,
+            "l1_size_ave": l1_size_ave,
+            "l2_size_ave": l2_size_ave
         }
         self.stat = statstics
         return statstics
@@ -604,7 +696,7 @@ def cal_pletau_stat(self):
         fitness = np.array(list(self.pleteau_sol.keys()))
         fitness = np.mean(fitness, axis=0)
         stats = {
-            "fitness":fitness,
+            "fitness": fitness,
             "Reward": fitness[0],
             "latency": fitness[1],
             "area": fitness[2],
@@ -613,7 +705,7 @@ def cal_pletau_stat(self):
         }
         return stats
 
-    def build_pleteau(self,fitness, population):
+    def build_pleteau(self, fitness, population):
         self.pleteau_sol = dict()
         for cand_fit, cand_sol in zip(fitness, population):
             self.insert_into_pleteau(cand_fit, cand_sol)
@@ -621,14 +713,14 @@ def build_pleteau(self,fitness, population):
 
     def insert_into_pleteau(self, cand_fit, cand_sol):
         reject = False
-        if np.prod(cand_fit>float("-inf"))!=1:
+        if np.prod(cand_fit > float("-inf")) != 1:
             return
         cand_fit = tuple(list(cand_fit))
         for pl in set(self.pleteau_sol.keys()):
-            if all([cand_fit[i]< pl[i] for i in range(len(cand_fit))]):
+            if all([cand_fit[i] < pl[i] for i in range(len(cand_fit))]):
                 del self.pleteau_sol[pl]
                 self.pleteau_sol[cand_fit] = cand_sol
-            elif all([cand_fit[i]> pl[i] for i in range(len(cand_fit))]):
+            elif all([cand_fit[i] > pl[i] for i in range(len(cand_fit))]):
                 reject = True
         if not reject:
             self.pleteau_sol[cand_fit] = cand_sol
@@ -645,8 +737,8 @@ def adjust_fitness(self, fitness):
         rank2 = np.zeros((len(idx),))
         rank2[idx] = -np.arange(len(idx))
         rank = rank1 + rank2
-        fitness[:,0] = rank
-        gen_best_idx = np.argmax(fitness[:,0])
+        fitness[:, 0] = rank
+        gen_best_idx = np.argmax(fitness[:, 0])
         return fitness, gen_best_idx
 
     def evaluate(self, pool, population, cur_gen=-1, precision=None):
@@ -675,7 +767,7 @@ def evaluate(self, pool, population, cur_gen=-1, precision=None):
                 gen_best_activity = activity_count
                 gen_best_idx = i
         if self.use_ranking:
-            self.fitness, gen_best_idx =  self.adjust_fitness(self.fitness)
+            self.fitness, gen_best_idx = self.adjust_fitness(self.fitness)
             gen_best = - np.prod(self.fitness[gen_best_idx][1:])
             judging_best_reward = - np.prod(self.best_reward[1:])
         else:
@@ -701,13 +793,13 @@ def evaluate(self, pool, population, cur_gen=-1, precision=None):
             "num_generations": self.num_generations,
             "fitness_use": self.fitness_objective,
             "num_pe": self.num_pe,
-            "pe_limit":self.pe_limit,
+            "pe_limit": self.pe_limit,
             "l1_size": self.l1_size,
             "l2_size": self.l2_size,
             "NocBW": self.NocBW,
             "dimension": self.dimension,
-            "best_reward_pleteau":self.best_reward_pleteau ,
-            "best_sol_pleteau":self.best_sol_pleteau ,
+            "best_reward_pleteau": self.best_reward_pleteau,
+            "best_sol_pleteau": self.best_sol_pleteau,
             # "stat":stat,
             # "stat_list":self.stat_list
         }
@@ -731,20 +823,37 @@ def injection(self, inject_ratio=1.0):
         return pop_inj, inj_fitness
 
     def run(self, dimension, stage_idx=0, prev_stage_value=0, num_population=100, num_generations=100, elite_ratio=0.05,
-                       parents_ratio=0.4, ratio_decay=1, num_finetune=1, best_sol_1st=None, init_pop=None, bias=None, uni_base=True, use_factor=False, use_pleteau=False, L1_bias_template=None, precision=None):
-        self.init_arguement(dimension=dimension, stage_idx=stage_idx, prev_stage_value=prev_stage_value, num_population=num_population, num_generations=num_generations, elite_ratio=elite_ratio,
-                       parents_ratio=parents_ratio, ratio_decay=ratio_decay, num_finetune=num_finetune, best_sol_1st=best_sol_1st, init_pop=init_pop,uni_base=uni_base, use_factor=use_factor, use_pleteau=use_pleteau,L1_bias_template=L1_bias_template)
+            parents_ratio=0.4, ratio_decay=1, num_finetune=1, best_sol_1st=None, init_pop=None, bias=None,
+            uni_base=True, use_factor=False, use_pleteau=False, L1_bias_template=None):
+        self.init_arguement(dimension=dimension, stage_idx=stage_idx, prev_stage_value=prev_stage_value,
+                            num_population=num_population, num_generations=num_generations, elite_ratio=elite_ratio,
+                            parents_ratio=parents_ratio, ratio_decay=ratio_decay, num_finetune=num_finetune,
+                            best_sol_1st=best_sol_1st, init_pop=init_pop, uni_base=uni_base, use_factor=use_factor,
+                            use_pleteau=use_pleteau, L1_bias_template=L1_bias_template)
         pool = Pool(min(self.num_population + self.num_elite, cpu_count()))
-        population = self.reinit_pop(pool,self.num_population,  self.stage_idx, self.best_sol_1st, self.init_pop, bias=bias, precision=precision)
+        population = self.reinit_pop(pool, self.num_population, self.stage_idx, self.best_sol_1st, self.init_pop,
+                                     bias=bias, precision=self.precision)
         if self.map_cstr:
-            self.cstr_list, self.num_free_order, self.num_free_par = self.map_cstr.get_cstr_list(copy.deepcopy(population[0]), fixed_sp_sz=self.fixedCluster)
-        for g in range(num_generations):
+            self.cstr_list, self.num_free_order, self.num_free_par = self.map_cstr.get_cstr_list(
+                copy.deepcopy(population[0]), fixed_sp_sz=self.fixedCluster)
+
+        best_sol = None
+        no_change_counter = 0
+
+        if self.precision is None or self.precision == "FP32" or self.precision == "INT32":
+            num_gen = max(1, int(num_generations / 10))  # run at least one generation: chkpt is only assigned inside the loop
+        else:
+            num_gen = int(num_generations)
+
+        for g in range(num_gen):
 
             while self.num_parents < 1:  # restart
-                population = self.reinit_pop(pool, self.num_population, self.stage_idx, self.best_sol_1st, self.init_pop, cur_gen=g, precision=precision)
+                population = self.reinit_pop(pool, self.num_population, self.stage_idx, self.best_sol_1st,
+                                             self.init_pop, cur_gen=g, precision=self.precision)
                 print("Reinitialize population")
 
-            population, self.fitness, self.parents = self.select_parents(population, self.fitness, self.num_parents, self.num_population,)
+            population, self.fitness, self.parents = self.select_parents(population, self.fitness, self.num_parents,
+                                                                         self.num_population, )
             elite = copy.deepcopy(self.parents[:self.num_elite])
             self.elite_fitness = copy.deepcopy(self.fitness[:(len(elite))])
 
@@ -762,15 +871,13 @@ def run(self, dimension, stage_idx=0, prev_stage_value=0, num_population=100, nu
             else:
                 self.swap_order(population, alpha=0.47)
                 self.mutate_tile(population, num_mu_loc=3, range_alpha=0.53, alpha=0.53, is_finetune=False)
-                self.mutate_pe(population, alpha=1 if g==0 else 0.5) if self.num_pe<1 else None
+                self.mutate_pe(population, alpha=1 if g == 0 else 0.5) if self.num_pe < 1 else None
                 self.mutate_par(population, alpha=0.1)
 
-
             if self.map_cstr is None:
                 self.born_cluster(population, alpha=0.57)
                 self.kill_cluster(population, alpha=0.27)
 
-
             # pop_inj, inj_fitness = self.injection()
             self.correctify_tile_dependency(population)
             # self.calculate_equivalent_num_pe(population)
@@ -779,17 +886,30 @@ def run(self, dimension, stage_idx=0, prev_stage_value=0, num_population=100, nu
             # population = elite + population + pop_inj
             self.fitness = np.concatenate((self.elite_fitness, self.fitness))
             # self.fitness = np.concatenate((self.elite_fitness, self.fitness, inj_fitness))
-            chkpt = self.evaluate(pool=pool, population=population, cur_gen=g, precision=precision)
+            chkpt = self.evaluate(pool=pool, population=population, cur_gen=g, precision=self.precision)
             # self.check_tile_dependency(population)
 
-            if self.log_level>1:
-                if chkpt["best_sol"] is not None and self.log_level>1:
-                    best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = self.get_indiv_info( chkpt["best_sol"], precision=precision)
+            if self.log_level > 1:
+                if chkpt["best_sol"] is not None and self.log_level > 1:
+                    (best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac,
+                     best_power, best_num_pe, best_l1_read, best_l1_write, best_l2_read, best_l2_write, best_avg_pe,
+                     best_avg_bw) = self.get_indiv_info(chkpt["best_sol"], precision=self.precision)
                     # best_num_pe = chkpt["best_sol"][0][1] if self.num_pe<1 else self.num_pe
                     # print(f"Runtime: {best_runtime}, L1: {best_l1_size}, L2: {best_l2_size}, L1_usage:{best_l1_size/self.l1_size:}, L2_usage:{best_l2_size/self.l2_size:.4f}, PE: {best_num_pe}")
-                    print(f"Gen {g+1}: Reward: {chkpt['best_reward'][0]:.3e}, Runtime: {best_runtime}, Area: {best_area/1e6:.3f}mm2,  PE Area_ratio: {best_num_pe*MAC_AREA_INT8/best_area*100:.1f}%, L1: {best_l1_size}, L2: {best_l2_size},  PE: {best_num_pe}")
+                    print(
+                        f"Gen {g + 1}: Reward: {chkpt['best_reward'][0]:.3e}, Runtime: {best_runtime}, Area: {best_area / 1e6:.3f}mm2,  PE Area_ratio: {best_num_pe * MAC_AREA_INT8 / best_area * 100:.1f}%, L1: {best_l1_size}, L2: {best_l2_size},  PE: {best_num_pe}")
                 else:
-                    print(f"Gen {g+1}: Reward: {chkpt['best_reward'][0]:.3e}")
+                    print(f"Gen {g + 1}: Reward: {chkpt['best_reward'][0]:.3e}")
+
+            if best_sol == chkpt["best_sol"]:
+                no_change_counter += 1
+            else:
+                best_sol = chkpt["best_sol"]
+                no_change_counter = 0
+
+            if no_change_counter >= 100:  # stop once 100 consecutive generations kept the same best_sol
+                print(f"No improvement for 100 generations. Terminating at generation {g + 1}.")
+                break
 
         population = self.sort_population(population)
         pool.close()
@@ -799,23 +919,23 @@ def calculate_equivalent_num_pe(self, population):
         for idx in range(len(population)):
             indv = population[idx]
             num_pe, sp_sz = indv[0][1], indv[7][1]
-            num_cluster = num_pe//sp_sz
-            sp_dim_L2_loc = [i for i, item in enumerate(indv) if item[0]==indv[0][0] and i%7!=0]
+            num_cluster = num_pe // sp_sz
+            sp_dim_L2_loc = [i for i, item in enumerate(indv) if item[0] == indv[0][0] and i % 7 != 0]
             sp_real_tile_sizeL2 = indv[sp_dim_L2_loc[0]][1]
             sp_real_tile_sizeL1 = indv[sp_dim_L2_loc[1]][1]
             if sp_real_tile_sizeL2 > num_cluster:
-                sp_dim_sp_sizeL2 = ceil(sp_real_tile_sizeL2/num_cluster)
+                sp_dim_sp_sizeL2 = ceil(sp_real_tile_sizeL2 / num_cluster)
                 using_num_cluster = num_cluster
             else:
                 using_num_cluster = sp_real_tile_sizeL2
                 sp_dim_sp_sizeL2 = 1
             if sp_dim_sp_sizeL2 < sp_real_tile_sizeL1:
                 sp_dim_sp_sizeL2 = sp_real_tile_sizeL1
-                using_num_cluster = ceil(sp_real_tile_sizeL2/sp_dim_sp_sizeL2)
+                using_num_cluster = ceil(sp_real_tile_sizeL2 / sp_dim_sp_sizeL2)
             indv[0][1] = using_num_cluster * sp_sz
             indv[sp_dim_L2_loc[0]][1] = sp_dim_sp_sizeL2
 
-            if indv[0][1]>self.pe_limit:
+            if indv[0][1] > self.pe_limit:
                 print("error1")
             if indv[sp_dim_L2_loc[0]][1] * using_num_cluster < sp_real_tile_sizeL2:
                 print("error2")
@@ -823,27 +943,27 @@ def calculate_equivalent_num_pe(self, population):
 
     def sort_population(self, population):
         population, self.fitness, self.parents = self.select_parents(population, self.fitness, self.num_parents,
-                                                                     self.num_population,)
+                                                                     self.num_population, )
         return population
 
-
     def thread_fun_correctify_tile_dependency(self, indv):
         return self.correctify_tile_dependency_thread(indv)
 
     def thread_fun(self, individual, precision=None):
-        reward, activity_count = self.oberserve_maestro(individual, precision=precision)
+        reward, activity_count = self.observe_maestro(individual, precision=precision)
         return [reward, activity_count]
 
     def get_indiv_info(self, individual, num_pe=None, l1_size=None, l2_size=None, NocBW=None, precision=None):
-        self.oberserve_maestro(individual,num_pe=num_pe, l1_size=l1_size, l2_size=l2_size, NocBW=NocBW, precision=precision)
+        self.observe_maestro(individual, num_pe=num_pe, l1_size=l1_size, l2_size=l2_size, NocBW=NocBW,
+                             precision=precision)
         return self.observation
 
     def get_CONVtypeShape(self, dimensions, CONVtype=1):
         CONVtype = CONVtype_dicts[CONVtype]
-        if CONVtype == "CONV"or CONVtype=="DSCONV":
+        if CONVtype == "CONV" or CONVtype == "DSCONV":
             pass
-        elif CONVtype == "GEMM" or CONVtype=="SGEMM":
-            SzM, SzN, SzK,*a = dimensions
+        elif CONVtype == "GEMM" or CONVtype == "SGEMM":
+            SzM, SzN, SzK, *a = dimensions
             dimensions = [SzN, SzK, SzM, 1, 1, 1]
         elif CONVtype == "FC":
             SzOut, SzIn, *a = dimensions
@@ -852,7 +972,7 @@ def get_CONVtypeShape(self, dimensions, CONVtype=1):
             print("Not supported layer.")
         return dimensions
 
-    def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None, precision=None):
+    def write_maestro(self, indv, layer_id=0, m_file=None, folder_path=None, precision=None):
         dimensions = [self.dimension]
         if layer_id != 0:
             m_file_with_layer = "{}_{}".format(m_file, layer_id)
@@ -885,7 +1005,7 @@ def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None, pre
                                 fo.write("Cluster({},P);\n".format(d_sz))
                         else:
                             sp = "SpatialMap" if d == indv[k][0] or (
-                                        len(indv[k]) > 2 and d == indv[k][2]) else "TemporalMap"
+                                    len(indv[k]) > 2 and d == indv[k][2]) else "TemporalMap"
                             # MAESTRO cannot take K dimension as dataflow file
                             if not (m_type == "DSCONV"):
                                 fo.write("{}({},{}) {};\n".format(sp, d_sz, d_sz, self.get_out_repr(d)))
@@ -900,13 +1020,14 @@ def write_maestro(self, indv, layer_id=0, m_file = None, folder_path = None, pre
                 fo.write("}\n")
             fo.write("}")
 
-    def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW=None, offchipBW=None, precision=None):
+    def observe_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW=None, offchipBW=None,
+                        precision=None):
 
-        m_file = "{}".format(random.randint(0, 2**32))
+        m_file = "{}".format(random.randint(0, 2 ** 32))
         self.write_maestro(indv, m_file=m_file, precision=precision)
         if num_pe:
             to_use_num_pe = self.num_pe_to_use(num_pe, precision)
-        elif self.num_pe <1:
+        elif self.num_pe < 1:
             to_use_num_pe = self.num_pe_to_use(indv[0][1], precision)
         else:
             to_use_num_pe = self.num_pe_to_use(self.num_pe, precision)
@@ -919,8 +1040,10 @@ def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW
                    "--offchip_bw_cstr={}".format(self.offchipBW if not offchipBW else offchipBW),
                    "--noc_mc_support=true", "--num_pes={}".format(int(to_use_num_pe)),
                    "--num_simd_lanes=1", "--l1_size_cstr={}".format(self.l1_size if not l1_size else l1_size),
-                   "--l2_size_cstr={}".format(self.l2_size if not l2_size else l2_size), "--print_res=false", "--print_res_csv_file=true", "--print_log_file=false", "--print_design_space=false", "--msg_print_lv=0"]
-# "--num_simd_lanes=1", "--l1_size_cstr={}".format(int(self.l1_to_use(self.l1_size, precision)) if not l1_size else
+                   "--l2_size_cstr={}".format(self.l2_size if not l2_size else l2_size), "--print_res=false",
+                   "--print_res_csv_file=true", "--print_log_file=false", "--print_design_space=false",
+                   "--msg_print_lv=0"]
+        # "--num_simd_lanes=1", "--l1_size_cstr={}".format(int(self.l1_to_use(self.l1_size, precision)) if not l1_size else
         # int(self.l1_to_use(l1_size, precision))),
 
         process = Popen(command, stdout=PIPE, stderr=PIPE)
@@ -952,7 +1075,13 @@ def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW
             l2_weight_write = np.array(df[" filter l2 write"]).reshape(-1, 1)
             l2_output_read = np.array(df[" output l2 read"]).reshape(-1, 1)
             l2_output_write = np.array(df[" output l2 write"]).reshape(-1, 1)
+            l1_read = l1_output_read + l1_weight_read + l1_input_read
+            l1_write = l1_output_write + l1_input_write + l1_weight_write
+            l2_read = l2_output_read + l2_weight_read + l2_input_read
+            l2_write = l2_output_write + l2_input_write + l2_weight_write
             mac = np.array(df[" Num MACs"]).reshape(-1, 1)
+            avg_pe_utilized = np.array(df["Avg number of utilized PEs"]).reshape(-1, 1)
+            avg_bw = np.array(df[" Avg BW Req"]).reshape(-1, 1)
             activity_count = {}
             activity_count["l1_input_read"] = l1_input_read
             activity_count["l1_input_write"] = l1_input_write
@@ -967,7 +1096,7 @@ def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW
             activity_count["l2_output_read"] = l2_output_read
             activity_count["l2_output_write"] = l2_output_write
             activity_count["mac_activity"] = mac
-            os.remove("./{}.csv".format(m_file))  if os.path.exists("./{}.csv".format(m_file)) else None
+            os.remove("./{}.csv".format(m_file)) if os.path.exists("./{}.csv".format(m_file)) else None
             os.remove("./log.txt") if os.path.exists("./log.txt") else None
             if self.external_area_model:
                 area = self.compute_area_external(to_use_num_pe, l1_size, l2_size)
@@ -975,18 +1104,22 @@ def oberserve_maestro(self, indv, num_pe=None, l1_size=None, l2_size=None, NocBW
                 area = self.compute_area_maestro(to_use_num_pe, l1_size, l2_size)
 
             self.observation = [np.mean(x) for x in [runtime, throughput, energy, area, l1_size, l2_size, mac, power,
-                                                     self.restore_num_pe(to_use_num_pe, precision)]]
+                                                     self.restore_num_pe(to_use_num_pe, precision), l1_read, l1_write,
+                                                     l2_read,l2_write, avg_pe_utilized, avg_bw]]
+
             def catch_exception():
-                if l1_size>self.l1_size or l2_size>self.l2_size or any(runtime_series<1) or any(l1_size_series<1) or any(l2_size_series<1):
+                if l1_size > self.l1_size or l2_size > self.l2_size or any(runtime_series < 1) or any(
+                        l1_size_series < 1) or any(l2_size_series < 1):
                     return True
                 else:
                     return False
+
             stdout_as_str = stdout.decode("utf-8")
             stdout_as_str = "".join(stdout_as_str.split())
             # if (len(str(stdout))>3 and stdout_as_str[:len("Numpartialsumsislessthan0!")]!="Numpartialsumsislessthan0!") or catch_exception() or not self.validTo_external_mem_cstr(indv, num_pe=to_use_num_pe):
             # if len(str(stdout))>3  or catch_exception() or not self.validTo_external_mem_cstr(indv, num_pe=to_use_num_pe):
-            if  catch_exception() or not self.validTo_external_mem_cstr(indv, num_pe=to_use_num_pe):
-            # if  catch_exception():
+            if catch_exception() or not self.validTo_external_mem_cstr(indv, num_pe=to_use_num_pe):
+                # if  catch_exception():
                 return None, None
             return self.judge(), activity_count
         except:
@@ -1000,12 +1133,20 @@ def num_pe_to_use(self, num_pe, precision):
             return num_pe / 2
         if precision == "FP8":
             return num_pe / 4
+        if precision == "FP4":
+            return num_pe / 8
+        if precision == "FP2":
+            return num_pe / 16
         if precision == "INT32":
             return num_pe
         if precision == "INT16":
             return num_pe / 2
         if precision == "INT8":
             return num_pe / 4
+        if precision == "INT4":
+            return num_pe / 8
+        if precision == "INT2":
+            return num_pe / 16
 
     def restore_num_pe(self, num_pe, precision):
 
@@ -1015,12 +1156,20 @@ def restore_num_pe(self, num_pe, precision):
             return num_pe * 2
         if precision == "FP8":
             return num_pe * 4
+        if precision == "FP4":
+            return num_pe * 8
+        if precision == "FP2":
+            return num_pe * 16
         if precision == "INT32":
             return num_pe
         if precision == "INT16":
             return num_pe * 2
         if precision == "INT8":
             return num_pe * 4
+        if precision == "INT4":
+            return num_pe * 8
+        if precision == "INT2":
+            return num_pe * 16
 
     def l1_to_use(self, l1_size, precision):
 
@@ -1036,7 +1185,8 @@ def l1_to_use(self, l1_size, precision):
             return l1_size * 2
         if precision == "INT8":
             return l1_size * 4
-    def impose_halloffame(self, observe_value, target="latency_ave" ):
+
+    def impose_halloffame(self, observe_value, target="latency_ave"):
         is_violated = False
         if self.stat is not None:
             target_value = self.stat[target]
@@ -1045,24 +1195,26 @@ def impose_halloffame(self, observe_value, target="latency_ave" ):
         return is_violated
 
     def compute_area_maestro(self, num_pe, l1_size, l2_size):
-        MAC_AREA_MAESTRO=4470
+        MAC_AREA_MAESTRO = 4470
         L2BUF_AREA_MAESTRO = 4161.536
         L1BUF_AREA_MAESTRO = 4505.1889
         L2BUF_UNIT = 32768
         L1BUF_UNIT = 64
-        area = num_pe * MAC_AREA_MAESTRO + ceil(int(l2_size)/L2BUF_UNIT)*L2BUF_AREA_MAESTRO + ceil(int(l1_size)/L1BUF_UNIT)*L1BUF_AREA_MAESTRO * num_pe
+        area = num_pe * MAC_AREA_MAESTRO + ceil(int(l2_size) / L2BUF_UNIT) * L2BUF_AREA_MAESTRO + ceil(
+            int(l1_size) / L1BUF_UNIT) * L1BUF_AREA_MAESTRO * num_pe
         return area
 
     def compute_area_external(self, num_pe, l1_size, l2_size):
-        MAC_AREA_INT8=282
-        MAC_AREA_INT32=3495
+        MAC_AREA_INT8 = 282
+        MAC_AREA_INT32 = 3495
         BUF_AREA_perbit = 0.086
         buf_size = l1_size * num_pe + l2_size
         area = num_pe * MAC_AREA_INT8 + buf_size * BUF_AREA_perbit * 8
         return area
 
     def judge(self):
-        runtime, throughput, energy, area, l1_size, l2_size, mac, power, num_pe = self.observation
+        (runtime, throughput, energy, area, l1_size, l2_size, mac, power, num_pe, _, _, _, _,
+         _, _) = self.observation
 
         def get_objective(objective):
             values = []
@@ -1095,11 +1247,11 @@ def get_objective(objective):
                     reward = -l2_size
                 elif term == "power":
                     reward = -power
-                elif term =="ranking":
+                elif term == "ranking":
                     reward = -1
-                elif term =="L-PE-L2":
+                elif term == "L-PE-L2":
                     reward = -runtime * num_pe * l2_size
-                elif term =="L-PE":
+                elif term == "L-PE":
                     reward = -runtime * num_pe
                 elif term == "PE":
                     reward = -num_pe
@@ -1110,22 +1262,27 @@ def get_objective(objective):
                         return [float("-Inf")] * len(self.fitness_objective)
                 values.append(reward)
             return values
+
         values = get_objective(self.fitness_objective)
         return values
 
-    def print_indv(self, indv,fd=False):
+    def print_indv(self, indv, fd=False):
         for k in range(0, len(indv), 7):
             if fd:
-                fd.write("\n{}".format(indv[k:k+7]))
+                fd.write("\n{}".format(indv[k:k + 7]))
             else:
-                print(indv[k:k+7])
+                print(indv[k:k + 7])
 
     def init_arguement(self, dimension=None, stage_idx=0, prev_stage_value=0, num_population=100, num_generations=100,
                        elite_ratio=0.05,
-                       parents_ratio=0.15, ratio_decay=1, num_finetune=1, best_sol_1st=None, init_pop=None, uni_base=False, use_factor=False, use_pleteau=False,L1_bias_template=None):
+                       parents_ratio=0.15, ratio_decay=1, num_finetune=1, best_sol_1st=None, init_pop=None,
+                       uni_base=False, use_factor=False, use_pleteau=False, L1_bias_template=None):
         self.stage_idx = stage_idx
         self.num_generations = num_generations
-        self.num_population = num_population
+        if self.precision is None or self.precision == "FP32" or self.precision == "INT32":
+            self.num_population = num_population
+        else:
+            self.num_population = num_population*2
         self.prev_stage_value = prev_stage_value
         self.ratio_decay = ratio_decay
         self.best_sol_1st = best_sol_1st
@@ -1137,13 +1294,12 @@ def init_arguement(self, dimension=None, stage_idx=0, prev_stage_value=0, num_po
         self.best_activity = None
         self.best_sol = None
         self.stat_list = []
-        self.uni_base =uni_base
+        self.uni_base = uni_base
         self.stat = None
         self.pleteau_sol = dict()
         self.use_factor = use_factor
         self.use_pleteau = use_pleteau
         self.best_reward_pleteau = None
         self.best_sol_pleteau = None
-        self.normalize=True   if self.fitness_objective[0][:1] == "n" else False
-        self.L1_bias_template =L1_bias_template
-
+        self.normalize = True if self.fitness_objective[0][:1] == "n" else False
+        self.L1_bias_template = L1_bias_template
diff --git a/src/GAMMA/main.py b/src/GAMMA/main.py
index 2d180cb..c023067 100644
--- a/src/GAMMA/main.py
+++ b/src/GAMMA/main.py
@@ -3,8 +3,8 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--fitness1', type=str, default="latency", choices=('latency', 'energy', 'power', 'EDP', 'area'), help='First objective')
-    parser.add_argument('--fitness2', type=str, default="energy", choices=('latency', 'energy', 'power', 'EDP', 'area'), help='Second objective')
-    parser.add_argument('--num_pop', type=int, default=20,help='Number of populations')
+    parser.add_argument('--fitness2', type=str, default="energy",choices=('latency', 'energy', 'power', 'EDP', 'area'), help='Second objective')
+    parser.add_argument('--num_pop', type=int, default=20 ,help='Number of populations')
     parser.add_argument('--parRS', default=False, action='store_true', help='Parallize across R S dimension')
     parser.add_argument('--epochs', type=int, default=2, help='Number of epochs (i.e., Numbers of generations)')
     parser.add_argument('--outdir', type=str, default="outdir", help='Output directiory')
diff --git a/src/GAMMA/train.py b/src/GAMMA/train.py
index bf901e2..f5ff699 100644
--- a/src/GAMMA/train.py
+++ b/src/GAMMA/train.py
@@ -4,6 +4,7 @@
 
 import glob
 import os, sys
+import cost_database as cdb
 
 script_dir = os.path.dirname(__file__)
 module_path = os.path.abspath(os.path.join(script_dir, '../'))
@@ -84,18 +85,22 @@ def train_model(model_defs, input_arg, chkpt_file='./chkpt', precisions=None):
                           l1_size=opt.l1_size,
                           l2_size=opt.l2_size, pe_limit=opt.pe_limit,
                           area_pebuf_only=False, external_area_model=True, map_cstr=map_cstr,
-                          slevel_max=get_value_for_precision(precision), slevel_min=get_value_for_precision(precision))
+                          slevel_max=get_value_for_precision(precision), slevel_min=get_value_for_precision(precision),
+                          precision=precision)
         #tolto calcolo su l1 value da verificare se rimettere
         chkpt, pops = env.run(dimension, stage_idx=0, num_population=opt.num_pop, prev_stage_value=None,
                               num_generations=opt.epochs,
                               best_sol_1st=None, init_pop=None, bias=None, uni_base=True, use_factor=opt.use_factor,
-                              use_pleteau=False, precision=precision)
+                              use_pleteau=False)
         best_sol = chkpt["best_sol"]
-        best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power, best_num_pe = env.get_indiv_info(
-            best_sol, num_pe=None, precision=precision)
+        (best_runtime, best_throughput, best_energy, best_area, best_l1_size, best_l2_size, best_mac, best_power,
+         best_num_pe, best_l1_read, best_l1_write, best_l2_read, best_l2_write, best_avg_pe, best_avg_bw) = \
+            (env.get_indiv_info(best_sol, num_pe=None, precision=precision))
         print("Mapping:", chkpt["best_sol"])
         print(
-            f"Reward: {chkpt['best_reward'][0]:.3e}, Runtime: {best_runtime:.0f}(cycles), Area: {best_area / 1e6:.3f}(mm2), PE Area_ratio: {best_num_pe * MAC_AREA_INT8 / best_area * 100:.1f}%, Num_PE: {best_num_pe:.0f}, L1 Buffer: {best_l1_size:.0f}(elements), L2 Buffer: {best_l2_size:.0f}(elements)")
+            f"{num_layer}. Reward: {chkpt['best_reward'][0]:.3e}, Runtime: {best_runtime:.0f}(cycles), "
+            f"Area: {best_area / 1e6:.3f}(mm2), Num_PE: {best_num_pe:.0f}, L1 Buffer: {best_l1_size:.0f}(elements),"
+            f" L2 Buffer: {best_l2_size:.0f}(elements)")
         chkpt = {
             "reward": chkpt['best_reward'][0],
             "Best_solution": best_sol,
@@ -103,13 +108,19 @@ def train_model(model_defs, input_arg, chkpt_file='./chkpt', precisions=None):
             "Throughput (MACs/Cycle)": best_throughput,
             "Activity count-based Energy (nJ)": best_energy,
             "Area": best_area,
-            "PE_Area_Ratio": best_num_pe * MAC_AREA_INT8 / best_area,
             "PE": best_num_pe,
             "PE_area": best_num_pe * MAC_AREA_INT8,
             "L1_area": best_l1_size * best_num_pe * BUF_AREA_perbit * 8,
             "L2_area": best_l2_size * BUF_AREA_perbit * 8,
             "L1_size": best_l1_size,
-            "L2_size": best_l2_size
+            "L2_size": best_l2_size,
+            "L1_read": best_l1_read,
+            "L1_write": best_l1_write,
+            "L2_read": best_l2_read,
+            "L2_write": best_l2_write,
+            "#MACs": best_mac,
+            "Avg #PE utilized": best_avg_pe,
+            "Avg BW": best_avg_bw
         }
         chkpt_list.append(chkpt)
         if opt.num_layer != 0:
@@ -121,8 +132,9 @@ def train_model(model_defs, input_arg, chkpt_file='./chkpt', precisions=None):
 
         num_layer += 1
 
-    columns = ["Runtime", "Throughput (MACs/Cycle)", "Activity count-based Energy (nJ)", "Area", "PE_Area_Ratio", "PE", "L1_size", "L2_size", "PE_area", "L1_area", "L2_area",
-               "Best_solution"]
+    columns = ["Runtime", "Throughput (MACs/Cycle)", "Activity count-based Energy (nJ)", "PE",
+               "L1_size", "L2_size", "L1_read", "L1_write", "L2_read", "L2_write",
+               "#MACs", "Avg #PE utilized", "Avg BW", "Best_solution"]
     np_array = None
     for chkpt in chkpt_list:
         if np_array is None:
@@ -131,6 +143,42 @@ def train_model(model_defs, input_arg, chkpt_file='./chkpt', precisions=None):
             np_array = np.vstack(
                 [np_array, np.array([chkpt[t] for t in columns[:-1]] + [f'{chkpt["Best_solution"]}']).reshape(1, -1)])
     df = pd.DataFrame(np_array, columns=columns)
+
+    if precisions is not None:
+        df['L1_size(bytes)'] = df.apply(
+            lambda row: convert_to_bytes(row['L1_size'], precisions[df.index.get_loc(row.name)]),
+            axis=1).astype(int)
+        # Round the L1 byte size up to the smallest SRAM size listed in cdb.sram_data
+        df['L1_normalized_size'] = df['L1_size(bytes)'].apply(find_sram_size)
+        df['L1_read_energy'] = df.apply(lambda row: float(row['L1_read']) * cdb.get_sram_data(row['L1_normalized_size'], 'Read'),
+                                        axis=1)
+        df['L1_write_energy'] = df.apply(
+            lambda row: float(row['L1_write']) * cdb.get_sram_data(row['L1_normalized_size'], 'Write'), axis=1)
+        df['L2_size(bytes)'] = df.apply(
+            lambda row: convert_to_bytes(row['L2_size'], precisions[df.index.get_loc(row.name)]), axis=1).astype(int)
+
+        df['L2_normalized_size'] = df['L2_size(bytes)'].apply(find_sram_size)
+        df['L2_read_energy'] = df.apply(lambda row: float(row['L2_read']) * cdb.get_sram_data(row['L2_normalized_size'], 'Read'),
+                                        axis=1)
+        df['L2_write_energy'] = df.apply(
+            lambda row: float(row['L2_write']) * cdb.get_sram_data(row['L2_normalized_size'], 'Write'), axis=1)
+        df['MAC_energy'] = df.apply(lambda row: cdb.get_energy(operation='MAC', precision=precisions[df.index.get_loc(row.name)]) * float(row['#MACs']), axis=1)
+        df['NoC_energy'] = df.apply(lambda row: cdb.calculate_noc_dyn_energy(precision=precisions[row.name], bw=row['Avg BW']), axis=1)
+        df['L1 energy'] = df['L1_read_energy'] + df['L1_write_energy']
+        df['L2 energy'] = df['L2_read_energy'] + df['L2_write_energy']
+        df['Activity count-based Energy (nJ)'] = df['Activity count-based Energy (nJ)'].astype(float).astype(int)
+        df['Runtime'] = df['Runtime'].astype(float).astype(int)
+        df['EDP'] = df['Activity count-based Energy (nJ)'] * df['Runtime']
+        df['#MACs'] = df['#MACs'].astype(float).astype(int)
+        df['Runtime'] = df['Runtime'].astype(float).astype(int)
+        df['Activity count-based Energy (nJ)'] = df['Activity count-based Energy (nJ)'].astype('int')
+        df['PE'] = df['PE'].astype(float).astype(int)
+        df['L1_size'] = df['L1_size'].astype(float).astype(int)
+        df['L2_size'] = df['L2_size'].astype(float).astype(int)
+        df['L1_read'] = df['L1_read'].astype(float).astype(int)
+        df['L1_write'] = df['L1_write'].astype(float).astype(int)
+        df['L2_read'] = df['L2_read'].astype(float).astype(int)
+        df['L2_write'] = df['L2_write'].astype(float).astype(int)
     df.to_csv(chkpt_file[:-4] + ".csv", index_label="Layer")
 
     with open(chkpt_file, "wb") as fd:
@@ -146,7 +194,7 @@ def get_cstr_name(mapping_cstr):
 
 
 def get_value_for_precision(precision):
-    if precision is None or precision == "FP32":
+    if precision is None or precision == "FP32" or precision == "INT32":
         return 2
     else:
         return 3
@@ -159,12 +207,20 @@ def get_value_for_pe(precision, num_pe):
         return int(num_pe * 2)
     if precision == "FP8":
         return int(num_pe * 4)
+    if precision == "FP4":
+        return int(num_pe * 8)
+    if precision == "FP2":
+        return int(num_pe * 16)
     if precision == "INT32":
         return int(num_pe)
     if precision == "INT16":
         return int(num_pe * 2)
     if precision == "INT8":
         return int(num_pe * 4)
+    if precision == "INT4":
+        return int(num_pe * 8)
+    if precision == "INT2":
+        return int(num_pe * 16)
 
 
 def get_value_for_l1(precision, l1_size):
@@ -203,3 +259,21 @@ def map_constraints(map_cstr, opt, precision):
         put_into_actual_cstr(costmodel_cstr, map_cstr)
 
     return map_cstr
+
+
+def convert_to_bytes(size, precision):
+    """Convert a size in elements of `precision` to bytes (bits / 8, may be fractional)."""
+    if precision not in cdb.precision_to_bits:
+        raise ValueError(f"Unknown precision: {precision}")
+    element_bits = cdb.precision_to_bits[precision]
+    return (int(float(size)) * element_bits) / 8
+
+
+def find_sram_size(required_bytes):
+    """Return the smallest key of cdb.sram_data that is >= required_bytes, else raise ValueError."""
+    fitting = [capacity for capacity in sorted(cdb.sram_data.keys()) if capacity >= required_bytes]
+    if not fitting:
+        raise ValueError(f"No suitable SRAM size found for {required_bytes} bytes")
+    return fitting[0]
+
+
diff --git a/src/Other_Blackbox_optimization/main.py b/src/Other_Blackbox_optimization/main.py
index 128e738..581992d 100644
--- a/src/Other_Blackbox_optimization/main.py
+++ b/src/Other_Blackbox_optimization/main.py
@@ -29,9 +29,9 @@
 
 def get_reward(env, indv):
     if opt.fixedCluster>0:
-        reward = env.oberserve_maestro(indv, sp2_sz=opt.fixedCluster)
+        reward = env.observe_maestro(indv, sp2_sz=opt.fixedCluster)
     else:
-        reward = env.oberserve_maestro(indv)
+        reward = env.observe_maestro(indv)
     if reward is None:
         reward = [-2**63]
     reward = reward[0]
diff --git a/src/utils/constraint.py b/src/utils/constraint.py
index 5aea439..0c86989 100644
--- a/src/utils/constraint.py
+++ b/src/utils/constraint.py
@@ -1,20 +1,24 @@
 import random
 import numpy as np
 from collections import OrderedDict
-dimension_set = {"K","C","R","S","Y","X"}
+
+dimension_set = {"K", "C", "R", "S", "Y", "X"}
+
+
 class Constraint():
-    def __init__(self,num_pe=256):
+    def __init__(self, num_pe=256):
         self.lastcluster_dict = {}
         self.cstr_list = [None, None, None]
         self.num_free_order = 21
         self.num_pe = num_pe
+
     def set_constraint(self, level, cstr):
         if level == "L3":
             L = 2
         if level == "L2":
             L = 1
         if level == "L1":
-            L  = 0
+            L = 0
         if self.cstr_list[L] is None:
             self.cstr_list[L] = OrderedDict(cstr)
         else:
@@ -24,22 +28,24 @@ def set_constraint(self, level, cstr):
                 self.cstr_list[L][key] = value
 
     def impose_constraint(self, indv, fixed_sp_sz=0):
-        num_levels = len(indv)//7
+        num_levels = len(indv) // 7
         index_offset = 0
         self.lastcluster_dict = {}
-        for nl in range(num_levels-1,-1,-1):
+        for nl in range(num_levels - 1, -1, -1):
             self.set_valid_value_v2(self.cstr_list[nl], index_offset, indv, fixed_sp_sz=fixed_sp_sz)
             index_offset += 7
 
     def create_from_constraint(self, indv, fixed_sp_sz=0, dimension_dict=None):
-        num_levels = len(indv)//7
+        num_levels = len(indv) // 7
         index_offset = 0
         self.dimension_dict = dimension_dict
         self.lastcluster_dict = {}
         ret_num_free_order = 0
         ret_num_free_par = 0
-        for nl in range(num_levels-1,-1,-1):
-            num_free_order, num_free_par, free_orders,num_free_tile = self.set_valid_value_v2(self.cstr_list[nl], index_offset, indv, fixed_sp_sz=fixed_sp_sz)
+        for nl in range(num_levels - 1, -1, -1):
+            num_free_order, num_free_par, free_orders, num_free_tile = self.set_valid_value_v2(self.cstr_list[nl],
+                                                                                               index_offset, indv,
+                                                                                               fixed_sp_sz=fixed_sp_sz)
             index_offset += 7
             ret_num_free_order += num_free_order
             ret_num_free_par += num_free_par
@@ -58,10 +64,10 @@ def get_cstr_list(self, indv, fixed_sp_sz=0):
         ret_num_free_order, ret_num_free_par = self.create_from_constraint(indv, fixed_sp_sz, self.dimension_dict)
         return self.reverse_cstr_list(self.cstr_list), ret_num_free_order, ret_num_free_par
 
-    def set_valid_value(self, lever_cstr, index_offset, indv,fixed_sp_sz=0):
+    def set_valid_value(self, lever_cstr, index_offset, indv, fixed_sp_sz=0):
         for key, value in lever_cstr.items():
             if key == "sp":
-                if  indv[index_offset][0] not in value:
+                if indv[index_offset][0] not in value:
                     sp = np.random.choice(value, 1)[0]
                     indv[index_offset][0] = sp
                     if len(self.lastcluster_dict) > 0:
@@ -77,18 +83,14 @@ def set_valid_value(self, lever_cstr, index_offset, indv,fixed_sp_sz=0):
                     valid_value = random.randint(1, indv[i][1])
                 self.lastcluster_dict[indv[i][0]] = indv[i][1]
             if key in dimension_set:
-                if valid_value >  indv[change_idx][1]:
+                if valid_value > indv[change_idx][1]:
                     indv[change_idx][1] = valid_value
 
-
-
-
-
     def set_valid_value_v2(self, lever_cstr, index_offset, indv, fixed_sp_sz=0):
         num_free_order = 6
-        num_free_par  = 1
+        num_free_par = 1
         num_free_tile = 6
-        free_orders = {"X", "Y", "K","C","R","S"}
+        free_orders = {"X", "Y", "K", "C", "R", "S"}
         for key, value in lever_cstr.items():
             if key == "sp":
                 sp = np.random.choice(value, 1)[0]
@@ -98,12 +100,14 @@ def set_valid_value_v2(self, lever_cstr, index_offset, indv, fixed_sp_sz=0):
                         sp_sz = fixed_sp_sz
                     else:
                         if sp != indv[index_offset][0]:
-                            sp_sz = random.randint(1,min(self.num_pe if self.num_pe>0 else float('Inf'), self.lastcluster_dict[sp])) if len(self.lastcluster_dict) >0 else self.dimension_dict[sp]
+                            sp_sz = random.randint(1, min(self.num_pe if self.num_pe > 0 else float('Inf'),
+                                                          self.lastcluster_dict[sp])) if len(
+                                self.lastcluster_dict) > 0 else self.dimension_dict[sp]
                 indv[index_offset] = [sp, sp_sz]
                 num_free_par = 0
-            elif key== "sp2":
+            elif key == "sp2":
                 sp2 = np.random.choice(value, 1)[0]
-                if len(indv[index_offset])>2:
+                if len(indv[index_offset]) > 2:
                     indv[index_offset][2] = sp2
                 else:
                     indv[index_offset].append(sp2)
@@ -111,7 +115,9 @@ def set_valid_value_v2(self, lever_cstr, index_offset, indv, fixed_sp_sz=0):
                 if type(value) is int:
                     indv[index_offset][1] = value
                 else:
-                    indv[index_offset][1] =  random.randint(1,min(self.num_pe if self.num_pe>0 else float('Inf'), self.lastcluster_dict[value])) if len(self.lastcluster_dict) >0 else self.dimension_dict[value]
+                    indv[index_offset][1] = random.randint(1, min(self.num_pe if self.num_pe > 0 else float('Inf'),
+                                                                  self.lastcluster_dict[value])) if len(
+                        self.lastcluster_dict) > 0 else self.dimension_dict[value]
             elif key == "order":
                 free_orders -= set(value)
                 tile_dict = OrderedDict()
@@ -121,9 +127,9 @@ def set_valid_value_v2(self, lever_cstr, index_offset, indv, fixed_sp_sz=0):
                 num_free_order -= num_fixed_order
                 for value_idx, i in enumerate(range(index_offset + 1 + num_free_order, index_offset + 7)):
                     indv[i][0] = value[value_idx]
-                    indv[i][1] =  tile_dict[indv[i][0]]
+                    indv[i][1] = tile_dict[indv[i][0]]
                     tile_dict.pop(indv[i][0], None)
-                for i in  range(index_offset + 1, index_offset + 1 + num_free_order):
+                for i in range(index_offset + 1, index_offset + 1 + num_free_order):
                     indv[i][0], indv[i][1] = tile_dict.popitem(last=False)
             elif key in dimension_set:
                 num_free_tile -= 1
@@ -135,16 +141,15 @@ def set_valid_value_v2(self, lever_cstr, index_offset, indv, fixed_sp_sz=0):
                         else:
                             if type(value) is list:
                                 left, right = value
-                                right_num = self.lastcluster_dict[right] if len(self.lastcluster_dict) >0 else self.dimension_dict[right]
+                                right_num = self.lastcluster_dict[right] if len(self.lastcluster_dict) > 0 else \
+                                self.dimension_dict[right]
                                 # indv[change_idx][1] =  random.randint(left, right_num)
-                                indv[change_idx][1] =  right_num
+                                indv[change_idx][1] = right_num
                             else:
-                                indv[change_idx][1] = self.lastcluster_dict[value] if len(self.lastcluster_dict) >0 else self.dimension_dict[value]
+                                indv[change_idx][1] = self.lastcluster_dict[value] if len(
+                                    self.lastcluster_dict) > 0 else self.dimension_dict[value]
                         break
 
-
-
         for i in range(index_offset + 1, index_offset + 7):
             self.lastcluster_dict[indv[i][0]] = indv[i][1]
-        return num_free_order, num_free_par, free_orders,num_free_tile
-
+        return num_free_order, num_free_par, free_orders, num_free_tile