diff --git a/.gitignore b/.gitignore index 72fb15694..8109e5dfb 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,5 @@ test_benchmark* result_parser.py test* configs_test -benchmark_results \ No newline at end of file +benchmark_results +results/ \ No newline at end of file diff --git a/README.md b/README.md index 77e6db780..dec96fc98 100644 --- a/README.md +++ b/README.md @@ -77,10 +77,10 @@ SCALE-Sim uses two input files to run, a configuration file and a topology file. The configuration file is used to specify the architecture and run parameters for the simulations. The following shows a sample config file: -![sample config](https://github.com/scalesim-project/scale-sim-v2/blob/main/documentation/resources/config-file-example.png "sample config") +![sample config](./documentation/resources/config-file-example-new.png "sample config") The config file has three sections. The "*general*" section specifies the run name, which is user specific. The "*architecture_presets*" section describes the parameter of the systolic array hardware to simulate. -The "*run_preset*" section specifies if the simulator should run with user specified bandwidth, or should it calculate the optimal bandwidth for stall free execution. +The "*run_presets*" section specifies whether the simulator should run with a user-specified bandwidth or calculate the optimal bandwidth for stall-free execution. It also allows specifying a `TimeLinearModel` parameter (e.g., `TPUv4`, `TPUv5e`, or `TPUv6e`) to convert compute cycles into time estimates using hardware-specific linear models. The detailed documentation for the config file could be found **here (TBD)** @@ -104,11 +104,12 @@ Here is an example output dumped to stdout when running Yolo Tiny (whose configu ![screen_out](https://github.com/scalesim-project/scale-sim-v2/blob/doc/anand/readme/documentation/resources/output.png "std_out") Also, the simulator generates read write traces and summary logs at ```/../scalesim_outputs/```. 
The user can also provide a custom location using ```-p ``` when using `scalesim.py` file. -There are three summary logs: +There are four summary logs: * COMPUTE_REPORT.csv: Layer wise logs for compute cycles, stalls, utilization percentages etc. * BANDWIDTH_REPORT.csv: Layer wise information about average and maximum bandwidths for each operand when accessing SRAM and DRAM * DETAILED_ACCESS_REPORT.csv: Layer wise information about number of accesses and access cycles for each operand for SRAM and DRAM. +* TIME_REPORT.csv: Layer wise time estimations in microseconds, calculated using the configured linear model (TPUv4, TPUv5e, or TPUv6e) based on compute cycles and spatiotemporal dimensions. If `TimeLinearModel` is not set to one of these models (the default is `None`), no conversion is applied and the raw cycle count is written instead. In addition cycle accurate SRAM/DRAM access logs are also dumped and could be accesses at ```//``` eg `/../scalesim_outputs/` diff --git a/configs/tpuv4.cfg b/configs/tpuv4.cfg new file mode 100644 index 000000000..9042b2d6a --- /dev/null +++ b/configs/tpuv4.cfg @@ -0,0 +1,18 @@ +[general] +run_name = scale_example_TPUv4 + +[architecture_presets] +ArrayHeight: 128 +ArrayWidth: 128 +IfmapSramSzkB: 4096 +FilterSramSzkB: 4096 +OfmapSramSzkB: 8192 +IfmapOffset: 0 +FilterOffset: 10000000 +OfmapOffset: 20000000 +Bandwidth : 1024 +Dataflow : ws + +[run_presets] +InterfaceBandwidth: USER +TimeLinearModel: TPUv4 \ No newline at end of file diff --git a/documentation/resources/config-file-example-new.png b/documentation/resources/config-file-example-new.png new file mode 100644 index 000000000..acbaa4015 Binary files /dev/null and b/documentation/resources/config-file-example-new.png differ diff --git a/scalesim/linear_model/__init__.py b/scalesim/linear_model/__init__.py new file mode 100644 index 000000000..26f2d4659 --- /dev/null +++ b/scalesim/linear_model/__init__.py @@ -0,0 +1,6 @@ +""" +Linear model package for converting cycles to time. 
+""" + + + diff --git a/scalesim/linear_model/tpu.py b/scalesim/linear_model/tpu.py new file mode 100644 index 000000000..d1af6a608 --- /dev/null +++ b/scalesim/linear_model/tpu.py @@ -0,0 +1,60 @@ +def tpuv4_linear_model(cycles, s_row=1, s_col=1, t_time=1): + """ + TPUv4 linear model for converting cycles to time in microseconds. + + Args: + cycles: Total compute cycles + s_row: Spatial dimension rows + s_col: Spatial dimension columns + t_time: Temporal dimension + + Returns: + Time in microseconds + """ + if s_row <=128 and s_col <=128 and t_time <=128: + return 0.002762 * cycles - 0.062665 + elif s_row <=1024 and s_col <=1024 and t_time <=1024: + return 0.000388 * cycles + 2.05942 + else: + return 0.000202 * cycles + 29.7217 +def tpuv5e_linear_model(cycles, s_row=1, s_col=1, t_time=1): + """ + TPUv5e linear model for converting cycles to time in microseconds. + + Args: + cycles: Total compute cycles + s_row: Spatial dimension rows + s_col: Spatial dimension columns + t_time: Temporal dimension + + Returns: + Time in microseconds + """ + # TODO: Modify for V5 + if s_row <=128 and s_col <=128 and t_time <=128: + return 0.002133 * cycles - 0.168796 + elif s_row <=1024 and s_col <=1024 and t_time <=1024: + return 0.000167 * cycles + 1.158923 + else: + return 0.000159 * cycles -0.380696 + +def tpuv6e_linear_model(cycles, s_row=1, s_col=1, t_time=1): + """ + TPUv6e linear model for converting cycles to time in microseconds. 
+ + Args: + cycles: Total compute cycles + s_row: Spatial dimension rows + s_col: Spatial dimension columns + t_time: Temporal dimension + + Returns: + Time in microseconds + """ + if s_row <=128 and s_col <=128 and t_time <=128: + return 0.001389 * cycles + 0.604798 + elif s_row <=1024 and s_col <=1024 and t_time <=1024: + return 0.000068 * cycles + 1.546793 + else: + return 0.000040 * cycles + 4.384712 + \ No newline at end of file diff --git a/scalesim/scale_config.py b/scalesim/scale_config.py index 8ae7076b5..109390fb6 100644 --- a/scalesim/scale_config.py +++ b/scalesim/scale_config.py @@ -59,6 +59,9 @@ def __init__(self): # Sarbartha: Added ramulator based DRAM trace support self.use_ramulator_trace = False + + # Time linear model parameter + self.time_linear_model = 'None' # def read_conf_file(self, conf_file_in): """ @@ -85,12 +88,20 @@ def read_conf_file(self, conf_file_in): message += 'Use either USER or CALC in InterfaceBandwidth feild. Aborting!' return - ramulator_on = config.get(section, 'UseRamulatorTrace') - if ramulator_on == 'True': - self.use_ramulator_trace = True - else: - self.use_ramulator_trace = False + # Parse UseRamulatorTrace if present + if config.has_option(section, 'UseRamulatorTrace'): + ramulator_on = config.get(section, 'UseRamulatorTrace') + if ramulator_on == 'True': + self.use_ramulator_trace = True + else: + self.use_ramulator_trace = False + # Parse TimeLinearModel if present + if config.has_option(section, 'TimeLinearModel'): + self.time_linear_model = config.get(section, 'TimeLinearModel') + assert self.time_linear_model in ['None', 'TPUv4', 'TPUv5e', 'TPUv6e'], f"ERROR: Invalid time linear model '{self.time_linear_model}'. 
Must be one of: None, TPUv4, TPUv5e, TPUv6e" + + # TODO Sarbartha: Should be bw div_factor = 1 @@ -104,8 +115,12 @@ def read_conf_file(self, conf_file_in): self.filter_offset = int(config.get(section, 'FilterOffset')) self.ofmap_offset = int(config.get(section, 'OfmapOffset')) self.df = config.get(section, 'Dataflow') - self.req_buf_sz_rd = int(config.get(section, 'ReadRequestBuffer')) // div_factor - self.req_buf_sz_wr = int(config.get(section, 'WriteRequestBuffer')) // div_factor + + # Make ReadRequestBuffer and WriteRequestBuffer optional + if config.has_option(section, 'ReadRequestBuffer'): + self.req_buf_sz_rd = int(config.get(section, 'ReadRequestBuffer')) // div_factor + if config.has_option(section, 'WriteRequestBuffer'): + self.req_buf_sz_wr = int(config.get(section, 'WriteRequestBuffer')) // div_factor layout_section = 'layout' self.using_ifmap_custom_layout = config.getboolean(layout_section, 'IfmapCustomLayout') @@ -128,27 +143,28 @@ def read_conf_file(self, conf_file_in): if config.has_section('network_presets'): # Read network_presets self.topofile = config.get(section, 'TopologyCsvLoc').split('"')[1] - # Sparsity - section = 'sparsity' - if config.get(section, 'SparsitySupport').lower() == 'true': - self.sparsity_support = True - else: - self.sparsity_support = False - - if self.sparsity_support: - self.sparsity_representation = config.get(section, 'SparseRep') - # self.sparsity_N = int(config.get(section, 'NonZeroElems')) - # self.sparsity_M = int(config.get(section, 'BlockSize')) - if config.get(section, 'OptimizedMapping').lower() == 'true': - self.sparsity_optimized_mapping = True + # Sparsity - make this section optional + if config.has_section('sparsity'): + section = 'sparsity' + if config.get(section, 'SparsitySupport').lower() == 'true': + self.sparsity_support = True else: - self.sparsity_optimized_mapping = False + self.sparsity_support = False + + if self.sparsity_support: + self.sparsity_representation = config.get(section, 'SparseRep') 
+ # self.sparsity_N = int(config.get(section, 'NonZeroElems')) + # self.sparsity_M = int(config.get(section, 'BlockSize')) + if config.get(section, 'OptimizedMapping').lower() == 'true': + self.sparsity_optimized_mapping = True + else: + self.sparsity_optimized_mapping = False - if self.sparsity_optimized_mapping: - self.sparsity_block_size = int(config.get(section, 'BlockSize')) - assert self.sparsity_block_size <= self.array_rows, "ERROR: Invalid block size" + if self.sparsity_optimized_mapping: + self.sparsity_block_size = int(config.get(section, 'BlockSize')) + assert self.sparsity_block_size <= self.array_rows, "ERROR: Invalid block size" - self.sparsity_rand_seed = int(config.get(section, 'RandomNumberGeneratorSeed')) + self.sparsity_rand_seed = int(config.get(section, 'RandomNumberGeneratorSeed')) self.valid_conf_flag = True @@ -221,6 +237,13 @@ def write_conf_file(self, conf_file_out): config.add_section(section) topofile = '"' + self.topofile + '"' config.set(section, 'TopologyCsvLoc', str(topofile)) + + section = 'run_presets' + config.add_section(section) + bw_mode = 'USER' if self.use_user_bandwidth else 'CALC' + config.set(section, 'InterfaceBandwidth', str(bw_mode)) + config.set(section, 'UseRamulatorTrace', str(self.use_ramulator_trace)) + config.set(section, 'TimeLinearModel', str(self.time_linear_model)) with open(conf_file_out, 'w') as configfile: config.write(configfile) @@ -485,6 +508,14 @@ def get_min_dram_bandwidth(self): else: return min(self.bandwidths) + def get_time_linear_model(self): + """ + Method to get the time linear model used for the simulation. 
+ """ + if self.valid_conf_flag: + return self.time_linear_model + return "Default" + # FIX ISSUE #14 @staticmethod def get_default_conf_as_list(): diff --git a/scalesim/simulator.py b/scalesim/simulator.py index 172747dae..fce0a609d 100644 --- a/scalesim/simulator.py +++ b/scalesim/simulator.py @@ -9,6 +9,7 @@ from scalesim.topology_utils import topologies as topo from scalesim.layout_utils import layouts as layout from scalesim.single_layer_sim import single_layer_sim as layer_sim +from scalesim.linear_model.tpu import tpuv4_linear_model, tpuv5e_linear_model, tpuv6e_linear_model class simulator: @@ -174,6 +175,11 @@ def generate_reports(self): header = ('LayerID, Total Cycles (incl. prefetch), Total Cycles, Stall Cycles, Overall Util %, Mapping Efficiency %,' ' Compute Util %,\n') compute_report.write(header) + + # Create TIME_REPORT.csv for linear model time conversion + time_report_name = self.top_path + '/TIME_REPORT.csv' + time_report = open(time_report_name, 'w') + time_report.write('LayerID, Time (us),\n') bandwidth_report_name = self.top_path + '/BANDWIDTH_REPORT.csv' bandwidth_report = open(bandwidth_report_name, 'w') @@ -214,6 +220,29 @@ def generate_reports(self): log += ', '.join([str(x) for x in compute_report_items_this_layer]) log += ',\n' compute_report.write(log) + + # Generate TIME_REPORT entry using linear model + total_cycles = compute_report_items_this_layer[1] # Total Cycles (not including prefetch) + time_linear_model = self.conf.get_time_linear_model() + + # Get spatiotemporal dimensions for this layer + dataflow = self.conf.get_dataflow() + s_row, s_col, t_time = self.topo.get_spatiotemporal_dims(layer_id=lid, df=dataflow) + + + # Apply the appropriate linear model based on config + if time_linear_model == 'TPUv4': + time_us = tpuv4_linear_model(total_cycles, s_row, s_col, t_time) + elif time_linear_model == 'TPUv5e': + time_us = tpuv5e_linear_model(total_cycles, s_row, s_col, t_time) + elif time_linear_model == 'TPUv6e': + time_us = 
tpuv6e_linear_model(total_cycles, s_row, s_col, t_time) + else: + # Default: no conversion, just use cycles as time + time_us = total_cycles + + time_log = str(lid) + ', ' + str(time_us) + ',\n' + time_report.write(time_log) bandwidth_report_items_this_layer = single_layer_obj.get_bandwidth_report_items() log = str(lid) + ', ' @@ -237,6 +266,7 @@ def generate_reports(self): compute_report.close() bandwidth_report.close() detail_report.close() + time_report.close() if self.conf.sparsity_support is True: sparse_report.close() @@ -254,3 +284,4 @@ def get_total_cycles(self): total_cycles += cycles_this_layer return total_cycles +