Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ test_benchmark*
result_parser.py
test*
configs_test
benchmark_results
benchmark_results
results/
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ SCALE-Sim uses two input files to run, a configuration file and a topology file.
The configuration file is used to specify the architecture and run parameters for the simulations.
The following shows a sample config file:

![sample config](https://github.com/scalesim-project/scale-sim-v2/blob/main/documentation/resources/config-file-example.png "sample config")
![sample config](./documentation/resources/config-file-example-new.png "sample config")

The config file has three sections. The "*general*" section specifies the run name, which is user-specified. The "*architecture_presets*" section describes the parameters of the systolic array hardware to simulate.
The "*run_preset*" section specifies if the simulator should run with user specified bandwidth, or should it calculate the optimal bandwidth for stall free execution.
The "*run_presets*" section specifies whether the simulator should run with a user-specified bandwidth or calculate the optimal bandwidth for stall-free execution. It also accepts an optional `TimeLinearModel` parameter (e.g., `TPUv4`, `TPUv5e`, or `TPUv6e`) that converts compute cycles into time estimates using hardware-specific linear models.

The detailed documentation for the config file could be found **here (TBD)**

Expand All @@ -104,11 +104,12 @@ Here is an example output dumped to stdout when running Yolo Tiny (whose configu
![screen_out](https://github.com/scalesim-project/scale-sim-v2/blob/doc/anand/readme/documentation/resources/output.png "std_out")

Also, the simulator generates read write traces and summary logs at ```<run_dir>/../scalesim_outputs/```. The user can also provide a custom location using ```-p <custom_output_directory>``` when using `scalesim.py` file.
There are three summary logs:
There are four summary logs:

* COMPUTE_REPORT.csv: Layer wise logs for compute cycles, stalls, utilization percentages etc.
* BANDWIDTH_REPORT.csv: Layer wise information about average and maximum bandwidths for each operand when accessing SRAM and DRAM
* DETAILED_ACCESS_REPORT.csv: Layer wise information about number of accesses and access cycles for each operand for SRAM and DRAM.
* TIME_REPORT.csv: Layer wise time estimations in microseconds, calculated using the configured linear model (TPUv4, TPUv5e, or TPUv6e) based on compute cycles and spatiotemporal dimensions.

In addition, cycle-accurate SRAM/DRAM access logs are also dumped and can be accessed at ```<outputs_dir>/<run_name>/```, e.g. `<run_dir>/../scalesim_outputs/<run_name>`

Expand Down
18 changes: 18 additions & 0 deletions configs/tpuv4.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[general]
run_name = scale_example_TPUv4

[architecture_presets]
ArrayHeight: 128
ArrayWidth: 128
IfmapSramSzkB: 4096
FilterSramSzkB: 4096
OfmapSramSzkB: 8192
IfmapOffset: 0
FilterOffset: 10000000
OfmapOffset: 20000000
Bandwidth : 1024
Dataflow : ws

[run_presets]
InterfaceBandwidth: USER
TimeLinearModel: TPUv4
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 6 additions & 0 deletions scalesim/linear_model/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""
Linear model package for converting cycles to time.
"""



60 changes: 60 additions & 0 deletions scalesim/linear_model/tpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
def tpuv4_linear_model(cycles, s_row=1, s_col=1, t_time=1):
    """
    Estimate TPUv4 execution time (microseconds) from a cycle count.

    The estimate is piecewise linear in ``cycles``. Which line is used
    depends on the three workload dimensions: the first tier whose bound
    is not exceeded by any of them supplies the slope and intercept.

    Args:
        cycles: Total compute cycles
        s_row: Spatial dimension rows
        s_col: Spatial dimension columns
        t_time: Temporal dimension

    Returns:
        Time in microseconds
    """
    dims = (s_row, s_col, t_time)

    # (inclusive bound applied to every dimension, slope, intercept)
    bounded_tiers = (
        (128, 0.002762, -0.062665),
        (1024, 0.000388, 2.05942),
    )
    for bound, slope, intercept in bounded_tiers:
        if all(dim <= bound for dim in dims):
            return slope * cycles + intercept

    # At least one dimension is larger than 1024.
    return 0.000202 * cycles + 29.7217
def tpuv5e_linear_model(cycles, s_row=1, s_col=1, t_time=1):
    """
    Estimate TPUv5e execution time (microseconds) from a cycle count.

    The estimate is piecewise linear in ``cycles``. Which line is used
    depends on the three workload dimensions: the first tier whose bound
    is not exceeded by any of them supplies the slope and intercept.

    Args:
        cycles: Total compute cycles
        s_row: Spatial dimension rows
        s_col: Spatial dimension columns
        t_time: Temporal dimension

    Returns:
        Time in microseconds
    """
    # TODO: Modify for V5 (note carried over from the original code;
    # confirm whether the coefficients below are final).
    dims = (s_row, s_col, t_time)

    # (inclusive bound applied to every dimension, slope, intercept)
    bounded_tiers = (
        (128, 0.002133, -0.168796),
        (1024, 0.000167, 1.158923),
    )
    for bound, slope, intercept in bounded_tiers:
        if all(dim <= bound for dim in dims):
            return slope * cycles + intercept

    # At least one dimension is larger than 1024.
    return 0.000159 * cycles - 0.380696

def tpuv6e_linear_model(cycles, s_row=1, s_col=1, t_time=1):
    """
    Estimate TPUv6e execution time (microseconds) from a cycle count.

    The estimate is piecewise linear in ``cycles``. Which line is used
    depends on the three workload dimensions: the first tier whose bound
    is not exceeded by any of them supplies the slope and intercept.

    Args:
        cycles: Total compute cycles
        s_row: Spatial dimension rows
        s_col: Spatial dimension columns
        t_time: Temporal dimension

    Returns:
        Time in microseconds
    """
    dims = (s_row, s_col, t_time)

    # (inclusive bound applied to every dimension, slope, intercept)
    bounded_tiers = (
        (128, 0.001389, 0.604798),
        (1024, 0.000068, 1.546793),
    )
    for bound, slope, intercept in bounded_tiers:
        if all(dim <= bound for dim in dims):
            return slope * cycles + intercept

    # At least one dimension is larger than 1024.
    return 0.000040 * cycles + 4.384712

81 changes: 56 additions & 25 deletions scalesim/scale_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def __init__(self):

# Sarbartha: Added ramulator based DRAM trace support
self.use_ramulator_trace = False

# Time linear model parameter
self.time_linear_model = 'None'
#
def read_conf_file(self, conf_file_in):
"""
Expand All @@ -85,12 +88,20 @@ def read_conf_file(self, conf_file_in):
message += 'Use either USER or CALC in InterfaceBandwidth feild. Aborting!'
return

ramulator_on = config.get(section, 'UseRamulatorTrace')
if ramulator_on == 'True':
self.use_ramulator_trace = True
else:
self.use_ramulator_trace = False
# Parse UseRamulatorTrace if present
if config.has_option(section, 'UseRamulatorTrace'):
ramulator_on = config.get(section, 'UseRamulatorTrace')
if ramulator_on == 'True':
self.use_ramulator_trace = True
else:
self.use_ramulator_trace = False

# Parse TimeLinearModel if present
if config.has_option(section, 'TimeLinearModel'):
self.time_linear_model = config.get(section, 'TimeLinearModel')
assert self.time_linear_model in ['None', 'TPUv4', 'TPUv5e', 'TPUv6e'], f"ERROR: Invalid time linear model '{self.time_linear_model}'. Must be one of: None, TPUv4, TPUv5e, TPUv6e"


# TODO Sarbartha: Should be bw
div_factor = 1

Expand All @@ -104,8 +115,12 @@ def read_conf_file(self, conf_file_in):
self.filter_offset = int(config.get(section, 'FilterOffset'))
self.ofmap_offset = int(config.get(section, 'OfmapOffset'))
self.df = config.get(section, 'Dataflow')
self.req_buf_sz_rd = int(config.get(section, 'ReadRequestBuffer')) // div_factor
self.req_buf_sz_wr = int(config.get(section, 'WriteRequestBuffer')) // div_factor

# Make ReadRequestBuffer and WriteRequestBuffer optional
if config.has_option(section, 'ReadRequestBuffer'):
self.req_buf_sz_rd = int(config.get(section, 'ReadRequestBuffer')) // div_factor
if config.has_option(section, 'WriteRequestBuffer'):
self.req_buf_sz_wr = int(config.get(section, 'WriteRequestBuffer')) // div_factor

layout_section = 'layout'
self.using_ifmap_custom_layout = config.getboolean(layout_section, 'IfmapCustomLayout')
Expand All @@ -128,27 +143,28 @@ def read_conf_file(self, conf_file_in):
if config.has_section('network_presets'): # Read network_presets
self.topofile = config.get(section, 'TopologyCsvLoc').split('"')[1]

# Sparsity
section = 'sparsity'
if config.get(section, 'SparsitySupport').lower() == 'true':
self.sparsity_support = True
else:
self.sparsity_support = False

if self.sparsity_support:
self.sparsity_representation = config.get(section, 'SparseRep')
# self.sparsity_N = int(config.get(section, 'NonZeroElems'))
# self.sparsity_M = int(config.get(section, 'BlockSize'))
if config.get(section, 'OptimizedMapping').lower() == 'true':
self.sparsity_optimized_mapping = True
# Sparsity - make this section optional
if config.has_section('sparsity'):
section = 'sparsity'
if config.get(section, 'SparsitySupport').lower() == 'true':
self.sparsity_support = True
else:
self.sparsity_optimized_mapping = False
self.sparsity_support = False

if self.sparsity_support:
self.sparsity_representation = config.get(section, 'SparseRep')
# self.sparsity_N = int(config.get(section, 'NonZeroElems'))
# self.sparsity_M = int(config.get(section, 'BlockSize'))
if config.get(section, 'OptimizedMapping').lower() == 'true':
self.sparsity_optimized_mapping = True
else:
self.sparsity_optimized_mapping = False

if self.sparsity_optimized_mapping:
self.sparsity_block_size = int(config.get(section, 'BlockSize'))
assert self.sparsity_block_size <= self.array_rows, "ERROR: Invalid block size"
if self.sparsity_optimized_mapping:
self.sparsity_block_size = int(config.get(section, 'BlockSize'))
assert self.sparsity_block_size <= self.array_rows, "ERROR: Invalid block size"

self.sparsity_rand_seed = int(config.get(section, 'RandomNumberGeneratorSeed'))
self.sparsity_rand_seed = int(config.get(section, 'RandomNumberGeneratorSeed'))

self.valid_conf_flag = True

Expand Down Expand Up @@ -221,6 +237,13 @@ def write_conf_file(self, conf_file_out):
config.add_section(section)
topofile = '"' + self.topofile + '"'
config.set(section, 'TopologyCsvLoc', str(topofile))

section = 'run_presets'
config.add_section(section)
bw_mode = 'USER' if self.use_user_bandwidth else 'CALC'
config.set(section, 'InterfaceBandwidth', str(bw_mode))
config.set(section, 'UseRamulatorTrace', str(self.use_ramulator_trace))
config.set(section, 'TimeLinearModel', str(self.time_linear_model))

with open(conf_file_out, 'w') as configfile:
config.write(configfile)
Expand Down Expand Up @@ -485,6 +508,14 @@ def get_min_dram_bandwidth(self):
else:
return min(self.bandwidths)

def get_time_linear_model(self):
    """
    Method to get the time linear model used for the simulation.

    Returns:
        The configured model name (``self.time_linear_model``) when the
        configuration is valid. Otherwise the string ``'None'``, which is
        the same "no model selected" value the attribute is initialised
        with, so callers never see a value outside the documented set.
    """
    if not self.valid_conf_flag:
        # Previously returned "Default", a sentinel that is neither the
        # attribute's initial value nor one of the accepted model names.
        return 'None'
    return self.time_linear_model

Copilot AI Dec 12, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return value "Default" when valid_conf_flag is False is inconsistent with the actual default value "None" set in init. This should return "None" or self.time_linear_model for consistency.

Copilot uses AI. Check for mistakes.

# FIX ISSUE #14
@staticmethod
def get_default_conf_as_list():
Expand Down
31 changes: 31 additions & 0 deletions scalesim/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from scalesim.topology_utils import topologies as topo
from scalesim.layout_utils import layouts as layout
from scalesim.single_layer_sim import single_layer_sim as layer_sim
from scalesim.linear_model.tpu import tpuv4_linear_model, tpuv5e_linear_model, tpuv6e_linear_model


class simulator:
Expand Down Expand Up @@ -174,6 +175,11 @@ def generate_reports(self):
header = ('LayerID, Total Cycles (incl. prefetch), Total Cycles, Stall Cycles, Overall Util %, Mapping Efficiency %,'
' Compute Util %,\n')
compute_report.write(header)

# Create TIME_REPORT.csv for linear model time conversion
time_report_name = self.top_path + '/TIME_REPORT.csv'
time_report = open(time_report_name, 'w')

Copilot AI Dec 12, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File may not be closed if this operation raises an exception.
File may not be closed if this operation raises an exception.

Copilot uses AI. Check for mistakes.
time_report.write('LayerID, Time (us),\n')

bandwidth_report_name = self.top_path + '/BANDWIDTH_REPORT.csv'
bandwidth_report = open(bandwidth_report_name, 'w')
Expand Down Expand Up @@ -214,6 +220,29 @@ def generate_reports(self):
log += ', '.join([str(x) for x in compute_report_items_this_layer])
log += ',\n'
compute_report.write(log)

# Generate TIME_REPORT entry using linear model
total_cycles = compute_report_items_this_layer[1] # Total Cycles (not including prefetch)
time_linear_model = self.conf.get_time_linear_model()

# Get spatiotemporal dimensions for this layer
dataflow = self.conf.get_dataflow()
s_row, s_col, t_time = self.topo.get_spatiotemporal_dims(layer_id=lid, df=dataflow)


# Apply the appropriate linear model based on config
if time_linear_model == 'TPUv4':
time_us = tpuv4_linear_model(total_cycles, s_row, s_col, t_time)
elif time_linear_model == 'TPUv5e':
time_us = tpuv5e_linear_model(total_cycles, s_row, s_col, t_time)
elif time_linear_model == 'TPUv6e':
time_us = tpuv6e_linear_model(total_cycles, s_row, s_col, t_time)
else:
# Default: no conversion, just use cycles as time
time_us = total_cycles

time_log = str(lid) + ', ' + str(time_us) + ',\n'
time_report.write(time_log)

bandwidth_report_items_this_layer = single_layer_obj.get_bandwidth_report_items()
log = str(lid) + ', '
Expand All @@ -237,6 +266,7 @@ def generate_reports(self):
compute_report.close()
bandwidth_report.close()
detail_report.close()
time_report.close()
if self.conf.sparsity_support is True:
sparse_report.close()

Expand All @@ -254,3 +284,4 @@ def get_total_cycles(self):
total_cycles += cycles_this_layer

return total_cycles

Loading