diff --git a/Deeploy/Targets/GAP9/Bindings.py b/Deeploy/Targets/GAP9/Bindings.py index 2bda98af8f..408331fbe9 100644 --- a/Deeploy/Targets/GAP9/Bindings.py +++ b/Deeploy/Targets/GAP9/Bindings.py @@ -23,10 +23,10 @@ # Import templates from PULPOpen and Generic from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceMeanTemplate, \ FloatReduceSumTemplate, GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate -from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \ - GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \ - QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \ - SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker +from Deeploy.Targets.Generic.TypeCheckers import AddChecker, AdamChecker, ConcatChecker, ConvChecker, \ + DequantChecker, GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, \ + MulChecker, QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, \ + SGDChecker, SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker from Deeploy.Targets.PULPOpen.Bindings import ForkClosure, L3MemoryAwareFunctionCallClosure, \ MemoryAwareForkTransformer, MemoryAwareFunctionCallClosure, TilingCallClosure from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass @@ -34,12 +34,12 @@ from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPProfileUntiled import PULPProfileUntiled from Deeploy.Targets.PULPOpen.DataTypes import PULPDMAFuture -from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, DMASliceTemplate, FloatAddTemplate, FloatConvTemplate, \ - FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, \ - FloatMulTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GEMMTemplate, MatrixVectorTemplate, MaxPoolTemplate, \ - MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, ReshapeTemplate, RQAddTemplate, RQSiHardswishTemplate, \ - SGDTemplate, SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, \ - iRMSNormTemplate, iSoftmaxTemplate +from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, DMASliceTemplate, FloatAddTemplate, FloatAdamTemplate, \ + FloatConvTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ + FloatMaxPoolTemplate, FloatMulTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GEMMTemplate, \ + MatrixVectorTemplate, MaxPoolTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, ReshapeTemplate, \ + RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, \ + TransposeTemplate, UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \ PULPRequantShiftChecker from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ @@ -317,6 +317,17 @@ SGDTemplate.referenceTemplate, GAP9Transformer) ] +GAP9AdamBindings = [ + NodeBinding( + AdamChecker( + [PointerClass(float32_t), PointerClass(int32_t), + PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t), PointerClass(float32_t)], # R, T, X, G, V, H + [PointerClass(float32_t)] # X_new + ), + FloatAdamTemplate.referenceTemplate, GAP9Transformer) +] + GAP9TransposeBindings = [ NodeBinding(TransposeChecker([PointerClass(type)], [PointerClass(type)]), TransposeTemplate.referenceTemplate, GAP9Transformer) for type in IntegerDataTypes diff --git a/Deeploy/Targets/GAP9/Platform.py b/Deeploy/Targets/GAP9/Platform.py index bad6f8d859..74cabbfd7f 100644 --- a/Deeploy/Targets/GAP9/Platform.py +++ b/Deeploy/Targets/GAP9/Platform.py @@ -11,29 +11,29 @@ from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper from Deeploy.Targets.GAP9.Templates import AllocateTemplate, FreeTemplate # Import GAP9-specific tiler bindings -from Deeploy.Targets.GAP9.Tiler import GAP9AddTilingReadyBindings, GAP9ConcatTilingReadyBindings, \ - GAP9Conv2DTilingReadyBindings, GAP9DWConv2DTilingReadyBindings, GAP9FlattenTilingReadyBindings, \ - GAP9FPGELUTilingReadyBindings, GAP9FPGEMMTilingReadyBindings, GAP9GatherTilingReadyBindings, \ - GAP9iHardswishTilingReadyBindings, GAP9iRMSNormTilingReadyBindings, GAP9iRQSGELUTilingReadyBindings, \ - GAP9LayernormTilingReadyBindings, GAP9MatMulTilingReadyBindings, GAP9MaxPool2DTilingReadyBindings, \ - GAP9MulTilingReadyBindings, GAP9ReduceSumTilingReadyBindings, GAP9ReluTilingReadyBindings, \ - GAP9RQAddTilingReadyBindings, GAP9RQSConv2DTilingReadyBindings, GAP9RQSDWConv2DTilingReadyBindings, \ - GAP9RQSGEMMTilingReadyBindings, GAP9RQSiHardswishTilingReadyBindings, GAP9RQSMatrixVecTilingReadyBindings, \ - GAP9RQSTallGEMMTilingReadyBindings, GAP9RQSTilingReadyBindings, GAP9SGDTilingReadyBindings, \ - GAP9SoftmaxCrossEntropyGradTilingReadyBindings, GAP9SoftmaxCrossEntropyTilingReadyBindings, \ - GAP9SoftmaxGradTilingReadyBindings, GAP9SoftmaxTilingReadyBindings, GAP9TransposeTilingReadyBindings, \ - GAP9UniformRQSTilingReadyBindings +from Deeploy.Targets.GAP9.Tiler import GAP9AdamTilingReadyBindings, GAP9AddTilingReadyBindings, \ + GAP9ConcatTilingReadyBindings, GAP9Conv2DTilingReadyBindings, GAP9DWConv2DTilingReadyBindings, \ + GAP9FlattenTilingReadyBindings, GAP9FPGELUTilingReadyBindings, GAP9FPGEMMTilingReadyBindings, \ + GAP9GatherTilingReadyBindings, GAP9iHardswishTilingReadyBindings, GAP9iRMSNormTilingReadyBindings, \ + GAP9iRQSGELUTilingReadyBindings, GAP9LayernormTilingReadyBindings, GAP9MatMulTilingReadyBindings, \ + GAP9MaxPool2DTilingReadyBindings, GAP9MulTilingReadyBindings, GAP9ReduceSumTilingReadyBindings, \ + GAP9ReluTilingReadyBindings, GAP9RQAddTilingReadyBindings, GAP9RQSConv2DTilingReadyBindings, \ + GAP9RQSDWConv2DTilingReadyBindings, GAP9RQSGEMMTilingReadyBindings, GAP9RQSiHardswishTilingReadyBindings, \ + GAP9RQSMatrixVecTilingReadyBindings, GAP9RQSTallGEMMTilingReadyBindings, GAP9RQSTilingReadyBindings, \ + GAP9SGDTilingReadyBindings, GAP9SoftmaxCrossEntropyGradTilingReadyBindings, \ + GAP9SoftmaxCrossEntropyTilingReadyBindings, GAP9SoftmaxGradTilingReadyBindings, \ + GAP9SoftmaxTilingReadyBindings, GAP9TransposeTilingReadyBindings, GAP9UniformRQSTilingReadyBindings from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicRQIntegerDivBinding -from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELULayer, GEMMLayer, \ - LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, \ - ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, SGDLayer, \ - SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, \ - TransposeLayer, iHardswishLayer, iRMSNormLayer -from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ - GELUParser, GEMMParser, LayerNormParser, MatMulParser, MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, \ - QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, \ - RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, \ +from Deeploy.Targets.Generic.Layers import AdamLayer, AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELULayer, \ + GEMMLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, ReduceMeanLayer, \ + ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ + RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, \ + SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer +from Deeploy.Targets.Generic.Parsers import AdamParser, AddParser, ConcatParser, DequantParser, FlattenParser, \ + GatherParser, GELUParser, GEMMParser, LayerNormParser, MatMulParser, MaxPool2DParser, MulParser, Pad1DParser, \ + Pad2DParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, \ + RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, \ SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, SoftmaxParser, \ TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate @@ -90,6 +90,7 @@ GAP9_SoftmaxCrossEntropyLossGradMapper = NodeMapper(SoftmaxCrossEntropyLossGradParser(), GAP9SoftmaxCrossEntropyGradTilingReadyBindings) GAP9_SGDMapper = NodeMapper(SGDParser(), GAP9SGDTilingReadyBindings) +GAP9_AdamMapper = NodeMapper(AdamParser(), GAP9AdamTilingReadyBindings) GAP9_QuantMapper = NodeMapper(QuantParser(), BasicQuantBindings) GAP9_DequantMapper = NodeMapper(DequantParser(), BasicDequantBindings) GAP9_GEMMDequantMapper = NodeMapper(PULPGEMMParser(), BasicGEMMBindings) @@ -171,7 +172,9 @@ 'SoftmaxCrossEntropyLossGrad': SoftmaxCrossEntropyLossGradLayer([GAP9_SoftmaxCrossEntropyLossGradMapper]), 'SGD': - SGDLayer([GAP9_SGDMapper]) + SGDLayer([GAP9_SGDMapper]), + 'Adam': + AdamLayer([GAP9_AdamMapper]) } diff --git a/Deeploy/Targets/GAP9/Tiler.py b/Deeploy/Targets/GAP9/Tiler.py index fefe12b6d7..ae419c5fd7 100644 --- a/Deeploy/Targets/GAP9/Tiler.py +++ b/Deeploy/Targets/GAP9/Tiler.py @@ -10,14 +10,14 @@ import copy -from Deeploy.Targets.GAP9.Bindings import GAP9AddBindings, GAP9ConcatBindings, GAP9FloatConv2DBindings, \ - GAP9FloatDWConv2DBindings, GAP9FloatGELUBinding, GAP9FloatGEMMBindings, GAP9GatherBindings, \ - GAP9iHardswishBindings, GAP9iRMSNormBindings, GAP9iRQSGELUBindings, GAP9LayernormBinding, GAP9MatMulBindings, \ - GAP9MaxPool2DBindings, GAP9MulBindings, GAP9ReduceSumBindings, GAP9ReluBinding, GAP9ReshapeBindings, \ - GAP9RQAddBindings, GAP9RQSBindings, GAP9RQSConv2DBindings, GAP9RQSDWConv2DBindings, GAP9RQSGEMMBindings, \ - GAP9RQSiHardswishBindings, GAP9RQSMatrixVecBindings, GAP9RQSTallGEMMBindings, GAP9SGDBindings, \ - GAP9SoftmaxBindings, GAP9SoftmaxCrossEntropyLossBindings, GAP9SoftmaxCrossEntropyLossGradBindings, \ - GAP9SoftmaxGradBindings, GAP9TransposeBindings, GAP9UniformRQSBindings +from Deeploy.Targets.GAP9.Bindings import GAP9AdamBindings, GAP9AddBindings, GAP9ConcatBindings, \ + GAP9FloatConv2DBindings, GAP9FloatDWConv2DBindings, GAP9FloatGELUBinding, GAP9FloatGEMMBindings, \ + GAP9GatherBindings, GAP9iHardswishBindings, GAP9iRMSNormBindings, GAP9iRQSGELUBindings, GAP9LayernormBinding, \ + GAP9MatMulBindings, GAP9MaxPool2DBindings, GAP9MulBindings, GAP9ReduceSumBindings, GAP9ReluBinding, \ + GAP9ReshapeBindings, GAP9RQAddBindings, GAP9RQSBindings, GAP9RQSConv2DBindings, GAP9RQSDWConv2DBindings, \ + GAP9RQSGEMMBindings, GAP9RQSiHardswishBindings, GAP9RQSMatrixVecBindings, GAP9RQSTallGEMMBindings, \ + GAP9SGDBindings, GAP9SoftmaxBindings, GAP9SoftmaxCrossEntropyLossBindings, \ + GAP9SoftmaxCrossEntropyLossGradBindings, GAP9SoftmaxGradBindings, GAP9TransposeBindings, GAP9UniformRQSBindings from Deeploy.Targets.Generic.TileConstraints.AddTileConstraint import AddTileConstraint from Deeploy.Targets.Generic.TileConstraints.ConcatTileConstraint import ConcatTileConstraint from Deeploy.Targets.Generic.TileConstraints.iHardswishTileConstraint import iHardswishTileConstraint @@ -39,6 +39,7 @@ from Deeploy.Targets.PULPOpen.TileConstraints.MatMulTileConstraint import MatMulTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.MaxPoolTileConstraint import MaxPoolCTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.RequantShiftTileConstraint import RequantShiftTileConstraint +from Deeploy.Targets.PULPOpen.TileConstraints.AdamTileConstraint import AdamTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.SGDTileConstraint import SGDTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.SoftmaxCrossEntropyTileConstraint import \ SoftmaxCrossEntropyGradTileConstraint, SoftmaxCrossEntropyTileConstraint @@ -142,3 +143,6 @@ GAP9SGDTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = GAP9SGDBindings, tileConstraint = SGDTileConstraint()) + +GAP9AdamTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = GAP9AdamBindings, + tileConstraint = AdamTileConstraint()) diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 308b179aef..2b8972b581 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -12,19 +12,19 @@ from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ - ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \ - FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \ - FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \ - FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \ - GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ - MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ + ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAdamTemplate, \ + FloatAddTemplate, FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, \ + FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, \ + FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \ + FloatSqrtTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \ + MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \ iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate -from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ - DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ - LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ - ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, \ - SoftmaxChecker, TransposeChecker +from Deeploy.Targets.Generic.TypeCheckers import AdamChecker, AddChecker, BatchNormChecker, ConcatChecker, \ + ConvChecker, DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, \ + GEMMChecker, LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, \ + ReduceMeanChecker, ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, \ + SliceChecker, SoftmaxChecker, TransposeChecker BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()]) @@ -312,6 +312,17 @@ for type in FloatDataTypes ] +BasicAdamBindings = [ + NodeBinding( + AdamChecker( + # Note: ONNX spec defines T as int64, but we use int32 for embedded compatibility + [PointerClass(float32_t), PointerClass(int32_t), PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t), PointerClass(float32_t)], # R, T, X, G, V, H + [PointerClass(float32_t)] # X_new only + ), + FloatAdamTemplate.referenceTemplate, BasicTransformer) +] + BasicConvTransposeBindings = [ NodeBinding( ConvChecker( diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index cc733937cc..62b90da168 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -492,6 +492,22 @@ def __init__(self, maps: List[NodeMapper]): super().__init__(maps) +class AdamLayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) + + def computeOps(self): + size = self.mapper.parser.operatorRepresentation['size'] + # Per element: + # m (V) update : 2 mul + 1 add = 3 ops + # v (H) update : 3 mul + 1 add = 4 ops (includes G*G) + # weight update: 1 sqrt + 1 div + + # 1 mul + 1 sub = 4 ops (epsilon=0, +eps eliminated) + # Total = 11 ops + return size * 11 + + class LinearAttentionLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ad787d9e4b..7a31760a02 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2697,6 +2697,51 @@ def parseNodeCtxt(self, return ctxt, True +class AdamParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + n_inputs = len(node.inputs) + n_outputs = len(node.outputs) + num_tensors = (n_inputs - 2) // 4 + valid_inputs = n_inputs >= 6 and (n_inputs - 2) % 4 == 0 + valid_outputs = n_outputs >= 1 and n_outputs == num_tensors + valid_attrs = all(a in node.attrs for a in ['alpha', 'beta', 'epsilon', 'norm_coefficient', 'norm_coefficient_post']) + + return all([valid_inputs, valid_outputs, valid_attrs]) + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + R = ctxt.lookup(node.inputs[0].name) + T = ctxt.lookup(node.inputs[1].name) + X = ctxt.lookup(node.inputs[2].name) + G = ctxt.lookup(node.inputs[3].name) + V = ctxt.lookup(node.inputs[4].name) + H = ctxt.lookup(node.inputs[5].name) + + X_new = ctxt.lookup(node.outputs[0].name) + + self.operatorRepresentation['R'] = R.name + self.operatorRepresentation['T'] = T.name + self.operatorRepresentation['X'] = X.name + self.operatorRepresentation['G'] = G.name + self.operatorRepresentation['V'] = V.name + self.operatorRepresentation['H'] = H.name + self.operatorRepresentation['X_new'] = X_new.name + self.operatorRepresentation['size'] = np.prod(X.shape) + self.operatorRepresentation['alpha'] = node.attrs['alpha'] + self.operatorRepresentation['beta'] = node.attrs['beta'] + self.operatorRepresentation['epsilon'] = node.attrs['epsilon'] + self.operatorRepresentation['norm_coefficient'] = node.attrs['norm_coefficient'] + self.operatorRepresentation['norm_coefficient_post'] = node.attrs['norm_coefficient_post'] + return ctxt, True + + class BatchNormParser(NodeParser): def __init__(self): diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index e05e897270..3737ae466a 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -6,32 +6,33 @@ RemoveEmptyConvBiasPass, RemoveOnlySingletonReduceMeanPass from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicBatchNormBindings, BasicConcatBindings, \ - BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, BasicDebugPrintBindings, \ - BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, BasicGatherBindings, \ - BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ - BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ - BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ - BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ - BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicTransposeBindings, \ - DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, ConcatLayer, ConvLayer, \ +from Deeploy.Targets.Generic.Bindings import BasicAdamBindings, BasicAddBindings, BasicBatchNormBindings, \ + BasicConcatBindings, BasicConv1DBindings, BasicConv2DBindings, BasicConvTransposeBindings, \ + BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \ + BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, \ + BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, \ + BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, \ + BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, \ + BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ + BasicSqrtBindings, BasicTransposeBindings, DummyBinding +from Deeploy.Targets.Generic.Layers import AdamLayer, AddLayer, BatchNormalizationLayer, ConcatLayer, ConvLayer, \ ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, ITAMaxLayer, \ LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, \ SoftmaxLayer, SqrtLayer, TransposeLayer -from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, ConcatParser, ConvTranspose1DParser, \ - DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, GELUParser, GenericConv1DParser, \ - GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, \ - IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, \ - Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ - RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, SqrtParser, \ - TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser +from Deeploy.Targets.Generic.Parsers import AdamParser, AddParser, BatchNormParser, ConcatParser, \ + ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, \ + GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ + GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ + MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, \ + ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, \ + SoftmaxParser, SqrtParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ iGELURequantMergePass +AdamMapper = NodeMapper(AdamParser(), BasicAdamBindings) AddMapper = NodeMapper(AddParser(), BasicAddBindings) Conv1DMapper = NodeMapper(GenericConv1DParser(), BasicConv1DBindings) Conv2DMapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings) @@ -79,6 +80,7 @@ DummyMapper = NodeMapper(DummyParser(), [DummyBinding]) GenericMapping = { + 'Adam': AdamLayer([AdamMapper]), 'Add': AddLayer([AddMapper]), 'Conv': ConvLayer([Conv2DMapper, DWConv2DMapper, Conv1DMapper, DWConv1DMapper]), 'Concat': ConcatLayer([ConcatMapper]), diff --git a/Deeploy/Targets/Generic/Templates/FloatAdamTemplate.py b/Deeploy/Targets/Generic/Templates/FloatAdamTemplate.py new file mode 100644 index 0000000000..76dbb47fd6 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatAdamTemplate.py @@ -0,0 +1,34 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Adam Weight Update (Name: ${nodeName}, Op: ${nodeOp}) +// inputs=[R, T, X, G, V, H], outputs=[X_new] +// V and H are NOT written back; V_new and H_new are local temporaries used only +// to compute the correct X_new. Use separate kernels to update V and H in memory. +BEGIN_SINGLE_CORE + float32_t R_val = *${R}; + // Note: ONNX spec defines T as int64, but we use int32 for embedded compatibility + int32_t T_val = *${T}; + float32_t alpha = ${alpha}; + float32_t beta_coeff = ${beta}; + float32_t epsilon = ${epsilon}; + float32_t norm_coef = ${norm_coefficient}; + float32_t norm_coef_post = ${norm_coefficient_post}; + float32_t R_adjusted; + if (T_val > 0) { + R_adjusted = R_val * sqrtf(1.0f - powf(beta_coeff, (float32_t)T_val)) / (1.0f - powf(alpha, (float32_t)T_val)); + } else { + R_adjusted = R_val; + } + for (uint32_t i = 0; i < ${size}; i++) { + float32_t G_reg = norm_coef * ${X}[i] + ${G}[i]; + float32_t V_new = alpha * ${V}[i] + (1.0f - alpha) * G_reg; + float32_t H_new = beta_coeff * ${H}[i] + (1.0f - beta_coeff) * G_reg * G_reg; + ${X_new}[i] = (1.0f - norm_coef_post) * (${X}[i] - R_adjusted * V_new / (sqrtf(H_new) + epsilon)); + } +END_SINGLE_CORE +""") diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index c2c8d436f8..21fc175f07 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -598,6 +598,21 @@ def _inferSignedness(self, inputs: List[VariableBuffer], return [True] +class AdamChecker(SignPropTypeChecker): + + def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): + super().__init__(input_types, output_types) + + def _inferNumLevels(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]: + # Only X_new is an output; V and H are read-only inputs. + return [2**(self.input_types[0].referencedType.typeWidth)] + + def _inferSignedness(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> Optional[List[bool]]: + return [True] + + class BatchNormChecker(SignPropTypeChecker): def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 84ee2420e3..eb087a7ba0 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -16,10 +16,10 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceSumTemplate, \ GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate -from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \ - GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \ - QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \ - SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker +from Deeploy.Targets.Generic.TypeCheckers import AdamChecker, AddChecker, ConcatChecker, ConvChecker, \ + DequantChecker, GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, \ + MulChecker, QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, \ + SGDChecker, SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling @@ -27,12 +27,12 @@ from Deeploy.Targets.PULPOpen.DataTypes import PULPDMAFuture from Deeploy.Targets.PULPOpen.DMA.L3Dma import l3DmaHack from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma -from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, DMASliceTemplate, FloatAddTemplate, FloatConvTemplate, \ - FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, \ - FloatMulTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GEMMTemplate, \ - MatrixVectorTemplate, MaxPoolTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, ReshapeTemplate, \ - RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, \ - TransposeTemplate, UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate +from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, DMASliceTemplate, FloatAdamTemplate, FloatAddTemplate, \ + FloatConvTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \ + FloatMaxPoolTemplate, FloatMulTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \ + GEMMTemplate, MatrixVectorTemplate, MaxPoolTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, \ + ReshapeTemplate, RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, SoftmaxCrossEntropyLossTemplate, \ + TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \ PULPRequantShiftChecker from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ @@ -367,6 +367,17 @@ SGDTemplate.referenceTemplate, ForkTransformer) ] +PULPAdamBindings = [ + NodeBinding( + AdamChecker( + # Note: ONNX spec defines T as int64, but we use int32 for embedded compatibility + [PointerClass(float32_t), PointerClass(int32_t), PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t), PointerClass(float32_t)], # R, T, X, G, V, H + [PointerClass(float32_t)] # X_new only + ), + FloatAdamTemplate.referenceTemplate, ForkTransformer) +] + PULPTransposeBindings = [ NodeBinding(TransposeChecker([PointerClass(type)], [PointerClass(type)]), TransposeTemplate.referenceTemplate, ForkTransformer) for type in IntegerDataTypes diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 7456dd9e1b..84701e7886 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -13,13 +13,13 @@ from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicRQIntegerDivBinding -from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELUGradLayer, GELULayer, \ - GEMMLayer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, \ - ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ - RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, \ - SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer -from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ - GELUGradParser, GELUParser, GEMMParser, LayerNormGradParser, LayerNormParser, MatMulParser, MaxPool1DParser, \ +from Deeploy.Targets.Generic.Layers import AdamLayer, AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELUGradLayer, \ + GELULayer, GEMMLayer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, \ + QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, \ + RQSiGELULayer, RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, \ + SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer +from Deeploy.Targets.Generic.Parsers import AdamParser, AddParser, ConcatParser, DequantParser, FlattenParser, \ + GatherParser, GELUGradParser, GELUParser, GEMMParser, LayerNormGradParser, LayerNormParser, MatMulParser, MaxPool1DParser, \ MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceSumParser, ReluParser, \ RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, \ SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, \ @@ -41,15 +41,15 @@ PULPFPGELUGradTilingReadyBindings, PULPFPGELUTilingReadyBindings, PULPFPGEMMTilingReadyBindings, \ PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \ PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, PULPLayernormTilingReadyBindings, \ - PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \ - PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, \ - PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, PULPRQSConv1DTilingReadyBindings, \ - PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \ - PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \ - PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, PULPSliceTilingReadyBindings, \ - PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \ - PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \ - PULPUniformRQSTilingReadyBindings + PULPAdamTilingReadyBindings, PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, \ + PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, \ + PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, \ + PULPRQSConv1DTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, \ + PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, \ + PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, \ + PULPSliceTilingReadyBindings, PULPSoftmaxCrossEntropyGradTilingReadyBindings, \ + PULPSoftmaxCrossEntropyTilingReadyBindings, PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, \ + PULPTransposeTilingReadyBindings, PULPUniformRQSTilingReadyBindings from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \ PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass @@ -108,6 +108,7 @@ SoftmaxCrossEntropyLossGradMapper = NodeMapper(SoftmaxCrossEntropyLossGradParser(), PULPSoftmaxCrossEntropyGradTilingReadyBindings) SGDMapper = NodeMapper(SGDParser(), PULPSGDTilingReadyBindings) +AdamMapper = NodeMapper(AdamParser(), PULPAdamTilingReadyBindings) QuantMapper = NodeMapper(QuantParser(), BasicQuantBindings) DequantMapper = NodeMapper(DequantParser(), BasicDequantBindings) GEMMDequantMapper = NodeMapper(PULPGEMMParser(), BasicGEMMBindings) @@ -151,7 +152,8 @@ 'SoftmaxGrad': SoftmaxGradLayer([SoftmaxGradMapper]), 'SoftmaxCrossEntropyLoss': SoftmaxCrossEntropyLossLayer([SoftmaxCrossEntropyLossMapper]), 'SoftmaxCrossEntropyLossGrad': SoftmaxCrossEntropyLossGradLayer([SoftmaxCrossEntropyLossGradMapper]), - 'SGD': SGDLayer([SGDMapper]) + 'SGD': SGDLayer([SGDMapper]), + 'Adam': AdamLayer([AdamMapper]), } diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatAdamTemplate.py b/Deeploy/Targets/PULPOpen/Templates/FloatAdamTemplate.py new file mode 100644 index 0000000000..3c160bd705 --- /dev/null +++ b/Deeploy/Targets/PULPOpen/Templates/FloatAdamTemplate.py @@ -0,0 +1,70 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Adam Weight Update - Parallel with 6x unrolling (Name: ${nodeName}, Op: ${nodeOp}) +// inputs=[R, T, X, G, V, H], outputs=[X_new] +// X, G, V, and H are L1 tile buffers (DMA'd from backing store, which may be L2 or L3). +// V and H are NOT written back; V_new and H_new are local temporaries used only +// to compute the correct X_new. Use separate kernels to update V and H in memory. +float32_t ${nodeName}_R_val = *${R}; +int32_t ${nodeName}_T_val = *${T}; +float32_t ${nodeName}_alpha = ${alpha}; +float32_t ${nodeName}_beta_coeff = ${beta}; +float32_t ${nodeName}_epsilon = ${epsilon}; +float32_t ${nodeName}_norm_coef = ${norm_coefficient}; +float32_t ${nodeName}_norm_coef_post = ${norm_coefficient_post}; +float32_t ${nodeName}_R_adjusted; +if (${nodeName}_T_val > 0) { + ${nodeName}_R_adjusted = ${nodeName}_R_val * sqrtf(1.0f - powf(${nodeName}_beta_coeff, (float32_t)${nodeName}_T_val)) / (1.0f - powf(${nodeName}_alpha, (float32_t)${nodeName}_T_val)); +} else { + ${nodeName}_R_adjusted = ${nodeName}_R_val; +} + +uint8_t ${nodeName}_core_id = (uint8_t) pi_core_id(); +uint8_t ${nodeName}_log2Core = (uint8_t) log2(NUM_CORES); +uint32_t ${nodeName}_chunk = (${size} >> ${nodeName}_log2Core) + ((${size} & (NUM_CORES-1))!=0); +uint32_t ${nodeName}_chunk_start = (uint32_t) MIN(${nodeName}_chunk*${nodeName}_core_id, (uint32_t) ${size}); +uint32_t ${nodeName}_chunk_stop = (uint32_t) MIN(${nodeName}_chunk_start + ${nodeName}_chunk, (uint32_t) ${size}); + +uint32_t i = ${nodeName}_chunk_start; +for (; i + 5 < ${nodeName}_chunk_stop; i += 6) { + float32_t ${nodeName}_G_reg_0 = ${nodeName}_norm_coef * ${X}[i+0] + ${G}[i+0]; + float32_t ${nodeName}_G_reg_1 = ${nodeName}_norm_coef * ${X}[i+1] + ${G}[i+1]; + float32_t ${nodeName}_G_reg_2 = ${nodeName}_norm_coef * ${X}[i+2] + ${G}[i+2]; + float32_t ${nodeName}_G_reg_3 = ${nodeName}_norm_coef * ${X}[i+3] + ${G}[i+3]; + float32_t ${nodeName}_G_reg_4 = ${nodeName}_norm_coef * ${X}[i+4] + ${G}[i+4]; + float32_t ${nodeName}_G_reg_5 = ${nodeName}_norm_coef * ${X}[i+5] + ${G}[i+5]; + + float32_t ${nodeName}_V_new_0 = ${nodeName}_alpha * ${V}[i+0] + (1.0f - ${nodeName}_alpha) * ${nodeName}_G_reg_0; + float32_t ${nodeName}_V_new_1 = ${nodeName}_alpha * ${V}[i+1] + (1.0f - ${nodeName}_alpha) * ${nodeName}_G_reg_1; + float32_t ${nodeName}_V_new_2 = ${nodeName}_alpha * ${V}[i+2] + (1.0f - ${nodeName}_alpha) * ${nodeName}_G_reg_2; + float32_t ${nodeName}_V_new_3 = ${nodeName}_alpha * ${V}[i+3] + (1.0f - ${nodeName}_alpha) * ${nodeName}_G_reg_3; + float32_t ${nodeName}_V_new_4 = ${nodeName}_alpha * ${V}[i+4] + (1.0f - ${nodeName}_alpha) * ${nodeName}_G_reg_4; + float32_t ${nodeName}_V_new_5 = ${nodeName}_alpha * ${V}[i+5] + (1.0f - ${nodeName}_alpha) * ${nodeName}_G_reg_5; + + float32_t ${nodeName}_H_new_0 = ${nodeName}_beta_coeff * ${H}[i+0] + (1.0f - ${nodeName}_beta_coeff) * ${nodeName}_G_reg_0 * ${nodeName}_G_reg_0; + float32_t ${nodeName}_H_new_1 = ${nodeName}_beta_coeff * ${H}[i+1] + (1.0f - ${nodeName}_beta_coeff) * ${nodeName}_G_reg_1 * ${nodeName}_G_reg_1; + float32_t ${nodeName}_H_new_2 = ${nodeName}_beta_coeff * ${H}[i+2] + (1.0f - ${nodeName}_beta_coeff) * ${nodeName}_G_reg_2 * ${nodeName}_G_reg_2; + float32_t ${nodeName}_H_new_3 = ${nodeName}_beta_coeff * ${H}[i+3] + (1.0f - ${nodeName}_beta_coeff) * ${nodeName}_G_reg_3 * ${nodeName}_G_reg_3; + float32_t ${nodeName}_H_new_4 = ${nodeName}_beta_coeff * ${H}[i+4] + (1.0f - ${nodeName}_beta_coeff) * ${nodeName}_G_reg_4 * ${nodeName}_G_reg_4; + float32_t ${nodeName}_H_new_5 = ${nodeName}_beta_coeff * ${H}[i+5] + (1.0f - ${nodeName}_beta_coeff) * ${nodeName}_G_reg_5 * ${nodeName}_G_reg_5; + + ${X_new}[i+0] = (1.0f - ${nodeName}_norm_coef_post) * (${X}[i+0] - ${nodeName}_R_adjusted * ${nodeName}_V_new_0 / (sqrtf(${nodeName}_H_new_0) + ${nodeName}_epsilon)); + ${X_new}[i+1] = (1.0f - ${nodeName}_norm_coef_post) * (${X}[i+1] - ${nodeName}_R_adjusted * ${nodeName}_V_new_1 / (sqrtf(${nodeName}_H_new_1) + ${nodeName}_epsilon)); + ${X_new}[i+2] = (1.0f - ${nodeName}_norm_coef_post) * (${X}[i+2] - ${nodeName}_R_adjusted * ${nodeName}_V_new_2 / (sqrtf(${nodeName}_H_new_2) + ${nodeName}_epsilon)); + ${X_new}[i+3] = (1.0f - ${nodeName}_norm_coef_post) * (${X}[i+3] - ${nodeName}_R_adjusted * ${nodeName}_V_new_3 / (sqrtf(${nodeName}_H_new_3) + ${nodeName}_epsilon)); + ${X_new}[i+4] = (1.0f - ${nodeName}_norm_coef_post) * (${X}[i+4] - ${nodeName}_R_adjusted * ${nodeName}_V_new_4 / (sqrtf(${nodeName}_H_new_4) + ${nodeName}_epsilon)); + ${X_new}[i+5] = (1.0f - ${nodeName}_norm_coef_post) * (${X}[i+5] - ${nodeName}_R_adjusted * ${nodeName}_V_new_5 / (sqrtf(${nodeName}_H_new_5) + ${nodeName}_epsilon)); +} + +for (; i < ${nodeName}_chunk_stop; i++) { + float32_t ${nodeName}_G_reg = ${nodeName}_norm_coef * ${X}[i] + ${G}[i]; + float32_t ${nodeName}_V_new = ${nodeName}_alpha * ${V}[i] + (1.0f - ${nodeName}_alpha) * ${nodeName}_G_reg; + float32_t ${nodeName}_H_new = ${nodeName}_beta_coeff * ${H}[i] + (1.0f - ${nodeName}_beta_coeff) * ${nodeName}_G_reg * ${nodeName}_G_reg; + ${X_new}[i] = (1.0f - ${nodeName}_norm_coef_post) * (${X}[i] - ${nodeName}_R_adjusted * ${nodeName}_V_new / (sqrtf(${nodeName}_H_new) + ${nodeName}_epsilon)); +} +""") diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/AdamTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/AdamTileConstraint.py new file mode 100644 index 0000000000..c3f17f5761 --- /dev/null +++ b/Deeploy/Targets/PULPOpen/TileConstraints/AdamTileConstraint.py @@ -0,0 +1,93 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Dict, List, Tuple + +import numpy as np + +from Deeploy.AbstractDataTypes import PointerClass +from Deeploy.CommonExtensions.DataTypes import uint16_t +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation +from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint +from Deeploy.TilingExtension.TileConstraint import TileConstraint +from Deeploy.TilingExtension.TilerModel import TilerModel +from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, HyperRectangle, TilingSchedule, \ + VariableReplacementScheme + + +_TILED_TENSORS = ['R', 'T', 'X', 'G', 'V', 'H', 'X_new'] +# Array tensors that share tile geometry with X (excludes the shape-(1,) scalar tensors R and T) +_ARRAY_INPUT_TENSORS = ['X', 'G', 'V', 'H'] + + +class AdamTileConstraint(TileConstraint): + + @classmethod + def addGeometricalConstraint(cls, tilerModel: TilerModel, parseDict: Dict, + ctxt: NetworkContext) -> TilerModel: + + for name in _TILED_TENSORS: + tilerModel.addTensorDimToModel(ctxt, parseDict[name]) + + xShape = ctxt.lookup(parseDict['X']).shape + + for dim in range(len(xShape)): + xDimVar = tilerModel.getTensorDimVar(tensorName = parseDict['X'], dimIdx = dim) + for name in ['G', 'V', 'H', 'X_new']: + dimVar = tilerModel.getTensorDimVar(tensorName = parseDict[name], dimIdx = dim) + tilerModel.addConstraint(xDimVar == dimVar) + + return tilerModel + + @classmethod + def addPolicyConstraint(cls, tilerModel: TilerModel, parseDict: Dict, + ctxt: NetworkContext) -> TilerModel: + # Fix every dimension except the first to its full size so the tiler + # can only split along dim 0. + xShape = ctxt.lookup(parseDict['X']).shape + for dim in range(1, len(xShape)): + dimVar = tilerModel.getTensorDimVar(tensorName = parseDict['X'], dimIdx = dim) + tilerModel.addConstraint(dimVar == xShape[dim]) + return tilerModel + + @classmethod + def serializeTilingSolution( + cls, tilingSolution: NodeMemoryConstraint, + absoluteOutputCubes: List[AbsoluteHyperRectangle], targetMemLevel: str, + ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, TilingSchedule]: + + outputCubes = [cube.rectangle for cube in absoluteOutputCubes] + + addrNames = _TILED_TENSORS + inputBaseOffsets, outputBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel, + operatorRepresentation, addrNames) + + replacements = {"size": []} + replacementTypes = {"size": PointerClass(uint16_t)} + + for cubeAbs in absoluteOutputCubes: + cube = cubeAbs.rectangle + replacements["size"].append(int(np.prod(cube.dims))) + + inputLoadSchedule = [] + outputLoadSchedule = [] + + # Shape-(1,) rectangle for the scalar inputs R and T (1 element each). + scalar_cube = HyperRectangle((0,), (1,)) + + for cube in outputCubes: + tile_load = {name: cube for name in _ARRAY_INPUT_TENSORS} + tile_load['R'] = scalar_cube + tile_load['T'] = scalar_cube + inputLoadSchedule.append(tile_load) + + for cube in outputCubes: + outputLoadSchedule.append({'X_new': cube}) + + tilingSchedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, + outputLoadSchedule) + variableReplacementSchedule = VariableReplacementScheme(replacements, replacementTypes) + + return variableReplacementSchedule, tilingSchedule diff --git a/Deeploy/Targets/PULPOpen/Tiler.py b/Deeploy/Targets/PULPOpen/Tiler.py index 901106459e..21b1ef7963 100644 --- a/Deeploy/Targets/PULPOpen/Tiler.py +++ b/Deeploy/Targets/PULPOpen/Tiler.py @@ -14,15 +14,15 @@ from Deeploy.Targets.Generic.TileConstraints.RQSiHardswishTileConstraint import RQSiHardswishTileConstraint from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint -from Deeploy.Targets.PULPOpen.Bindings import PULPAddBindings, PULPConcatBindings, PULPFloatConv2DBindings, \ - PULPFloatDWConv2DBindings, PULPFloatGELUBinding, PULPFloatGELUGradBinding, PULPFloatGEMMBindings, \ - PULPGatherBindings, PULPiHardswishBindings, PULPiRMSNormBindings, PULPiRQSGELUBindings, PULPLayernormBinding, \ - PULPLayernormGradBinding, PULPMatMulBindings, PULPMaxPool1DBindings, PULPMaxPool2DBindings, PULPMulBindings, \ - PULPReduceMeanBindings, PULPReduceSumBindings, PULPReluBinding, PULPReshapeBindings, PULPRQAddBindings, \ - PULPRQSBindings, PULPRQSConv1DBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, \ - PULPRQSiHardswishBindings, PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSGDBindings, PULPSliceBindings, \ - PULPSoftmaxBindings, PULPSoftmaxCrossEntropyLossBindings, PULPSoftmaxCrossEntropyLossGradBindings, \ - PULPSoftmaxGradBindings, PULPTransposeBindings, PULPUniformRQSBindings +from Deeploy.Targets.PULPOpen.Bindings import PULPAdamBindings, PULPAddBindings, PULPConcatBindings, \ + PULPFloatConv2DBindings, PULPFloatDWConv2DBindings, PULPFloatGELUBinding, PULPFloatGELUGradBinding, \ + PULPFloatGEMMBindings, PULPGatherBindings, PULPiHardswishBindings, PULPiRMSNormBindings, PULPiRQSGELUBindings, \ + PULPLayernormBinding, PULPLayernormGradBinding, PULPMatMulBindings, PULPMaxPool1DBindings, PULPMaxPool2DBindings, \ + PULPMulBindings, PULPReduceMeanBindings, PULPReduceSumBindings, PULPReluBinding, PULPReshapeBindings, \ + PULPRQAddBindings, PULPRQSBindings, PULPRQSConv1DBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, \ + PULPRQSGEMMBindings, PULPRQSiHardswishBindings, PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, \ + PULPSGDBindings, PULPSliceBindings, PULPSoftmaxBindings, PULPSoftmaxCrossEntropyLossBindings, \ + PULPSoftmaxCrossEntropyLossGradBindings, PULPSoftmaxGradBindings, PULPTransposeBindings, PULPUniformRQSBindings from Deeploy.Targets.PULPOpen.TileConstraints.ConvTileConstraint import Conv2DTileConstraint, RQConv1DTileConstraint, \ RQConv2DTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.DWConvTileConstraint import DWConv2DTileConstraint, \ @@ -39,6 +39,7 @@ from Deeploy.Targets.PULPOpen.TileConstraints.ReduceMeanConstraint import ReduceMeanTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.ReduceSumTileConstraint import ReduceSumTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.RequantShiftTileConstraint import RequantShiftTileConstraint +from Deeploy.Targets.PULPOpen.TileConstraints.AdamTileConstraint import AdamTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.SGDTileConstraint import SGDTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.SliceConstraint import SliceTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.SoftmaxCrossEntropyTileConstraint import \ @@ -155,6 +156,9 @@ PULPSGDTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPSGDBindings, tileConstraint = SGDTileConstraint()) +PULPAdamTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPAdamBindings, + tileConstraint = AdamTileConstraint()) + PULPSliceTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPSliceBindings, tileConstraint = SliceTileConstraint()) diff --git a/DeeployTest/Tests/Kernels/FP32/Adam/Regular/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Adam/Regular/inputs.npz new file mode 100644 index 0000000000..7832aaf1d6 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Adam/Regular/inputs.npz differ diff --git a/DeeployTest/Tests/Kernels/FP32/Adam/Regular/network.onnx b/DeeployTest/Tests/Kernels/FP32/Adam/Regular/network.onnx new file mode 100644 index 0000000000..92ce791c17 Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Adam/Regular/network.onnx differ diff --git a/DeeployTest/Tests/Kernels/FP32/Adam/Regular/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Adam/Regular/outputs.npz new file mode 100644 index 0000000000..622e7d782c Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Adam/Regular/outputs.npz differ diff --git a/DeeployTest/test_gap9_config.py b/DeeployTest/test_gap9_config.py index 69b940f0c3..2bde0b06eb 100644 --- a/DeeployTest/test_gap9_config.py +++ b/DeeployTest/test_gap9_config.py @@ -10,7 +10,7 @@ "Kernels/Integer/Pad/Regular_2D", "Kernels/Integer/MatMul/Regular", "Kernels/Integer/MatMul/Add", "Kernels/Integer/Conv/DW_2D_RQ", "Kernels/Integer/Conv/Regular_2D_RQ", "Kernels/Integer/Softmax/Regular", "Kernels/Integer/Concat", "Kernels/Integer/Hardswish/Regular", "Others/Backtracking", "Kernels/FP32/Add/Regular", - "Kernels/FP32/GEMM/Regular", "Kernels/FP32/Conv/Regular_2D_Bias", "Kernels/FP32/Conv/Regular_2D_NoBias", + "Kernels/FP32/Adam/Regular", "Kernels/FP32/GEMM/Regular", "Kernels/FP32/Conv/Regular_2D_Bias", "Kernels/FP32/Conv/Regular_2D_NoBias", "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", "Kernels/FP32/Conv/DW_2D_Bias", "Kernels/FP32/Conv/DW_2D_NoBias", "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", "Kernels/FP32/LayerNorm", "Kernels/FP32/ReLU", "Kernels/FP32/MaxPool/Regular_2D", "Kernels/FP32/MatMul", "Kernels/FP32/Softmax/Regular", "Kernels/FP32/Transpose", diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index b0d8c659ca..c52e653048 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -43,6 +43,7 @@ "Kernels/FP32/ReduceMean/NoKeepDims/Axis2", "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add", "Kernels/FP32/Reshape/SkipConnection", + "Kernels/FP32/Adam/Regular", "Kernels/FP32/Sqrt", "Kernels/FP32/Transpose", # Integer Kernels diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index 6d9f3cfcd7..098c159268 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -34,7 +34,8 @@ from test_siracusa_neureka_tiled_config import L3_DOUBLEBUFFER_MODELS_WMEM as NEUREKA_L3_DOUBLEBUFFER_MODELS_WMEM from test_siracusa_neureka_tiled_config import L3_SINGLEBUFFER_MODELS as NEUREKA_L3_SINGLEBUFFER_MODELS from test_siracusa_tiled_config import L2_DOUBLEBUFFER_KERNELS, L2_DOUBLEBUFFER_MODELS, L2_SINGLEBUFFER_KERNELS, \ - L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_MODELS + L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_KERNELS, L3_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_KERNELS, \ + L3_SINGLEBUFFER_MODELS from test_snitch_config import DEFAULT_NUM_CORES as SNITCH_DEFAULT_NUM_CORES from test_snitch_config import KERNEL_TESTS as SNITCH_KERNEL_TESTS from test_snitch_config import MODEL_TESTS as SNITCH_MODEL_TESTS @@ -149,6 +150,17 @@ def param_id(param): @pytest.mark.parametrize("test_name", GENERIC_KERNEL_TESTS, ids = GENERIC_KERNEL_TESTS) def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: platform_config = PLATFORM_CONFIGS["generic"] + + # Per-test generator arguments: some tests need type overrides to prevent + # Deeploy's signProp from misinterpreting non-activation integer inputs. + # All Adam variants share the same signProp fix: T=1 fits in uint8_t so + # signProp applies offset 128 and stores T=-127. Force int32_t with zero + # offset so the kernel sees T=1 as intended. + _ADAM_GEN_ARGS = ["--input-type-map", "T=int32_t", "--input-offset-map", "T=0"] + _PER_TEST_GEN_ARGS = { + "Kernels/FP32/Adam/Regular": _ADAM_GEN_ARGS, + } + config = create_test_config( test_name = test_name, platform = platform_config["platform"], @@ -158,6 +170,7 @@ def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, toolchain_dir = toolchain_dir, cmake_args = cmake_args, tiling = False, + gen_args = _PER_TEST_GEN_ARGS.get(test_name), ) run_and_assert_test(test_name, config, skipgen, skipsim) @@ -265,6 +278,8 @@ def test_mempool_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, c @pytest.mark.parametrize("test_name", SIRACUSA_KERNEL_TESTS, ids = SIRACUSA_KERNEL_TESTS) def test_siracusa_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim, profile_untiled) -> None: + _ADAM_GEN_ARGS = ["--input-type-map", "T=int32_t", "--input-offset-map", "T=0"] + _PER_TEST_GEN_ARGS = {"Kernels/FP32/Adam/Regular": _ADAM_GEN_ARGS} config = create_test_config( test_name = test_name, platform = "Siracusa", @@ -276,6 +291,7 @@ def test_siracusa_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, tiling = False, cores = SIRACUSA_DEFAULT_CORES, profile_untiled = profile_untiled, + gen_args = _PER_TEST_GEN_ARGS.get(test_name), ) run_and_assert_test(test_name, config, skipgen, skipsim) @@ -358,6 +374,64 @@ def test_siracusa_tiled_kernels_l2_doublebuffer(test_params, deeploy_test_dir, t run_and_assert_test(test_name, config, skipgen, skipsim) +@pytest.mark.siracusa_tiled +@pytest.mark.kernels +@pytest.mark.singlebuffer +@pytest.mark.l3 +@pytest.mark.parametrize( + "test_params", + generate_test_params(L3_SINGLEBUFFER_KERNELS, "L3-singlebuffer"), + ids = param_id, +) +def test_siracusa_tiled_kernels_l3_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L3", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_tiled +@pytest.mark.kernels +@pytest.mark.doublebuffer +@pytest.mark.l3 +@pytest.mark.parametrize( + "test_params", + generate_test_params(L3_DOUBLEBUFFER_KERNELS, "L3-doublebuffer"), + ids = param_id, +) +def test_siracusa_tiled_kernels_l3_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L3", + double_buffer = True, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + @pytest.mark.siracusa_tiled @pytest.mark.models @pytest.mark.singlebuffer @@ -756,6 +830,9 @@ def test_gap9_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cma # Add GAP9-specific CMake args for number of cores gap9_cmake_args = cmake_args + [f"NUM_CORES={platform_config['default_num_cores']}"] + _ADAM_GEN_ARGS = ["--input-type-map", "T=int32_t", "--input-offset-map", "T=0"] + _PER_TEST_GEN_ARGS = {"Kernels/FP32/Adam/Regular": _ADAM_GEN_ARGS} + config = create_test_config( test_name = test_name, platform = platform_config["platform"], @@ -765,6 +842,7 @@ def test_gap9_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cma toolchain_dir = toolchain_dir, cmake_args = gap9_cmake_args, tiling = False, + gen_args = _PER_TEST_GEN_ARGS.get(test_name), ) run_and_assert_test(test_name, config, skipgen, skipsim) diff --git a/DeeployTest/test_siracusa_config.py b/DeeployTest/test_siracusa_config.py index 8fa105d9f4..01b7b7928f 100644 --- a/DeeployTest/test_siracusa_config.py +++ b/DeeployTest/test_siracusa_config.py @@ -45,6 +45,7 @@ "Kernels/FP32/ReduceSum", "Kernels/FP32/Reshape/SkipConnection", "Kernels/FP32/Transpose", + "Kernels/FP32/Adam/Regular", "Kernels/Integer/Hardswish/Regular", "Kernels/Integer/Softmax/Regular", "Kernels/Integer/Add/MultIO", diff --git a/DeeployTest/test_siracusa_tiled_config.py b/DeeployTest/test_siracusa_tiled_config.py index a687d9a489..aa97f90a6b 100644 --- a/DeeployTest/test_siracusa_tiled_config.py +++ b/DeeployTest/test_siracusa_tiled_config.py @@ -10,6 +10,7 @@ DEFAULT_SEARCH_STRATEGY = "random-max" L2_SINGLEBUFFER_KERNELS = { + "Kernels/FP32/Adam/Regular": [6200, 80000], "Kernels/FP32/ReLU": [2000], "Kernels/FP32/Softmax/Regular": [4000], "Kernels/FP32/Add/Large": [220000], @@ -58,6 +59,7 @@ } L2_DOUBLEBUFFER_KERNELS = { + "Kernels/FP32/Adam/Regular": [12400, 160000], "Kernels/FP32/ReLU": [20], "Kernels/FP32/Softmax/Regular": [8000], "Kernels/FP32/Conv/DW_2D_Bias": [10000], @@ -101,6 +103,14 @@ "Kernels/Integer/Hardswish/Regular_RQ": [800], } +L3_SINGLEBUFFER_KERNELS = { + "Kernels/FP32/Adam/Regular": [6200, 80000], +} + +L3_DOUBLEBUFFER_KERNELS = { + "Kernels/FP32/Adam/Regular": [12400, 160000], +} + L2_SINGLEBUFFER_MODELS = { "Models/CNN_Linear2": [45000, 30000, 15000], "Models/miniMobileNet": [60000, 12000, 6000, 3000],