From b961f7dc84636a065779fe05cda4d4556010ad8d Mon Sep 17 00:00:00 2001 From: "Haoze He(Hector)" <37875121+HectorHHZ@users.noreply.github.com> Date: Mon, 10 Mar 2025 17:35:27 -0400 Subject: [PATCH] Update model_parallel.py add hints on Assignment 4.2.3 to avoid bugs/ misunderstanding --- pipeline/model_parallel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pipeline/model_parallel.py b/pipeline/model_parallel.py index b5265d2..f5f430f 100644 --- a/pipeline/model_parallel.py +++ b/pipeline/model_parallel.py @@ -35,6 +35,9 @@ def _prepare_pipeline_parallel(self, split_size=1): 1. Enable self.pipeline_parallel 2. Construct an nn.Sequential module for the transformer layers (self.h). 3. Use Pipe to parallelize the transformer layers. + + Please note that GPT2Block returns a tuple, not a tensor: when implementing _prepare_pipeline_parallel, define the nn.Sequential module so that it extracts the needed values from each returned tuple. + You should construct nn.Sequential from the GPT2Block modules. Note that each block returns multiple values, but you only need the hidden states. ''' # BEGIN SOLUTION @@ -60,4 +63,4 @@ def _finalize_pipeline_parallel(self): if __name__ == '__main__': config = AutoConfig.from_pretrained('gpt2') model = GPT2LMHeadModelParallel(config=config).to('cuda:0') - model._prepare_pipeline_parallel() \ No newline at end of file + model._prepare_pipeline_parallel()