llmsystem · HectorHHZ · Mar 10, 2025
diff --git a/pipeline/model_parallel.py b/pipeline/model_parallel.py
@@ -35,6 +35,9 @@ def _prepare_pipeline_parallel(self, split_size=1):
         1. Enable self.pipeline_parallel
         2. Construct an nn.Sequential module for the transformer layers (self.h).
         3. Use Pipe to parallelize the transformer layers.
+
+        Note: GPT2Block returns a tuple, not a tensor, so when implementing _prepare_pipeline_parallel, define the nn.Sequential module so that it extracts the useful values from each returned tuple.
+        Construct the nn.Sequential from GPT2Block modules. Each block returns multiple values, but you only need the hidden states.
         '''
 
         # BEGIN SOLUTION
@@ -60,4 +63,4 @@ def _finalize_pipeline_parallel(self):
 if __name__ == '__main__':
     config = AutoConfig.from_pretrained('gpt2')
     model = GPT2LMHeadModelParallel(config=config).to('cuda:0')
-    model._prepare_pipeline_parallel()
+    model._prepare_pipeline_parallel()