From b961f7dc84636a065779fe05cda4d4556010ad8d Mon Sep 17 00:00:00 2001
From: "Haoze He(Hector)" <37875121+HectorHHZ@users.noreply.github.com>
Date: Mon, 10 Mar 2025 17:35:27 -0400
Subject: [PATCH] Update model_parallel.py: add hints on Assignment 4.2.3 to
 avoid bugs/misunderstanding

---
 pipeline/model_parallel.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pipeline/model_parallel.py b/pipeline/model_parallel.py
index b5265d2..f5f430f 100644
--- a/pipeline/model_parallel.py
+++ b/pipeline/model_parallel.py
@@ -35,6 +35,9 @@ def _prepare_pipeline_parallel(self, split_size=1):
         1. Enable self.pipeline_parallel
         2. Construct an nn.Sequential module for the transformer layers (self.h).
         3. Use Pipe to parallelize the transformer layers.
+
+        Note: GPT2Block returns a tuple, not a tensor, so the nn.Sequential module you define in _prepare_pipeline_parallel must extract the useful values from each block's returned tuple.
+        Construct the nn.Sequential from GPT2Block modules; each block returns multiple values, but you only need the hidden states.
         '''
 
         # BEGIN SOLUTION
@@ -60,4 +63,4 @@ def _finalize_pipeline_parallel(self):
 if __name__ == '__main__':
     config = AutoConfig.from_pretrained('gpt2')
     model = GPT2LMHeadModelParallel(config=config).to('cuda:0')
-    model._prepare_pipeline_parallel()
\ No newline at end of file
+    model._prepare_pipeline_parallel()