diff --git a/pipeline/model_parallel.py b/pipeline/model_parallel.py index b5265d2..f5f430f 100644 --- a/pipeline/model_parallel.py +++ b/pipeline/model_parallel.py @@ -35,6 +35,9 @@ def _prepare_pipeline_parallel(self, split_size=1): 1. Enable self.pipeline_parallel 2. Construct an nn.Sequential module for the transformer layers (self.h). 3. Use Pipe to parallelize the transformer layers. + + Note: GPT2Block returns a tuple, not a tensor, so when implementing _prepare_pipeline_parallel, define the nn.Sequential module so that it extracts the useful values from each returned tuple. + Construct the nn.Sequential from GPT2Block modules. Each block returns multiple values, but you only need the hidden states. ''' # BEGIN SOLUTION @@ -60,4 +63,4 @@ def _finalize_pipeline_parallel(self): if __name__ == '__main__': config = AutoConfig.from_pretrained('gpt2') model = GPT2LMHeadModelParallel(config=config).to('cuda:0') - model._prepare_pipeline_parallel() \ No newline at end of file + model._prepare_pipeline_parallel()