diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index c19cae44bd..33be1680f0 100644 --- a/.jenkins/validate_tutorials_built.py +++ b/.jenkins/validate_tutorials_built.py @@ -39,7 +39,7 @@ "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release. "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed. "intermediate_source/torchrec_intro_tutorial.py", #failing with 2.8 reenable after 3498 - "beginner_source/mosaic_memory_profiling_tutorial.py", # failing with 2.11 issue #3774 + #"beginner_source/mosaic_memory_profiling_tutorial.py", # failing with 2.11 RC issue #3774 ] def tutorial_source_dirs() -> List[Path]: diff --git a/beginner_source/mosaic_memory_profiling_tutorial.py b/beginner_source/mosaic_memory_profiling_tutorial.py index db188a5e90..4d0a19ee3e 100644 --- a/beginner_source/mosaic_memory_profiling_tutorial.py +++ b/beginner_source/mosaic_memory_profiling_tutorial.py @@ -306,7 +306,13 @@ def run_training_ac( # Load model print(f"Loading GPT-2 (activation_checkpointing={activation_checkpointing})...") - model = GPT2LMHeadModel.from_pretrained("gpt2") + # Disable dropout to avoid PyTorch 2.11 checkpoint recomputation bug (#3774). + # _VF.dropout returns NULL without setting an exception during backward + # recomputation of GPT2Block. Dropout is irrelevant to memory profiling. + # Original: model = GPT2LMHeadModel.from_pretrained("gpt2") + model = GPT2LMHeadModel.from_pretrained( + "gpt2", resid_pdrop=0, attn_pdrop=0, embd_pdrop=0 + ) if activation_checkpointing: model.gradient_checkpointing_enable()