5 changes: 5 additions & 0 deletions docs/source/en/main_classes/text_generation.md
@@ -42,3 +42,8 @@ like token streaming.
[[autodoc]] GenerationMixin
- generate
- compute_transition_scores


> **Note**
> `max_new_tokens` is now the recommended argument to control how many tokens the model generates.
> `max_length` remains for backward compatibility because it includes the length of the input prompt, which can be less intuitive.

**Reviewer comment (Member):** This can be removed now
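
The precedence the note describes can be sketched in plain Python. `effective_max_length` below is a hypothetical helper written for illustration, not a function in `transformers`; it only models the rule that `max_new_tokens`, when set, overrides `max_length` and counts generated tokens on top of the prompt.

```python
def effective_max_length(prompt_len, max_length=20, max_new_tokens=None):
    """Illustrative precedence rule (not the actual transformers code).

    `max_new_tokens` counts only generated tokens, so the total sequence may
    grow to prompt_len + max_new_tokens. `max_length` caps the total sequence,
    prompt included, and is ignored when `max_new_tokens` is set.
    """
    if max_new_tokens is not None:
        return prompt_len + max_new_tokens
    return max_length


# A 15-token prompt with the default max_length=20 leaves room for only
# 5 new tokens, while max_new_tokens=20 always allows 20 new tokens.
print(effective_max_length(15))                     # total cap: 20
print(effective_max_length(15, max_new_tokens=20))  # total cap: 35
```

This is why the docs recommend `max_new_tokens`: its effect does not shrink as the prompt grows longer.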
7 changes: 7 additions & 0 deletions src/transformers/generation/configuration_utils.py
@@ -107,6 +107,10 @@ class GenerationConfig(PushToHubMixin):
max_length (`int`, *optional*, defaults to 20):
The maximum length the generated tokens can have. Corresponds to the length of the input prompt +
`max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set.

`max_new_tokens` is now the recommended argument to control how many tokens the model generates.
`max_length` remains for backward compatibility because it includes the length of the input prompt, which can be less intuitive.

max_new_tokens (`int`, *optional*):
The maximum number of tokens to generate, ignoring the number of tokens in the prompt.
min_length (`int`, *optional*, defaults to 0):
@@ -330,6 +334,9 @@ class GenerationConfig(PushToHubMixin):
Whether to disable the automatic compilation of the forward pass. Automatic compilation happens when
specific criteria are met, including using a compilable cache. Please open an issue if you find the
need to use this flag.



"""

extra_output_flags = ("output_attentions", "output_hidden_states", "output_scores", "output_logits")